git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mjpegenc_common.h"
  46 #include "mathops.h"
  47 #include "mpegutils.h"
  48 #include "mjpegenc.h"
  49 #include "msmpeg4.h"
  50 #include "pixblockdsp.h"
  51 #include "qpeldsp.h"
  52 #include "faandct.h"
  53 #include "thread.h"
  54 #include "aandcttab.h"
  55 #include "flv.h"
  56 #include "mpeg4video.h"
  57 #include "internal.h"
  58 #include "bytestream.h"
  59 #include <limits.h>
  60 #include "sp5x.h"
  61
/* Fixed-point scale of the quantizer bias values: in this file a bias of
 * 3/8 is written as 3 << (QUANT_BIAS_SHIFT - 3), i.e. biases are expressed
 * in units of 1 / (1 << QUANT_BIAS_SHIFT). */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point scale of the 16-bit reciprocal table (qmat16) that
 * ff_convert_matrix() fills as (1 << QMAT_SHIFT_MMX) / divisor. */
#define QMAT_SHIFT_MMX 16
/* Fixed-point scale of the int reciprocal table (qmat) that
 * ff_convert_matrix() fills as (1 << QMAT_SHIFT) / divisor. */
#define QMAT_SHIFT 21
  66
/* Forward declarations of file-local helpers that are referenced before
 * their definitions (the definitions are expected further down this file). */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default tables installed by mpv_encode_defaults() as s->me.mv_penalty and
 * s->fcode_tab. They are shared by every encoder instance in the process. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  75
/* Option table made up of the entries expanded from FF_MPV_COMMON_OPTS,
 * terminated by the mandatory all-NULL sentinel entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  80
  81 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  82                        uint16_t (*qmat16)[2][64],
  83                        const uint16_t *quant_matrix,
  84                        int bias, int qmin, int qmax, int intra)
  85 {
  86     FDCTDSPContext *fdsp = &s->fdsp;
  87     int qscale;
  88     int shift = 0;
  89
  90     for (qscale = qmin; qscale <= qmax; qscale++) {
  91         int i;
  92         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  93 #if CONFIG_FAANDCT
  94             fdsp->fdct == ff_faandct            ||
  95 #endif /* CONFIG_FAANDCT */
  96             fdsp->fdct == ff_jpeg_fdct_islow_10) {
  97             for (i = 0; i < 64; i++) {
  98                 const int j = s->idsp.idct_permutation[i];
  99                 int64_t den = (int64_t) qscale * quant_matrix[j];
 100                 /* 16 <= qscale * quant_matrix[i] <= 7905
 101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 102                  *             19952 <=              x  <= 249205026
 103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 104                  *           3444240 >= (1 << 36) / (x) >= 275 */
 105
 106                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 107             }
 108         } else if (fdsp->fdct == ff_fdct_ifast) {
 109             for (i = 0; i < 64; i++) {
 110                 const int j = s->idsp.idct_permutation[i];
 111                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 112                 /* 16 <= qscale * quant_matrix[i] <= 7905
 113                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 114                  *             19952 <=              x  <= 249205026
 115                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 116                  *           3444240 >= (1 << 36) / (x) >= 275 */
 117
 118                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 119             }
 120         } else {
 121             for (i = 0; i < 64; i++) {
 122                 const int j = s->idsp.idct_permutation[i];
 123                 int64_t den = (int64_t) qscale * quant_matrix[j];
 124                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 125                  * Assume x = qscale * quant_matrix[i]
 126                  * So             16 <=              x  <= 7905
 127                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 128                  * so          32768 >= (1 << 19) / (x) >= 67 */
 129                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 131                 //                    (qscale * quant_matrix[i]);
 132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 133
 134                 if (qmat16[qscale][0][i] == 0 ||
 135                     qmat16[qscale][0][i] == 128 * 256)
 136                     qmat16[qscale][0][i] = 128 * 256 - 1;
 137                 qmat16[qscale][1][i] =
 138                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 139                                 qmat16[qscale][0][i]);
 140             }
 141         }
 142
 143         for (i = intra; i < 64; i++) {
 144             int64_t max = 8191;
 145             if (fdsp->fdct == ff_fdct_ifast) {
 146                 max = (8191LL * ff_aanscales[i]) >> 14;
 147             }
 148             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 149                 shift++;
 150             }
 151         }
 152     }
 153     if (shift) {
 154         av_log(NULL, AV_LOG_INFO,
 155                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 156                QMAT_SHIFT - shift);
 157     }
 158 }
 159
 160 static inline void update_qscale(MpegEncContext *s)
 161 {
 162     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 163                 (FF_LAMBDA_SHIFT + 7);
 164     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 165
 166     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 167                  FF_LAMBDA_SHIFT;
 168 }
 169
 170 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 171 {
 172     int i;
 173
 174     if (matrix) {
 175         put_bits(pb, 1, 1);
 176         for (i = 0; i < 64; i++) {
 177             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 178         }
 179     } else
 180         put_bits(pb, 1, 0);
 181 }
 182
 183 /**
 184  * init s->current_picture.qscale_table from s->lambda_table
 185  */
 186 void ff_init_qscale_tab(MpegEncContext *s)
 187 {
 188     int8_t * const qscale_table = s->current_picture.qscale_table;
 189     int i;
 190
 191     for (i = 0; i < s->mb_num; i++) {
 192         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 193         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 194         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 195                                                   s->avctx->qmax);
 196     }
 197 }
 198
 199 static void update_duplicate_context_after_me(MpegEncContext *dst,
 200                                               MpegEncContext *src)
 201 {
 202 #define COPY(a) dst->a= src->a
 203     COPY(pict_type);
 204     COPY(current_picture);
 205     COPY(f_code);
 206     COPY(b_code);
 207     COPY(qscale);
 208     COPY(lambda);
 209     COPY(lambda2);
 210     COPY(picture_in_gop_number);
 211     COPY(gop_picture_number);
 212     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 213     COPY(progressive_frame);    // FIXME don't set in encode_header
 214     COPY(partitioned_frame);    // FIXME don't set in encode_header
 215 #undef COPY
 216 }
 217
 218 /**
 219  * Set the given MpegEncContext to defaults for encoding.
 220  * the changed fields will not depend upon the prior state of the MpegEncContext.
 221  */
 222 static void mpv_encode_defaults(MpegEncContext *s)
 223 {
 224     int i;
 225     ff_mpv_common_defaults(s);
 226
 227     for (i = -16; i < 16; i++) {
 228         default_fcode_tab[i + MAX_MV] = 1;
 229     }
 230     s->me.mv_penalty = default_mv_penalty;
 231     s->fcode_tab     = default_fcode_tab;
 232
 233     s->input_picture_number  = 0;
 234     s->picture_in_gop_number = 0;
 235 }
 236
 237 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 238     if (ARCH_X86)
 239         ff_dct_encode_init_x86(s);
 240
 241     if (CONFIG_H263_ENCODER)
 242         ff_h263dsp_init(&s->h263dsp);
 243     if (!s->dct_quantize)
 244         s->dct_quantize = ff_dct_quantize_c;
 245     if (!s->denoise_dct)
 246         s->denoise_dct  = denoise_dct_c;
 247     s->fast_dct_quantize = s->dct_quantize;
 248     if (s->avctx->trellis)
 249         s->dct_quantize  = dct_quantize_trellis_c;
 250
 251     return 0;
 252 }
 253
 254 /* init video encoder */
 255 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 256 {
 257     MpegEncContext *s = avctx->priv_data;
 258     int i, ret, format_supported;
 259
 260     mpv_encode_defaults(s);
 261
 262     switch (avctx->codec_id) {
 263     case AV_CODEC_ID_MPEG2VIDEO:
 264         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 265             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 266             av_log(avctx, AV_LOG_ERROR,
 267                    "only YUV420 and YUV422 are supported\n");
 268             return -1;
 269         }
 270         break;
 271     case AV_CODEC_ID_MJPEG:
 272     case AV_CODEC_ID_AMV:
 273         format_supported = 0;
 274         /* JPEG color space */
 275         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 276             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 277             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 278             (avctx->color_range == AVCOL_RANGE_JPEG &&
 279              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 280               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 281               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 282             format_supported = 1;
 283         /* MPEG color space */
 284         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 285                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 286                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 287                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 288             format_supported = 1;
 289
 290         if (!format_supported) {
 291             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 292             return -1;
 293         }
 294         break;
 295     default:
 296         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 297             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 298             return -1;
 299         }
 300     }
 301
 302     switch (avctx->pix_fmt) {
 303     case AV_PIX_FMT_YUVJ444P:
 304     case AV_PIX_FMT_YUV444P:
 305         s->chroma_format = CHROMA_444;
 306         break;
 307     case AV_PIX_FMT_YUVJ422P:
 308     case AV_PIX_FMT_YUV422P:
 309         s->chroma_format = CHROMA_422;
 310         break;
 311     case AV_PIX_FMT_YUVJ420P:
 312     case AV_PIX_FMT_YUV420P:
 313     default:
 314         s->chroma_format = CHROMA_420;
 315         break;
 316     }
 317
 318     s->bit_rate = avctx->bit_rate;
 319     s->width    = avctx->width;
 320     s->height   = avctx->height;
 321     if (avctx->gop_size > 600 &&
 322         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 323         av_log(avctx, AV_LOG_WARNING,
 324                "keyframe interval too large!, reducing it from %d to %d\n",
 325                avctx->gop_size, 600);
 326         avctx->gop_size = 600;
 327     }
 328     s->gop_size     = avctx->gop_size;
 329     s->avctx        = avctx;
 330     s->flags        = avctx->flags;
 331     s->flags2       = avctx->flags2;
 332     if (avctx->max_b_frames > MAX_B_FRAMES) {
 333         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 334                "is %d.\n", MAX_B_FRAMES);
 335         avctx->max_b_frames = MAX_B_FRAMES;
 336     }
 337     s->max_b_frames = avctx->max_b_frames;
 338     s->codec_id     = avctx->codec->id;
 339     s->strict_std_compliance = avctx->strict_std_compliance;
 340     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 341     s->mpeg_quant         = avctx->mpeg_quant;
 342     s->rtp_mode           = !!avctx->rtp_payload_size;
 343     s->intra_dc_precision = avctx->intra_dc_precision;
 344
 345     // workaround some differences between how applications specify dc precision
 346     if (s->intra_dc_precision < 0) {
 347         s->intra_dc_precision += 8;
 348     } else if (s->intra_dc_precision >= 8)
 349         s->intra_dc_precision -= 8;
 350
 351     if (s->intra_dc_precision < 0) {
 352         av_log(avctx, AV_LOG_ERROR,
 353                 "intra dc precision must be positive, note some applications use"
 354                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 355         return AVERROR(EINVAL);
 356     }
 357
 358     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 359         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 360         return AVERROR(EINVAL);
 361     }
 362     s->user_specified_pts = AV_NOPTS_VALUE;
 363
 364     if (s->gop_size <= 1) {
 365         s->intra_only = 1;
 366         s->gop_size   = 12;
 367     } else {
 368         s->intra_only = 0;
 369     }
 370
 371     s->me_method = avctx->me_method;
 372
 373     /* Fixed QSCALE */
 374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 375
 376 #if FF_API_MPV_OPT
 377     FF_DISABLE_DEPRECATION_WARNINGS
 378     if (avctx->border_masking != 0.0)
 379         s->border_masking = avctx->border_masking;
 380     FF_ENABLE_DEPRECATION_WARNINGS
 381 #endif
 382
 383     s->adaptive_quant = (s->avctx->lumi_masking ||
 384                          s->avctx->dark_masking ||
 385                          s->avctx->temporal_cplx_masking ||
 386                          s->avctx->spatial_cplx_masking  ||
 387                          s->avctx->p_masking      ||
 388                          s->border_masking ||
 389                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 390                         !s->fixed_qscale;
 391
 392     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 393
 394     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 395         switch(avctx->codec_id) {
 396         case AV_CODEC_ID_MPEG1VIDEO:
 397         case AV_CODEC_ID_MPEG2VIDEO:
 398             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
 399             break;
 400         case AV_CODEC_ID_MPEG4:
 401         case AV_CODEC_ID_MSMPEG4V1:
 402         case AV_CODEC_ID_MSMPEG4V2:
 403         case AV_CODEC_ID_MSMPEG4V3:
 404             if       (avctx->rc_max_rate >= 15000000) {
 405                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
 406             } else if(avctx->rc_max_rate >=  2000000) {
 407                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
 408             } else if(avctx->rc_max_rate >=   384000) {
 409                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
 410             } else
 411                 avctx->rc_buffer_size = 40;
 412             avctx->rc_buffer_size *= 16384;
 413             break;
 414         }
 415         if (avctx->rc_buffer_size) {
 416             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 417         }
 418     }
 419
 420     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 421         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 422         return -1;
 423     }
 424
 425     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 426         av_log(avctx, AV_LOG_INFO,
 427                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 428     }
 429
 430     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 431         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 432         return -1;
 433     }
 434
 435     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 436         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 437         return -1;
 438     }
 439
 440     if (avctx->rc_max_rate &&
 441         avctx->rc_max_rate == avctx->bit_rate &&
 442         avctx->rc_max_rate != avctx->rc_min_rate) {
 443         av_log(avctx, AV_LOG_INFO,
 444                "impossible bitrate constraints, this will fail\n");
 445     }
 446
 447     if (avctx->rc_buffer_size &&
 448         avctx->bit_rate * (int64_t)avctx->time_base.num >
 449             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 450         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 451         return -1;
 452     }
 453
 454     if (!s->fixed_qscale &&
 455         avctx->bit_rate * av_q2d(avctx->time_base) >
 456             avctx->bit_rate_tolerance) {
 457         av_log(avctx, AV_LOG_WARNING,
 458                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 459         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 460     }
 461
 462     if (s->avctx->rc_max_rate &&
 463         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 464         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 465          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 466         90000LL * (avctx->rc_buffer_size - 1) >
 467             s->avctx->rc_max_rate * 0xFFFFLL) {
 468         av_log(avctx, AV_LOG_INFO,
 469                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 470                "specified vbv buffer is too large for the given bitrate!\n");
 471     }
 472
 473     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 474         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 475         s->codec_id != AV_CODEC_ID_FLV1) {
 476         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 477         return -1;
 478     }
 479
 480     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 481         av_log(avctx, AV_LOG_ERROR,
 482                "OBMC is only supported with simple mb decision\n");
 483         return -1;
 484     }
 485
 486     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 487         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 488         return -1;
 489     }
 490
 491     if (s->max_b_frames                    &&
 492         s->codec_id != AV_CODEC_ID_MPEG4      &&
 493         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 494         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 495         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 496         return -1;
 497     }
 498     if (s->max_b_frames < 0) {
 499         av_log(avctx, AV_LOG_ERROR,
 500                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 501         return -1;
 502     }
 503
 504     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 505          s->codec_id == AV_CODEC_ID_H263  ||
 506          s->codec_id == AV_CODEC_ID_H263P) &&
 507         (avctx->sample_aspect_ratio.num > 255 ||
 508          avctx->sample_aspect_ratio.den > 255)) {
 509         av_log(avctx, AV_LOG_WARNING,
 510                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 511                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 512         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 513                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 514     }
 515
 516     if ((s->codec_id == AV_CODEC_ID_H263  ||
 517          s->codec_id == AV_CODEC_ID_H263P) &&
 518         (avctx->width  > 2048 ||
 519          avctx->height > 1152 )) {
 520         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 521         return -1;
 522     }
 523     if ((s->codec_id == AV_CODEC_ID_H263  ||
 524          s->codec_id == AV_CODEC_ID_H263P) &&
 525         ((avctx->width &3) ||
 526          (avctx->height&3) )) {
 527         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 528         return -1;
 529     }
 530
 531     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 532         (avctx->width  > 4095 ||
 533          avctx->height > 4095 )) {
 534         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 535         return -1;
 536     }
 537
 538     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 539         (avctx->width  > 16383 ||
 540          avctx->height > 16383 )) {
 541         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 542         return -1;
 543     }
 544
 545     if (s->codec_id == AV_CODEC_ID_RV10 &&
 546         (avctx->width &15 ||
 547          avctx->height&15 )) {
 548         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 549         return AVERROR(EINVAL);
 550     }
 551
 552     if (s->codec_id == AV_CODEC_ID_RV20 &&
 553         (avctx->width &3 ||
 554          avctx->height&3 )) {
 555         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 556         return AVERROR(EINVAL);
 557     }
 558
 559     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 560          s->codec_id == AV_CODEC_ID_WMV2) &&
 561          avctx->width & 1) {
 562          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 563          return -1;
 564     }
 565
 566     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 567         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 568         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 569         return -1;
 570     }
 571
 572     // FIXME mpeg2 uses that too
 573     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 574                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 575         av_log(avctx, AV_LOG_ERROR,
 576                "mpeg2 style quantization not supported by codec\n");
 577         return -1;
 578     }
 579
 580     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 581         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 582         return -1;
 583     }
 584
 585     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 586         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 587         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 588         return -1;
 589     }
 590
 591     if (s->avctx->scenechange_threshold < 1000000000 &&
 592         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 593         av_log(avctx, AV_LOG_ERROR,
 594                "closed gop with scene change detection are not supported yet, "
 595                "set threshold to 1000000000\n");
 596         return -1;
 597     }
 598
 599     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 600         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 601             av_log(avctx, AV_LOG_ERROR,
 602                   "low delay forcing is only available for mpeg2\n");
 603             return -1;
 604         }
 605         if (s->max_b_frames != 0) {
 606             av_log(avctx, AV_LOG_ERROR,
 607                    "b frames cannot be used with low delay\n");
 608             return -1;
 609         }
 610     }
 611
 612     if (s->q_scale_type == 1) {
 613         if (avctx->qmax > 12) {
 614             av_log(avctx, AV_LOG_ERROR,
 615                    "non linear quant only supports qmax <= 12 currently\n");
 616             return -1;
 617         }
 618     }
 619
 620     if (s->avctx->thread_count > 1         &&
 621         s->codec_id != AV_CODEC_ID_MPEG4      &&
 622         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 623         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 624         s->codec_id != AV_CODEC_ID_MJPEG      &&
 625         (s->codec_id != AV_CODEC_ID_H263P)) {
 626         av_log(avctx, AV_LOG_ERROR,
 627                "multi threaded encoding not supported by codec\n");
 628         return -1;
 629     }
 630
 631     if (s->avctx->thread_count < 1) {
 632         av_log(avctx, AV_LOG_ERROR,
 633                "automatic thread number detection not supported by codec, "
 634                "patch welcome\n");
 635         return -1;
 636     }
 637
 638     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 639         s->rtp_mode = 1;
 640
 641     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 642         s->h263_slice_structured = 1;
 643
 644     if (!avctx->time_base.den || !avctx->time_base.num) {
 645         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 646         return -1;
 647     }
 648
 649     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 650         av_log(avctx, AV_LOG_INFO,
 651                "notice: b_frame_strategy only affects the first pass\n");
 652         avctx->b_frame_strategy = 0;
 653     }
 654
 655     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 656     if (i > 1) {
 657         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 658         avctx->time_base.den /= i;
 659         avctx->time_base.num /= i;
 660         //return -1;
 661     }
 662
 663     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 664         // (a + x * 3 / 8) / x
 665         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 666         s->inter_quant_bias = 0;
 667     } else {
 668         s->intra_quant_bias = 0;
 669         // (a - x / 4) / x
 670         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 671     }
 672
 673     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 674         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 675         return AVERROR(EINVAL);
 676     }
 677
 678     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 679         s->intra_quant_bias = avctx->intra_quant_bias;
 680     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 681         s->inter_quant_bias = avctx->inter_quant_bias;
 682
 683     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 684
 685     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 686         s->avctx->time_base.den > (1 << 16) - 1) {
 687         av_log(avctx, AV_LOG_ERROR,
 688                "timebase %d/%d not supported by MPEG 4 standard, "
 689                "the maximum admitted value for the timebase denominator "
 690                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 691                (1 << 16) - 1);
 692         return -1;
 693     }
 694     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 695
 696     switch (avctx->codec->id) {
 697     case AV_CODEC_ID_MPEG1VIDEO:
 698         s->out_format = FMT_MPEG1;
 699         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 700         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 701         break;
 702     case AV_CODEC_ID_MPEG2VIDEO:
 703         s->out_format = FMT_MPEG1;
 704         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 705         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 706         s->rtp_mode   = 1;
 707         break;
 708     case AV_CODEC_ID_MJPEG:
 709     case AV_CODEC_ID_AMV:
 710         s->out_format = FMT_MJPEG;
 711         s->intra_only = 1; /* force intra only for jpeg */
 712         if (!CONFIG_MJPEG_ENCODER ||
 713             ff_mjpeg_encode_init(s) < 0)
 714             return -1;
 715         avctx->delay = 0;
 716         s->low_delay = 1;
 717         break;
 718     case AV_CODEC_ID_H261:
 719         if (!CONFIG_H261_ENCODER)
 720             return -1;
 721         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 722             av_log(avctx, AV_LOG_ERROR,
 723                    "The specified picture size of %dx%d is not valid for the "
 724                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 725                     s->width, s->height);
 726             return -1;
 727         }
 728         s->out_format = FMT_H261;
 729         avctx->delay  = 0;
 730         s->low_delay  = 1;
 731         s->rtp_mode   = 0; /* Sliced encoding not supported */
 732         break;
 733     case AV_CODEC_ID_H263:
 734         if (!CONFIG_H263_ENCODER)
 735             return -1;
 736         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 737                              s->width, s->height) == 8) {
 738             av_log(avctx, AV_LOG_ERROR,
 739                    "The specified picture size of %dx%d is not valid for "
 740                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 741                    "352x288, 704x576, and 1408x1152. "
 742                    "Try H.263+.\n", s->width, s->height);
 743             return -1;
 744         }
 745         s->out_format = FMT_H263;
 746         avctx->delay  = 0;
 747         s->low_delay  = 1;
 748         break;
 749     case AV_CODEC_ID_H263P:
 750         s->out_format = FMT_H263;
 751         s->h263_plus  = 1;
 752         /* Fx */
 753         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 754         s->modified_quant  = s->h263_aic;
 755         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 756         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 757
 758         /* /Fx */
 759         /* These are just to be sure */
 760         avctx->delay = 0;
 761         s->low_delay = 1;
 762         break;
 763     case AV_CODEC_ID_FLV1:
 764         s->out_format      = FMT_H263;
 765         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 766         s->unrestricted_mv = 1;
 767         s->rtp_mode  = 0; /* don't allow GOB */
 768         avctx->delay = 0;
 769         s->low_delay = 1;
 770         break;
 771     case AV_CODEC_ID_RV10:
 772         s->out_format = FMT_H263;
 773         avctx->delay  = 0;
 774         s->low_delay  = 1;
 775         break;
 776     case AV_CODEC_ID_RV20:
 777         s->out_format      = FMT_H263;
 778         avctx->delay       = 0;
 779         s->low_delay       = 1;
 780         s->modified_quant  = 1;
 781         s->h263_aic        = 1;
 782         s->h263_plus       = 1;
 783         s->loop_filter     = 1;
 784         s->unrestricted_mv = 0;
 785         break;
 786     case AV_CODEC_ID_MPEG4:
 787         s->out_format      = FMT_H263;
 788         s->h263_pred       = 1;
 789         s->unrestricted_mv = 1;
 790         s->low_delay       = s->max_b_frames ? 0 : 1;
 791         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 792         break;
 793     case AV_CODEC_ID_MSMPEG4V2:
 794         s->out_format      = FMT_H263;
 795         s->h263_pred       = 1;
 796         s->unrestricted_mv = 1;
 797         s->msmpeg4_version = 2;
 798         avctx->delay       = 0;
 799         s->low_delay       = 1;
 800         break;
 801     case AV_CODEC_ID_MSMPEG4V3:
 802         s->out_format        = FMT_H263;
 803         s->h263_pred         = 1;
 804         s->unrestricted_mv   = 1;
 805         s->msmpeg4_version   = 3;
 806         s->flipflop_rounding = 1;
 807         avctx->delay         = 0;
 808         s->low_delay         = 1;
 809         break;
 810     case AV_CODEC_ID_WMV1:
 811         s->out_format        = FMT_H263;
 812         s->h263_pred         = 1;
 813         s->unrestricted_mv   = 1;
 814         s->msmpeg4_version   = 4;
 815         s->flipflop_rounding = 1;
 816         avctx->delay         = 0;
 817         s->low_delay         = 1;
 818         break;
 819     case AV_CODEC_ID_WMV2:
 820         s->out_format        = FMT_H263;
 821         s->h263_pred         = 1;
 822         s->unrestricted_mv   = 1;
 823         s->msmpeg4_version   = 5;
 824         s->flipflop_rounding = 1;
 825         avctx->delay         = 0;
 826         s->low_delay         = 1;
 827         break;
 828     default:
 829         return -1;
 830     }
 831
 832     avctx->has_b_frames = !s->low_delay;
 833
 834     s->encoding = 1;
 835
 836     s->progressive_frame    =
 837     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 838                                                 CODEC_FLAG_INTERLACED_ME) ||
 839                                 s->alternate_scan);
 840
 841     /* init */
 842     ff_mpv_idct_init(s);
 843     if (ff_mpv_common_init(s) < 0)
 844         return -1;
 845
 846     ff_fdctdsp_init(&s->fdsp, avctx);
 847     ff_me_cmp_init(&s->mecc, avctx);
 848     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 849     ff_pixblockdsp_init(&s->pdsp, avctx);
 850     ff_qpeldsp_init(&s->qdsp);
 851
 852     s->avctx->coded_frame = s->current_picture.f;
 853
 854     if (s->msmpeg4_version) {
 855         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 856                           2 * 2 * (MAX_LEVEL + 1) *
 857                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 858     }
 859     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 860
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 866     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 868                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 870                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 871
 872     if (s->avctx->noise_reduction) {
 873         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 874                           2 * 64 * sizeof(uint16_t), fail);
 875     }
 876
 877     ff_dct_encode_init(s);
 878
 879     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 880         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 881
 882     s->quant_precision = 5;
 883
 884     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 885     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 886
 887     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 888         ff_h261_encode_init(s);
 889     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 890         ff_h263_encode_init(s);
 891     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 892         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 893             return ret;
 894     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 895         && s->out_format == FMT_MPEG1)
 896         ff_mpeg1_encode_init(s);
 897
 898     /* init q matrix */
 899     for (i = 0; i < 64; i++) {
 900         int j = s->idsp.idct_permutation[i];
 901         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 902             s->mpeg_quant) {
 903             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 904             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 905         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 906             s->intra_matrix[j] =
 907             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 908         } else {
 909             /* mpeg1/2 */
 910             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 911             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 912         }
 913         if (s->avctx->intra_matrix)
 914             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 915         if (s->avctx->inter_matrix)
 916             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 917     }
 918
 919     /* precompute matrix */
 920     /* for mjpeg, we do include qscale in the matrix */
 921     if (s->out_format != FMT_MJPEG) {
 922         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 923                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 924                           31, 1);
 925         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 926                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 927                           31, 0);
 928     }
 929
 930     if (ff_rate_control_init(s) < 0)
 931         return -1;
 932
 933 #if FF_API_ERROR_RATE
 934     FF_DISABLE_DEPRECATION_WARNINGS
 935     if (avctx->error_rate)
 936         s->error_rate = avctx->error_rate;
 937     FF_ENABLE_DEPRECATION_WARNINGS;
 938 #endif
 939
 940 #if FF_API_NORMALIZE_AQP
 941     FF_DISABLE_DEPRECATION_WARNINGS
 942     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 943         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 944     FF_ENABLE_DEPRECATION_WARNINGS;
 945 #endif
 946
 947 #if FF_API_MV0
 948     FF_DISABLE_DEPRECATION_WARNINGS
 949     if (avctx->flags & CODEC_FLAG_MV0)
 950         s->mpv_flags |= FF_MPV_FLAG_MV0;
 951     FF_ENABLE_DEPRECATION_WARNINGS
 952 #endif
 953
 954 #if FF_API_MPV_OPT
 955     FF_DISABLE_DEPRECATION_WARNINGS
 956     if (avctx->rc_qsquish != 0.0)
 957         s->rc_qsquish = avctx->rc_qsquish;
 958     if (avctx->rc_qmod_amp != 0.0)
 959         s->rc_qmod_amp = avctx->rc_qmod_amp;
 960     if (avctx->rc_qmod_freq)
 961         s->rc_qmod_freq = avctx->rc_qmod_freq;
 962     if (avctx->rc_buffer_aggressivity != 1.0)
 963         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 964     if (avctx->rc_initial_cplx != 0.0)
 965         s->rc_initial_cplx = avctx->rc_initial_cplx;
 966     if (avctx->lmin)
 967         s->lmin = avctx->lmin;
 968     if (avctx->lmax)
 969         s->lmax = avctx->lmax;
 970
 971     if (avctx->rc_eq) {
 972         av_freep(&s->rc_eq);
 973         s->rc_eq = av_strdup(avctx->rc_eq);
 974         if (!s->rc_eq)
 975             return AVERROR(ENOMEM);
 976     }
 977     FF_ENABLE_DEPRECATION_WARNINGS
 978 #endif
 979
 980     if (avctx->b_frame_strategy == 2) {
 981         for (i = 0; i < s->max_b_frames + 2; i++) {
 982             s->tmp_frames[i] = av_frame_alloc();
 983             if (!s->tmp_frames[i])
 984                 return AVERROR(ENOMEM);
 985
 986             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 987             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 988             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 989
 990             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 991             if (ret < 0)
 992                 return ret;
 993         }
 994     }
 995
 996     return 0;
 997 fail:
 998     ff_mpv_encode_end(avctx);
 999     return AVERROR_UNKNOWN;
1000 }
1001
1002 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1003 {
1004     MpegEncContext *s = avctx->priv_data;
1005     int i;
1006
1007     ff_rate_control_uninit(s);
1008
1009     ff_mpv_common_end(s);
1010     if (CONFIG_MJPEG_ENCODER &&
1011         s->out_format == FMT_MJPEG)
1012         ff_mjpeg_encode_close(s);
1013
1014     av_freep(&avctx->extradata);
1015
1016     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1017         av_frame_free(&s->tmp_frames[i]);
1018
1019     ff_free_picture_tables(&s->new_picture);
1020     ff_mpeg_unref_picture(s, &s->new_picture);
1021
1022     av_freep(&s->avctx->stats_out);
1023     av_freep(&s->ac_stats);
1024
1025     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1026     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1027     s->q_chroma_intra_matrix=   NULL;
1028     s->q_chroma_intra_matrix16= NULL;
1029     av_freep(&s->q_intra_matrix);
1030     av_freep(&s->q_inter_matrix);
1031     av_freep(&s->q_intra_matrix16);
1032     av_freep(&s->q_inter_matrix16);
1033     av_freep(&s->input_picture);
1034     av_freep(&s->reordered_input_picture);
1035     av_freep(&s->dct_offset);
1036
1037     return 0;
1038 }
1039
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++) {
            const int diff = src[col + row * stride] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1053
1054 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1055                            uint8_t *ref, int stride)
1056 {
1057     int x, y, w, h;
1058     int acc = 0;
1059
1060     w = s->width  & ~15;
1061     h = s->height & ~15;
1062
1063     for (y = 0; y < h; y += 16) {
1064         for (x = 0; x < w; x += 16) {
1065             int offset = x + y * stride;
1066             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1067                                       stride, 16);
1068             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1069             int sae  = get_sae(src + offset, mean, stride);
1070
1071             acc += sae + 500 < sad;
1072         }
1073     }
1074     return acc;
1075 }
1076
1077
1078 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1079 {
1080     Picture *pic = NULL;
1081     int64_t pts;
1082     int i, display_picture_number = 0, ret;
1083     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1084                                                  (s->low_delay ? 0 : 1);
1085     int direct = 1;
1086
1087     if (pic_arg) {
1088         pts = pic_arg->pts;
1089         display_picture_number = s->input_picture_number++;
1090
1091         if (pts != AV_NOPTS_VALUE) {
1092             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1093                 int64_t last = s->user_specified_pts;
1094
1095                 if (pts <= last) {
1096                     av_log(s->avctx, AV_LOG_ERROR,
1097                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1098                            pts, last);
1099                     return AVERROR(EINVAL);
1100                 }
1101
1102                 if (!s->low_delay && display_picture_number == 1)
1103                     s->dts_delta = pts - last;
1104             }
1105             s->user_specified_pts = pts;
1106         } else {
1107             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1108                 s->user_specified_pts =
1109                 pts = s->user_specified_pts + 1;
1110                 av_log(s->avctx, AV_LOG_INFO,
1111                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1112                        pts);
1113             } else {
1114                 pts = display_picture_number;
1115             }
1116         }
1117     }
1118
1119     if (pic_arg) {
1120         if (!pic_arg->buf[0] ||
1121             pic_arg->linesize[0] != s->linesize ||
1122             pic_arg->linesize[1] != s->uvlinesize ||
1123             pic_arg->linesize[2] != s->uvlinesize)
1124             direct = 0;
1125         if ((s->width & 15) || (s->height & 15))
1126             direct = 0;
1127         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1128             direct = 0;
1129         if (s->linesize & (STRIDE_ALIGN-1))
1130             direct = 0;
1131
1132         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1133                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1134
1135         i = ff_find_unused_picture(s, direct);
1136         if (i < 0)
1137             return i;
1138
1139         pic = &s->picture[i];
1140         pic->reference = 3;
1141
1142         if (direct) {
1143             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1144                 return ret;
1145             if (ff_alloc_picture(s, pic, 1) < 0) {
1146                 return -1;
1147             }
1148         } else {
1149             if (ff_alloc_picture(s, pic, 0) < 0) {
1150                 return -1;
1151             }
1152
1153             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1154                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1155                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1156                 // empty
1157             } else {
1158                 int h_chroma_shift, v_chroma_shift;
1159                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1160                                                  &h_chroma_shift,
1161                                                  &v_chroma_shift);
1162
1163                 for (i = 0; i < 3; i++) {
1164                     int src_stride = pic_arg->linesize[i];
1165                     int dst_stride = i ? s->uvlinesize : s->linesize;
1166                     int h_shift = i ? h_chroma_shift : 0;
1167                     int v_shift = i ? v_chroma_shift : 0;
1168                     int w = s->width  >> h_shift;
1169                     int h = s->height >> v_shift;
1170                     uint8_t *src = pic_arg->data[i];
1171                     uint8_t *dst = pic->f->data[i];
1172                     int vpad = 16;
1173
1174                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1175                         && !s->progressive_sequence
1176                         && FFALIGN(s->height, 32) - s->height > 16)
1177                         vpad = 32;
1178
1179                     if (!s->avctx->rc_buffer_size)
1180                         dst += INPLACE_OFFSET;
1181
1182                     if (src_stride == dst_stride)
1183                         memcpy(dst, src, src_stride * h);
1184                     else {
1185                         int h2 = h;
1186                         uint8_t *dst2 = dst;
1187                         while (h2--) {
1188                             memcpy(dst2, src, w);
1189                             dst2 += dst_stride;
1190                             src += src_stride;
1191                         }
1192                     }
1193                     if ((s->width & 15) || (s->height & (vpad-1))) {
1194                         s->mpvencdsp.draw_edges(dst, dst_stride,
1195                                                 w, h,
1196                                                 16 >> h_shift,
1197                                                 vpad >> v_shift,
1198                                                 EDGE_BOTTOM);
1199                     }
1200                 }
1201             }
1202         }
1203         ret = av_frame_copy_props(pic->f, pic_arg);
1204         if (ret < 0)
1205             return ret;
1206
1207         pic->f->display_picture_number = display_picture_number;
1208         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1209     }
1210
1211     /* shift buffer entries */
1212     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1213         s->input_picture[i - 1] = s->input_picture[i];
1214
1215     s->input_picture[encoding_delay] = (Picture*) pic;
1216
1217     return 0;
1218 }
1219
1220 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1221 {
1222     int x, y, plane;
1223     int score = 0;
1224     int64_t score64 = 0;
1225
1226     for (plane = 0; plane < 3; plane++) {
1227         const int stride = p->f->linesize[plane];
1228         const int bw = plane ? 1 : 2;
1229         for (y = 0; y < s->mb_height * bw; y++) {
1230             for (x = 0; x < s->mb_width * bw; x++) {
1231                 int off = p->shared ? 0 : 16;
1232                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1233                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1234                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1235
1236                 switch (FFABS(s->avctx->frame_skip_exp)) {
1237                 case 0: score    =  FFMAX(score, v);          break;
1238                 case 1: score   += FFABS(v);                  break;
1239                 case 2: score64 += v * (int64_t)v;                       break;
1240                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1241                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1242                 }
1243             }
1244         }
1245     }
1246     emms_c();
1247
1248     if (score)
1249         score64 = score;
1250     if (s->avctx->frame_skip_exp < 0)
1251         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1252                       -1.0/s->avctx->frame_skip_exp);
1253
1254     if (score64 < s->avctx->frame_skip_threshold)
1255         return 1;
1256     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1257         return 1;
1258     return 0;
1259 }
1260
1261 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1262 {
1263     AVPacket pkt = { 0 };
1264     int ret, got_output;
1265
1266     av_init_packet(&pkt);
1267     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1268     if (ret < 0)
1269         return ret;
1270
1271     ret = pkt.size;
1272     av_free_packet(&pkt);
1273     return ret;
1274 }
1275
1276 static int estimate_best_b_count(MpegEncContext *s)
1277 {
1278     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1279     AVCodecContext *c = avcodec_alloc_context3(NULL);
1280     const int scale = s->avctx->brd_scale;
1281     int i, j, out_size, p_lambda, b_lambda, lambda2;
1282     int64_t best_rd  = INT64_MAX;
1283     int best_b_count = -1;
1284
1285     av_assert0(scale >= 0 && scale <= 3);
1286
1287     //emms_c();
1288     //s->next_picture_ptr->quality;
1289     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1290     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1291     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1292     if (!b_lambda) // FIXME we should do this somewhere else
1293         b_lambda = p_lambda;
1294     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1295                FF_LAMBDA_SHIFT;
1296
1297     c->width        = s->width  >> scale;
1298     c->height       = s->height >> scale;
1299     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1300     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1301     c->mb_decision  = s->avctx->mb_decision;
1302     c->me_cmp       = s->avctx->me_cmp;
1303     c->mb_cmp       = s->avctx->mb_cmp;
1304     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1305     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1306     c->time_base    = s->avctx->time_base;
1307     c->max_b_frames = s->max_b_frames;
1308
1309     if (avcodec_open2(c, codec, NULL) < 0)
1310         return -1;
1311
1312     for (i = 0; i < s->max_b_frames + 2; i++) {
1313         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1314                                                 s->next_picture_ptr;
1315         uint8_t *data[4];
1316
1317         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1318             pre_input = *pre_input_ptr;
1319             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1320
1321             if (!pre_input.shared && i) {
1322                 data[0] += INPLACE_OFFSET;
1323                 data[1] += INPLACE_OFFSET;
1324                 data[2] += INPLACE_OFFSET;
1325             }
1326
1327             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1328                                        s->tmp_frames[i]->linesize[0],
1329                                        data[0],
1330                                        pre_input.f->linesize[0],
1331                                        c->width, c->height);
1332             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1333                                        s->tmp_frames[i]->linesize[1],
1334                                        data[1],
1335                                        pre_input.f->linesize[1],
1336                                        c->width >> 1, c->height >> 1);
1337             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1338                                        s->tmp_frames[i]->linesize[2],
1339                                        data[2],
1340                                        pre_input.f->linesize[2],
1341                                        c->width >> 1, c->height >> 1);
1342         }
1343     }
1344
1345     for (j = 0; j < s->max_b_frames + 1; j++) {
1346         int64_t rd = 0;
1347
1348         if (!s->input_picture[j])
1349             break;
1350
1351         c->error[0] = c->error[1] = c->error[2] = 0;
1352
1353         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1354         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1355
1356         out_size = encode_frame(c, s->tmp_frames[0]);
1357
1358         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1359
1360         for (i = 0; i < s->max_b_frames + 1; i++) {
1361             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1362
1363             s->tmp_frames[i + 1]->pict_type = is_p ?
1364                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1365             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1366
1367             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1368
1369             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1370         }
1371
1372         /* get the delayed frames */
1373         while (out_size) {
1374             out_size = encode_frame(c, NULL);
1375             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1376         }
1377
1378         rd += c->error[0] + c->error[1] + c->error[2];
1379
1380         if (rd < best_rd) {
1381             best_rd = rd;
1382             best_b_count = j;
1383         }
1384     }
1385
1386     avcodec_close(c);
1387     av_freep(&c);
1388
1389     return best_b_count;
1390 }
1391
1392 static int select_input_picture(MpegEncContext *s)
1393 {
1394     int i, ret;
1395
1396     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1397         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1398     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1399
1400     /* set next picture type & ordering */
1401     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1402         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1403             if (s->picture_in_gop_number < s->gop_size &&
1404                 s->next_picture_ptr &&
1405                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1406                 // FIXME check that te gop check above is +-1 correct
1407                 av_frame_unref(s->input_picture[0]->f);
1408
1409                 ff_vbv_update(s, 0);
1410
1411                 goto no_output_pic;
1412             }
1413         }
1414
1415         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1416             !s->next_picture_ptr || s->intra_only) {
1417             s->reordered_input_picture[0] = s->input_picture[0];
1418             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1419             s->reordered_input_picture[0]->f->coded_picture_number =
1420                 s->coded_picture_number++;
1421         } else {
1422             int b_frames;
1423
1424             if (s->flags & CODEC_FLAG_PASS2) {
1425                 for (i = 0; i < s->max_b_frames + 1; i++) {
1426                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1427
1428                     if (pict_num >= s->rc_context.num_entries)
1429                         break;
1430                     if (!s->input_picture[i]) {
1431                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1432                         break;
1433                     }
1434
1435                     s->input_picture[i]->f->pict_type =
1436                         s->rc_context.entry[pict_num].new_pict_type;
1437                 }
1438             }
1439
1440             if (s->avctx->b_frame_strategy == 0) {
1441                 b_frames = s->max_b_frames;
1442                 while (b_frames && !s->input_picture[b_frames])
1443                     b_frames--;
1444             } else if (s->avctx->b_frame_strategy == 1) {
1445                 for (i = 1; i < s->max_b_frames + 1; i++) {
1446                     if (s->input_picture[i] &&
1447                         s->input_picture[i]->b_frame_score == 0) {
1448                         s->input_picture[i]->b_frame_score =
1449                             get_intra_count(s,
1450                                             s->input_picture[i    ]->f->data[0],
1451                                             s->input_picture[i - 1]->f->data[0],
1452                                             s->linesize) + 1;
1453                     }
1454                 }
1455                 for (i = 0; i < s->max_b_frames + 1; i++) {
1456                     if (!s->input_picture[i] ||
1457                         s->input_picture[i]->b_frame_score - 1 >
1458                             s->mb_num / s->avctx->b_sensitivity)
1459                         break;
1460                 }
1461
1462                 b_frames = FFMAX(0, i - 1);
1463
1464                 /* reset scores */
1465                 for (i = 0; i < b_frames + 1; i++) {
1466                     s->input_picture[i]->b_frame_score = 0;
1467                 }
1468             } else if (s->avctx->b_frame_strategy == 2) {
1469                 b_frames = estimate_best_b_count(s);
1470             } else {
1471                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1472                 b_frames = 0;
1473             }
1474
1475             emms_c();
1476
1477             for (i = b_frames - 1; i >= 0; i--) {
1478                 int type = s->input_picture[i]->f->pict_type;
1479                 if (type && type != AV_PICTURE_TYPE_B)
1480                     b_frames = i;
1481             }
1482             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1483                 b_frames == s->max_b_frames) {
1484                 av_log(s->avctx, AV_LOG_ERROR,
1485                        "warning, too many b frames in a row\n");
1486             }
1487
1488             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1489                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1490                     s->gop_size > s->picture_in_gop_number) {
1491                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1492                 } else {
1493                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1494                         b_frames = 0;
1495                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1496                 }
1497             }
1498
1499             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1500                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1501                 b_frames--;
1502
1503             s->reordered_input_picture[0] = s->input_picture[b_frames];
1504             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1505                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1506             s->reordered_input_picture[0]->f->coded_picture_number =
1507                 s->coded_picture_number++;
1508             for (i = 0; i < b_frames; i++) {
1509                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1510                 s->reordered_input_picture[i + 1]->f->pict_type =
1511                     AV_PICTURE_TYPE_B;
1512                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1513                     s->coded_picture_number++;
1514             }
1515         }
1516     }
1517 no_output_pic:
1518     if (s->reordered_input_picture[0]) {
1519         s->reordered_input_picture[0]->reference =
1520            s->reordered_input_picture[0]->f->pict_type !=
1521                AV_PICTURE_TYPE_B ? 3 : 0;
1522
1523         ff_mpeg_unref_picture(s, &s->new_picture);
1524         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1525             return ret;
1526
1527         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1528             // input is a shared pix, so we can't modifiy it -> alloc a new
1529             // one & ensure that the shared one is reuseable
1530
1531             Picture *pic;
1532             int i = ff_find_unused_picture(s, 0);
1533             if (i < 0)
1534                 return i;
1535             pic = &s->picture[i];
1536
1537             pic->reference = s->reordered_input_picture[0]->reference;
1538             if (ff_alloc_picture(s, pic, 0) < 0) {
1539                 return -1;
1540             }
1541
1542             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1543             if (ret < 0)
1544                 return ret;
1545
1546             /* mark us unused / free shared pic */
1547             av_frame_unref(s->reordered_input_picture[0]->f);
1548             s->reordered_input_picture[0]->shared = 0;
1549
1550             s->current_picture_ptr = pic;
1551         } else {
1552             // input is not a shared pix -> reuse buffer for current_pix
1553             s->current_picture_ptr = s->reordered_input_picture[0];
1554             for (i = 0; i < 4; i++) {
1555                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1556             }
1557         }
1558         ff_mpeg_unref_picture(s, &s->current_picture);
1559         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1560                                        s->current_picture_ptr)) < 0)
1561             return ret;
1562
1563         s->picture_number = s->new_picture.f->display_picture_number;
1564     } else {
1565         ff_mpeg_unref_picture(s, &s->new_picture);
1566     }
1567     return 0;
1568 }
1569
1570 static void frame_end(MpegEncContext *s)
1571 {
1572     if (s->unrestricted_mv &&
1573         s->current_picture.reference &&
1574         !s->intra_only) {
1575         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1576         int hshift = desc->log2_chroma_w;
1577         int vshift = desc->log2_chroma_h;
1578         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1579                                 s->current_picture.f->linesize[0],
1580                                 s->h_edge_pos, s->v_edge_pos,
1581                                 EDGE_WIDTH, EDGE_WIDTH,
1582                                 EDGE_TOP | EDGE_BOTTOM);
1583         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1584                                 s->current_picture.f->linesize[1],
1585                                 s->h_edge_pos >> hshift,
1586                                 s->v_edge_pos >> vshift,
1587                                 EDGE_WIDTH >> hshift,
1588                                 EDGE_WIDTH >> vshift,
1589                                 EDGE_TOP | EDGE_BOTTOM);
1590         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1591                                 s->current_picture.f->linesize[2],
1592                                 s->h_edge_pos >> hshift,
1593                                 s->v_edge_pos >> vshift,
1594                                 EDGE_WIDTH >> hshift,
1595                                 EDGE_WIDTH >> vshift,
1596                                 EDGE_TOP | EDGE_BOTTOM);
1597     }
1598
1599     emms_c();
1600
1601     s->last_pict_type                 = s->pict_type;
1602     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1603     if (s->pict_type!= AV_PICTURE_TYPE_B)
1604         s->last_non_b_pict_type = s->pict_type;
1605
1606     s->avctx->coded_frame = s->current_picture_ptr->f;
1607
1608 }
1609
1610 static void update_noise_reduction(MpegEncContext *s)
1611 {
1612     int intra, i;
1613
1614     for (intra = 0; intra < 2; intra++) {
1615         if (s->dct_count[intra] > (1 << 16)) {
1616             for (i = 0; i < 64; i++) {
1617                 s->dct_error_sum[intra][i] >>= 1;
1618             }
1619             s->dct_count[intra] >>= 1;
1620         }
1621
1622         for (i = 0; i < 64; i++) {
1623             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1624                                        s->dct_count[intra] +
1625                                        s->dct_error_sum[intra][i] / 2) /
1626                                       (s->dct_error_sum[intra][i] + 1);
1627         }
1628     }
1629 }
1630
1631 static int frame_start(MpegEncContext *s)
1632 {
1633     int ret;
1634
1635     /* mark & release old frames */
1636     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1637         s->last_picture_ptr != s->next_picture_ptr &&
1638         s->last_picture_ptr->f->buf[0]) {
1639         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1640     }
1641
1642     s->current_picture_ptr->f->pict_type = s->pict_type;
1643     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1644
1645     ff_mpeg_unref_picture(s, &s->current_picture);
1646     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1647                                    s->current_picture_ptr)) < 0)
1648         return ret;
1649
1650     if (s->pict_type != AV_PICTURE_TYPE_B) {
1651         s->last_picture_ptr = s->next_picture_ptr;
1652         if (!s->droppable)
1653             s->next_picture_ptr = s->current_picture_ptr;
1654     }
1655
1656     if (s->last_picture_ptr) {
1657         ff_mpeg_unref_picture(s, &s->last_picture);
1658         if (s->last_picture_ptr->f->buf[0] &&
1659             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1660                                        s->last_picture_ptr)) < 0)
1661             return ret;
1662     }
1663     if (s->next_picture_ptr) {
1664         ff_mpeg_unref_picture(s, &s->next_picture);
1665         if (s->next_picture_ptr->f->buf[0] &&
1666             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1667                                        s->next_picture_ptr)) < 0)
1668             return ret;
1669     }
1670
1671     if (s->picture_structure!= PICT_FRAME) {
1672         int i;
1673         for (i = 0; i < 4; i++) {
1674             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1675                 s->current_picture.f->data[i] +=
1676                     s->current_picture.f->linesize[i];
1677             }
1678             s->current_picture.f->linesize[i] *= 2;
1679             s->last_picture.f->linesize[i]    *= 2;
1680             s->next_picture.f->linesize[i]    *= 2;
1681         }
1682     }
1683
1684     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1685         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1686         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1687     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1688         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1689         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1690     } else {
1691         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1692         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1693     }
1694
1695     if (s->dct_error_sum) {
1696         av_assert2(s->avctx->noise_reduction && s->encoding);
1697         update_noise_reduction(s);
1698     }
1699
1700     return 0;
1701 }
1702
1703 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1704                           const AVFrame *pic_arg, int *got_packet)
1705 {
1706     MpegEncContext *s = avctx->priv_data;
1707     int i, stuffing_count, ret;
1708     int context_count = s->slice_context_count;
1709
1710     s->picture_in_gop_number++;
1711
1712     if (load_input_picture(s, pic_arg) < 0)
1713         return -1;
1714
1715     if (select_input_picture(s) < 0) {
1716         return -1;
1717     }
1718
1719     /* output? */
1720     if (s->new_picture.f->data[0]) {
1721         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1722         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1723                                               :
1724                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1725         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1726             return ret;
1727         if (s->mb_info) {
1728             s->mb_info_ptr = av_packet_new_side_data(pkt,
1729                                  AV_PKT_DATA_H263_MB_INFO,
1730                                  s->mb_width*s->mb_height*12);
1731             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1732         }
1733
1734         for (i = 0; i < context_count; i++) {
1735             int start_y = s->thread_context[i]->start_mb_y;
1736             int   end_y = s->thread_context[i]->  end_mb_y;
1737             int h       = s->mb_height;
1738             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1739             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1740
1741             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1742         }
1743
1744         s->pict_type = s->new_picture.f->pict_type;
1745         //emms_c();
1746         ret = frame_start(s);
1747         if (ret < 0)
1748             return ret;
1749 vbv_retry:
1750         ret = encode_picture(s, s->picture_number);
1751         if (growing_buffer) {
1752             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1753             pkt->data = s->pb.buf;
1754             pkt->size = avctx->internal->byte_buffer_size;
1755         }
1756         if (ret < 0)
1757             return -1;
1758
1759         avctx->header_bits = s->header_bits;
1760         avctx->mv_bits     = s->mv_bits;
1761         avctx->misc_bits   = s->misc_bits;
1762         avctx->i_tex_bits  = s->i_tex_bits;
1763         avctx->p_tex_bits  = s->p_tex_bits;
1764         avctx->i_count     = s->i_count;
1765         // FIXME f/b_count in avctx
1766         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1767         avctx->skip_count  = s->skip_count;
1768
1769         frame_end(s);
1770
1771         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1772             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1773
1774         if (avctx->rc_buffer_size) {
1775             RateControlContext *rcc = &s->rc_context;
1776             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1777
1778             if (put_bits_count(&s->pb) > max_size &&
1779                 s->lambda < s->lmax) {
1780                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1781                                        (s->qscale + 1) / s->qscale);
1782                 if (s->adaptive_quant) {
1783                     int i;
1784                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1785                         s->lambda_table[i] =
1786                             FFMAX(s->lambda_table[i] + 1,
1787                                   s->lambda_table[i] * (s->qscale + 1) /
1788                                   s->qscale);
1789                 }
1790                 s->mb_skipped = 0;        // done in frame_start()
1791                 // done in encode_picture() so we must undo it
1792                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1793                     if (s->flipflop_rounding          ||
1794                         s->codec_id == AV_CODEC_ID_H263P ||
1795                         s->codec_id == AV_CODEC_ID_MPEG4)
1796                         s->no_rounding ^= 1;
1797                 }
1798                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1799                     s->time_base       = s->last_time_base;
1800                     s->last_non_b_time = s->time - s->pp_time;
1801                 }
1802                 for (i = 0; i < context_count; i++) {
1803                     PutBitContext *pb = &s->thread_context[i]->pb;
1804                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1805                 }
1806                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1807                 goto vbv_retry;
1808             }
1809
1810             av_assert0(s->avctx->rc_max_rate);
1811         }
1812
1813         if (s->flags & CODEC_FLAG_PASS1)
1814             ff_write_pass1_stats(s);
1815
1816         for (i = 0; i < 4; i++) {
1817             s->current_picture_ptr->f->error[i] =
1818             s->current_picture.f->error[i] =
1819                 s->current_picture.error[i];
1820             avctx->error[i] += s->current_picture_ptr->f->error[i];
1821         }
1822
1823         if (s->flags & CODEC_FLAG_PASS1)
1824             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1825                    avctx->i_tex_bits + avctx->p_tex_bits ==
1826                        put_bits_count(&s->pb));
1827         flush_put_bits(&s->pb);
1828         s->frame_bits  = put_bits_count(&s->pb);
1829
1830         stuffing_count = ff_vbv_update(s, s->frame_bits);
1831         s->stuffing_bits = 8*stuffing_count;
1832         if (stuffing_count) {
1833             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1834                     stuffing_count + 50) {
1835                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1836                 return -1;
1837             }
1838
1839             switch (s->codec_id) {
1840             case AV_CODEC_ID_MPEG1VIDEO:
1841             case AV_CODEC_ID_MPEG2VIDEO:
1842                 while (stuffing_count--) {
1843                     put_bits(&s->pb, 8, 0);
1844                 }
1845             break;
1846             case AV_CODEC_ID_MPEG4:
1847                 put_bits(&s->pb, 16, 0);
1848                 put_bits(&s->pb, 16, 0x1C3);
1849                 stuffing_count -= 4;
1850                 while (stuffing_count--) {
1851                     put_bits(&s->pb, 8, 0xFF);
1852                 }
1853             break;
1854             default:
1855                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1856             }
1857             flush_put_bits(&s->pb);
1858             s->frame_bits  = put_bits_count(&s->pb);
1859         }
1860
1861         /* update mpeg1/2 vbv_delay for CBR */
1862         if (s->avctx->rc_max_rate                          &&
1863             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1864             s->out_format == FMT_MPEG1                     &&
1865             90000LL * (avctx->rc_buffer_size - 1) <=
1866                 s->avctx->rc_max_rate * 0xFFFFLL) {
1867             int vbv_delay, min_delay;
1868             double inbits  = s->avctx->rc_max_rate *
1869                              av_q2d(s->avctx->time_base);
1870             int    minbits = s->frame_bits - 8 *
1871                              (s->vbv_delay_ptr - s->pb.buf - 1);
1872             double bits    = s->rc_context.buffer_index + minbits - inbits;
1873
1874             if (bits < 0)
1875                 av_log(s->avctx, AV_LOG_ERROR,
1876                        "Internal error, negative bits\n");
1877
1878             assert(s->repeat_first_field == 0);
1879
1880             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1881             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1882                         s->avctx->rc_max_rate;
1883
1884             vbv_delay = FFMAX(vbv_delay, min_delay);
1885
1886             av_assert0(vbv_delay < 0xFFFF);
1887
1888             s->vbv_delay_ptr[0] &= 0xF8;
1889             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1890             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1891             s->vbv_delay_ptr[2] &= 0x07;
1892             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1893             avctx->vbv_delay     = vbv_delay * 300;
1894         }
1895         s->total_bits     += s->frame_bits;
1896         avctx->frame_bits  = s->frame_bits;
1897
1898         pkt->pts = s->current_picture.f->pts;
1899         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1900             if (!s->current_picture.f->coded_picture_number)
1901                 pkt->dts = pkt->pts - s->dts_delta;
1902             else
1903                 pkt->dts = s->reordered_pts;
1904             s->reordered_pts = pkt->pts;
1905         } else
1906             pkt->dts = pkt->pts;
1907         if (s->current_picture.f->key_frame)
1908             pkt->flags |= AV_PKT_FLAG_KEY;
1909         if (s->mb_info)
1910             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1911     } else {
1912         s->frame_bits = 0;
1913     }
1914
1915     /* release non-reference frames */
1916     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1917         if (!s->picture[i].reference)
1918             ff_mpeg_unref_picture(s, &s->picture[i]);
1919     }
1920
1921     av_assert1((s->frame_bits & 7) == 0);
1922
1923     pkt->size = s->frame_bits / 8;
1924     *got_packet = !!pkt->size;
1925     return 0;
1926 }
1927
1928 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1929                                                 int n, int threshold)
1930 {
1931     static const char tab[64] = {
1932         3, 2, 2, 1, 1, 1, 1, 1,
1933         1, 1, 1, 1, 1, 1, 1, 1,
1934         1, 1, 1, 1, 1, 1, 1, 1,
1935         0, 0, 0, 0, 0, 0, 0, 0,
1936         0, 0, 0, 0, 0, 0, 0, 0,
1937         0, 0, 0, 0, 0, 0, 0, 0,
1938         0, 0, 0, 0, 0, 0, 0, 0,
1939         0, 0, 0, 0, 0, 0, 0, 0
1940     };
1941     int score = 0;
1942     int run = 0;
1943     int i;
1944     int16_t *block = s->block[n];
1945     const int last_index = s->block_last_index[n];
1946     int skip_dc;
1947
1948     if (threshold < 0) {
1949         skip_dc = 0;
1950         threshold = -threshold;
1951     } else
1952         skip_dc = 1;
1953
1954     /* Are all we could set to zero already zero? */
1955     if (last_index <= skip_dc - 1)
1956         return;
1957
1958     for (i = 0; i <= last_index; i++) {
1959         const int j = s->intra_scantable.permutated[i];
1960         const int level = FFABS(block[j]);
1961         if (level == 1) {
1962             if (skip_dc && i == 0)
1963                 continue;
1964             score += tab[run];
1965             run = 0;
1966         } else if (level > 1) {
1967             return;
1968         } else {
1969             run++;
1970         }
1971     }
1972     if (score >= threshold)
1973         return;
1974     for (i = skip_dc; i <= last_index; i++) {
1975         const int j = s->intra_scantable.permutated[i];
1976         block[j] = 0;
1977     }
1978     if (block[0])
1979         s->block_last_index[n] = 0;
1980     else
1981         s->block_last_index[n] = -1;
1982 }
1983
1984 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1985                                int last_index)
1986 {
1987     int i;
1988     const int maxlevel = s->max_qcoeff;
1989     const int minlevel = s->min_qcoeff;
1990     int overflow = 0;
1991
1992     if (s->mb_intra) {
1993         i = 1; // skip clipping of intra dc
1994     } else
1995         i = 0;
1996
1997     for (; i <= last_index; i++) {
1998         const int j = s->intra_scantable.permutated[i];
1999         int level = block[j];
2000
2001         if (level > maxlevel) {
2002             level = maxlevel;
2003             overflow++;
2004         } else if (level < minlevel) {
2005             level = minlevel;
2006             overflow++;
2007         }
2008
2009         block[j] = level;
2010     }
2011
2012     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2013         av_log(s->avctx, AV_LOG_INFO,
2014                "warning, clipping %d dct coefficients to %d..%d\n",
2015                overflow, minlevel, maxlevel);
2016 }
2017
/**
 * Compute a per-pixel weight for an 8x8 block from local activity.
 *
 * For every pixel the samples in its 3x3 neighbourhood, clipped to the 8x8
 * block, are gathered; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2)
 * / count, where count is the number of samples in the window.
 *
 * @param weight output, 64 entries stored row-major (x + 8 * y)
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between two rows of ptr, in samples
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int x, y;
    // FIXME optimize
    for (y = 0; y < 8; y++) {
        const int y_lo = FFMAX(y - 1, 0);
        const int y_hi = FFMIN(8, y + 2);

        for (x = 0; x < 8; x++) {
            const int x_lo  = FFMAX(x - 1, 0);
            const int x_hi  = FFMIN(8, x + 2);
            /* number of samples inside the clipped window */
            const int count = (y_hi - y_lo) * (x_hi - x_lo);
            int sum = 0;
            int sqr = 0;
            int nx, ny;

            for (ny = y_lo; ny < y_hi; ny++) {
                const uint8_t *row = ptr + ny * stride;

                for (nx = x_lo; nx < x_hi; nx++) {
                    const int v = row[nx];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[x + 8 * y] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2041
2042 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2043                                                 int motion_x, int motion_y,
2044                                                 int mb_block_height,
2045                                                 int mb_block_width,
2046                                                 int mb_block_count)
2047 {
2048     int16_t weight[12][64];
2049     int16_t orig[12][64];
2050     const int mb_x = s->mb_x;
2051     const int mb_y = s->mb_y;
2052     int i;
2053     int skip_dct[12];
2054     int dct_offset = s->linesize * 8; // default for progressive frames
2055     int uv_dct_offset = s->uvlinesize * 8;
2056     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2057     ptrdiff_t wrap_y, wrap_c;
2058
2059     for (i = 0; i < mb_block_count; i++)
2060         skip_dct[i] = s->skipdct;
2061
2062     if (s->adaptive_quant) {
2063         const int last_qp = s->qscale;
2064         const int mb_xy = mb_x + mb_y * s->mb_stride;
2065
2066         s->lambda = s->lambda_table[mb_xy];
2067         update_qscale(s);
2068
2069         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2070             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2071             s->dquant = s->qscale - last_qp;
2072
2073             if (s->out_format == FMT_H263) {
2074                 s->dquant = av_clip(s->dquant, -2, 2);
2075
2076                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2077                     if (!s->mb_intra) {
2078                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2079                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2080                                 s->dquant = 0;
2081                         }
2082                         if (s->mv_type == MV_TYPE_8X8)
2083                             s->dquant = 0;
2084                     }
2085                 }
2086             }
2087         }
2088         ff_set_qscale(s, last_qp + s->dquant);
2089     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2090         ff_set_qscale(s, s->qscale + s->dquant);
2091
2092     wrap_y = s->linesize;
2093     wrap_c = s->uvlinesize;
2094     ptr_y  = s->new_picture.f->data[0] +
2095              (mb_y * 16 * wrap_y)              + mb_x * 16;
2096     ptr_cb = s->new_picture.f->data[1] +
2097              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2098     ptr_cr = s->new_picture.f->data[2] +
2099              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2100
2101     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2102         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2103         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2104         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2105         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2106                                  wrap_y, wrap_y,
2107                                  16, 16, mb_x * 16, mb_y * 16,
2108                                  s->width, s->height);
2109         ptr_y = ebuf;
2110         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2111                                  wrap_c, wrap_c,
2112                                  mb_block_width, mb_block_height,
2113                                  mb_x * mb_block_width, mb_y * mb_block_height,
2114                                  cw, ch);
2115         ptr_cb = ebuf + 16 * wrap_y;
2116         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2117                                  wrap_c, wrap_c,
2118                                  mb_block_width, mb_block_height,
2119                                  mb_x * mb_block_width, mb_y * mb_block_height,
2120                                  cw, ch);
2121         ptr_cr = ebuf + 16 * wrap_y + 16;
2122     }
2123
2124     if (s->mb_intra) {
2125         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2126             int progressive_score, interlaced_score;
2127
2128             s->interlaced_dct = 0;
2129             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2130                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2131                                                      NULL, wrap_y, 8) - 400;
2132
2133             if (progressive_score > 0) {
2134                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2135                                                         NULL, wrap_y * 2, 8) +
2136                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2137                                                         NULL, wrap_y * 2, 8);
2138                 if (progressive_score > interlaced_score) {
2139                     s->interlaced_dct = 1;
2140
2141                     dct_offset = wrap_y;
2142                     uv_dct_offset = wrap_c;
2143                     wrap_y <<= 1;
2144                     if (s->chroma_format == CHROMA_422 ||
2145                         s->chroma_format == CHROMA_444)
2146                         wrap_c <<= 1;
2147                 }
2148             }
2149         }
2150
2151         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2152         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2153         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2154         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2155
2156         if (s->flags & CODEC_FLAG_GRAY) {
2157             skip_dct[4] = 1;
2158             skip_dct[5] = 1;
2159         } else {
2160             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2161             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2162             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2163                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2164                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2165             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2166                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2167                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2168                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2169                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2170                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2171                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2172             }
2173         }
2174     } else {
2175         op_pixels_func (*op_pix)[4];
2176         qpel_mc_func (*op_qpix)[16];
2177         uint8_t *dest_y, *dest_cb, *dest_cr;
2178
2179         dest_y  = s->dest[0];
2180         dest_cb = s->dest[1];
2181         dest_cr = s->dest[2];
2182
2183         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2184             op_pix  = s->hdsp.put_pixels_tab;
2185             op_qpix = s->qdsp.put_qpel_pixels_tab;
2186         } else {
2187             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2188             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2189         }
2190
2191         if (s->mv_dir & MV_DIR_FORWARD) {
2192             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2193                           s->last_picture.f->data,
2194                           op_pix, op_qpix);
2195             op_pix  = s->hdsp.avg_pixels_tab;
2196             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2197         }
2198         if (s->mv_dir & MV_DIR_BACKWARD) {
2199             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2200                           s->next_picture.f->data,
2201                           op_pix, op_qpix);
2202         }
2203
2204         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2205             int progressive_score, interlaced_score;
2206
2207             s->interlaced_dct = 0;
2208             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2209                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2210                                                      ptr_y + wrap_y * 8,
2211                                                      wrap_y, 8) - 400;
2212
2213             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2214                 progressive_score -= 400;
2215
2216             if (progressive_score > 0) {
2217                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2218                                                         wrap_y * 2, 8) +
2219                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2220                                                         ptr_y + wrap_y,
2221                                                         wrap_y * 2, 8);
2222
2223                 if (progressive_score > interlaced_score) {
2224                     s->interlaced_dct = 1;
2225
2226                     dct_offset = wrap_y;
2227                     uv_dct_offset = wrap_c;
2228                     wrap_y <<= 1;
2229                     if (s->chroma_format == CHROMA_422)
2230                         wrap_c <<= 1;
2231                 }
2232             }
2233         }
2234
2235         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2236         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2237         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2238                             dest_y + dct_offset, wrap_y);
2239         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2240                             dest_y + dct_offset + 8, wrap_y);
2241
2242         if (s->flags & CODEC_FLAG_GRAY) {
2243             skip_dct[4] = 1;
2244             skip_dct[5] = 1;
2245         } else {
2246             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2247             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2248             if (!s->chroma_y_shift) { /* 422 */
2249                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2250                                     dest_cb + uv_dct_offset, wrap_c);
2251                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2252                                     dest_cr + uv_dct_offset, wrap_c);
2253             }
2254         }
2255         /* pre quantization */
2256         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2257                 2 * s->qscale * s->qscale) {
2258             // FIXME optimize
2259             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2260                 skip_dct[0] = 1;
2261             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2262                 skip_dct[1] = 1;
2263             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2264                                wrap_y, 8) < 20 * s->qscale)
2265                 skip_dct[2] = 1;
2266             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2267                                wrap_y, 8) < 20 * s->qscale)
2268                 skip_dct[3] = 1;
2269             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2270                 skip_dct[4] = 1;
2271             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2272                 skip_dct[5] = 1;
2273             if (!s->chroma_y_shift) { /* 422 */
2274                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2275                                    dest_cb + uv_dct_offset,
2276                                    wrap_c, 8) < 20 * s->qscale)
2277                     skip_dct[6] = 1;
2278                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2279                                    dest_cr + uv_dct_offset,
2280                                    wrap_c, 8) < 20 * s->qscale)
2281                     skip_dct[7] = 1;
2282             }
2283         }
2284     }
2285
2286     if (s->quantizer_noise_shaping) {
2287         if (!skip_dct[0])
2288             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2289         if (!skip_dct[1])
2290             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2291         if (!skip_dct[2])
2292             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2293         if (!skip_dct[3])
2294             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2295         if (!skip_dct[4])
2296             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2297         if (!skip_dct[5])
2298             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2299         if (!s->chroma_y_shift) { /* 422 */
2300             if (!skip_dct[6])
2301                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2302                                   wrap_c);
2303             if (!skip_dct[7])
2304                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2305                                   wrap_c);
2306         }
2307         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2308     }
2309
2310     /* DCT & quantize */
2311     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2312     {
2313         for (i = 0; i < mb_block_count; i++) {
2314             if (!skip_dct[i]) {
2315                 int overflow;
2316                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2317                 // FIXME we could decide to change to quantizer instead of
2318                 // clipping
2319                 // JS: I don't think that would be a good idea it could lower
2320                 //     quality instead of improve it. Just INTRADC clipping
2321                 //     deserves changes in quantizer
2322                 if (overflow)
2323                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2324             } else
2325                 s->block_last_index[i] = -1;
2326         }
2327         if (s->quantizer_noise_shaping) {
2328             for (i = 0; i < mb_block_count; i++) {
2329                 if (!skip_dct[i]) {
2330                     s->block_last_index[i] =
2331                         dct_quantize_refine(s, s->block[i], weight[i],
2332                                             orig[i], i, s->qscale);
2333                 }
2334             }
2335         }
2336
2337         if (s->luma_elim_threshold && !s->mb_intra)
2338             for (i = 0; i < 4; i++)
2339                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2340         if (s->chroma_elim_threshold && !s->mb_intra)
2341             for (i = 4; i < mb_block_count; i++)
2342                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2343
2344         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2345             for (i = 0; i < mb_block_count; i++) {
2346                 if (s->block_last_index[i] == -1)
2347                     s->coded_score[i] = INT_MAX / 256;
2348             }
2349         }
2350     }
2351
2352     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2353         s->block_last_index[4] =
2354         s->block_last_index[5] = 0;
2355         s->block[4][0] =
2356         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2357         if (!s->chroma_y_shift) { /* 422 / 444 */
2358             for (i=6; i<12; i++) {
2359                 s->block_last_index[i] = 0;
2360                 s->block[i][0] = s->block[4][0];
2361             }
2362         }
2363     }
2364
2365     // non c quantize code returns incorrect block_last_index FIXME
2366     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2367         for (i = 0; i < mb_block_count; i++) {
2368             int j;
2369             if (s->block_last_index[i] > 0) {
2370                 for (j = 63; j > 0; j--) {
2371                     if (s->block[i][s->intra_scantable.permutated[j]])
2372                         break;
2373                 }
2374                 s->block_last_index[i] = j;
2375             }
2376         }
2377     }
2378
2379     /* huffman encode */
2380     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2381     case AV_CODEC_ID_MPEG1VIDEO:
2382     case AV_CODEC_ID_MPEG2VIDEO:
2383         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2384             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2385         break;
2386     case AV_CODEC_ID_MPEG4:
2387         if (CONFIG_MPEG4_ENCODER)
2388             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2389         break;
2390     case AV_CODEC_ID_MSMPEG4V2:
2391     case AV_CODEC_ID_MSMPEG4V3:
2392     case AV_CODEC_ID_WMV1:
2393         if (CONFIG_MSMPEG4_ENCODER)
2394             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2395         break;
2396     case AV_CODEC_ID_WMV2:
2397         if (CONFIG_WMV2_ENCODER)
2398             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2399         break;
2400     case AV_CODEC_ID_H261:
2401         if (CONFIG_H261_ENCODER)
2402             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2403         break;
2404     case AV_CODEC_ID_H263:
2405     case AV_CODEC_ID_H263P:
2406     case AV_CODEC_ID_FLV1:
2407     case AV_CODEC_ID_RV10:
2408     case AV_CODEC_ID_RV20:
2409         if (CONFIG_H263_ENCODER)
2410             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2411         break;
2412     case AV_CODEC_ID_MJPEG:
2413     case AV_CODEC_ID_AMV:
2414         if (CONFIG_MJPEG_ENCODER)
2415             ff_mjpeg_encode_mb(s, s->block);
2416         break;
2417     default:
2418         av_assert1(0);
2419     }
2420 }
2421
2422 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2423 {
2424     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2425     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2426     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2427 }
2428
2429 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2430     int i;
2431
2432     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2433
2434     /* mpeg1 */
2435     d->mb_skip_run= s->mb_skip_run;
2436     for(i=0; i<3; i++)
2437         d->last_dc[i] = s->last_dc[i];
2438
2439     /* statistics */
2440     d->mv_bits= s->mv_bits;
2441     d->i_tex_bits= s->i_tex_bits;
2442     d->p_tex_bits= s->p_tex_bits;
2443     d->i_count= s->i_count;
2444     d->f_count= s->f_count;
2445     d->b_count= s->b_count;
2446     d->skip_count= s->skip_count;
2447     d->misc_bits= s->misc_bits;
2448     d->last_bits= 0;
2449
2450     d->mb_skipped= 0;
2451     d->qscale= s->qscale;
2452     d->dquant= s->dquant;
2453
2454     d->esc3_level_length= s->esc3_level_length;
2455 }
2456
2457 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2458     int i;
2459
2460     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2461     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2462
2463     /* mpeg1 */
2464     d->mb_skip_run= s->mb_skip_run;
2465     for(i=0; i<3; i++)
2466         d->last_dc[i] = s->last_dc[i];
2467
2468     /* statistics */
2469     d->mv_bits= s->mv_bits;
2470     d->i_tex_bits= s->i_tex_bits;
2471     d->p_tex_bits= s->p_tex_bits;
2472     d->i_count= s->i_count;
2473     d->f_count= s->f_count;
2474     d->b_count= s->b_count;
2475     d->skip_count= s->skip_count;
2476     d->misc_bits= s->misc_bits;
2477
2478     d->mb_intra= s->mb_intra;
2479     d->mb_skipped= s->mb_skipped;
2480     d->mv_type= s->mv_type;
2481     d->mv_dir= s->mv_dir;
2482     d->pb= s->pb;
2483     if(s->data_partitioning){
2484         d->pb2= s->pb2;
2485         d->tex_pb= s->tex_pb;
2486     }
2487     d->block= s->block;
2488     for(i=0; i<8; i++)
2489         d->block_last_index[i]= s->block_last_index[i];
2490     d->interlaced_dct= s->interlaced_dct;
2491     d->qscale= s->qscale;
2492
2493     d->esc3_level_length= s->esc3_level_length;
2494 }
2495
2496 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2497                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2498                            int *dmin, int *next_block, int motion_x, int motion_y)
2499 {
2500     int score;
2501     uint8_t *dest_backup[3];
2502
2503     copy_context_before_encode(s, backup, type);
2504
2505     s->block= s->blocks[*next_block];
2506     s->pb= pb[*next_block];
2507     if(s->data_partitioning){
2508         s->pb2   = pb2   [*next_block];
2509         s->tex_pb= tex_pb[*next_block];
2510     }
2511
2512     if(*next_block){
2513         memcpy(dest_backup, s->dest, sizeof(s->dest));
2514         s->dest[0] = s->rd_scratchpad;
2515         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2516         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2517         av_assert0(s->linesize >= 32); //FIXME
2518     }
2519
2520     encode_mb(s, motion_x, motion_y);
2521
2522     score= put_bits_count(&s->pb);
2523     if(s->data_partitioning){
2524         score+= put_bits_count(&s->pb2);
2525         score+= put_bits_count(&s->tex_pb);
2526     }
2527
2528     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2529         ff_mpv_decode_mb(s, s->block);
2530
2531         score *= s->lambda2;
2532         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2533     }
2534
2535     if(*next_block){
2536         memcpy(s->dest, dest_backup, sizeof(s->dest));
2537     }
2538
2539     if(score<*dmin){
2540         *dmin= score;
2541         *next_block^=1;
2542
2543         copy_context_after_encode(best, s, type);
2544     }
2545 }
2546
2547 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2548     uint32_t *sq = ff_square_tab + 256;
2549     int acc=0;
2550     int x,y;
2551
2552     if(w==16 && h==16)
2553         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2554     else if(w==8 && h==8)
2555         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2556
2557     for(y=0; y<h; y++){
2558         for(x=0; x<w; x++){
2559             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2560         }
2561     }
2562
2563     av_assert2(acc>=0);
2564
2565     return acc;
2566 }
2567
2568 static int sse_mb(MpegEncContext *s){
2569     int w= 16;
2570     int h= 16;
2571
2572     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2573     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2574
2575     if(w==16 && h==16)
2576       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2577         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2578                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2579                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2580       }else{
2581         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2582                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2583                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2584       }
2585     else
2586         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2587                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2588                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2589 }
2590
2591 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2592     MpegEncContext *s= *(void**)arg;
2593
2594
2595     s->me.pre_pass=1;
2596     s->me.dia_size= s->avctx->pre_dia_size;
2597     s->first_slice_line=1;
2598     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2599         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2600             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2601         }
2602         s->first_slice_line=0;
2603     }
2604
2605     s->me.pre_pass=0;
2606
2607     return 0;
2608 }
2609
2610 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2611     MpegEncContext *s= *(void**)arg;
2612
2613     ff_check_alignment();
2614
2615     s->me.dia_size= s->avctx->dia_size;
2616     s->first_slice_line=1;
2617     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2618         s->mb_x=0; //for block init below
2619         ff_init_block_index(s);
2620         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2621             s->block_index[0]+=2;
2622             s->block_index[1]+=2;
2623             s->block_index[2]+=2;
2624             s->block_index[3]+=2;
2625
2626             /* compute motion vector & mb_type and store in context */
2627             if(s->pict_type==AV_PICTURE_TYPE_B)
2628                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2629             else
2630                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2631         }
2632         s->first_slice_line=0;
2633     }
2634     return 0;
2635 }
2636
2637 static int mb_var_thread(AVCodecContext *c, void *arg){
2638     MpegEncContext *s= *(void**)arg;
2639     int mb_x, mb_y;
2640
2641     ff_check_alignment();
2642
2643     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2644         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2645             int xx = mb_x * 16;
2646             int yy = mb_y * 16;
2647             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2648             int varc;
2649             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2650
2651             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2652                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2653
2654             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2655             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2656             s->me.mb_var_sum_temp    += varc;
2657         }
2658     }
2659     return 0;
2660 }
2661
2662 static void write_slice_end(MpegEncContext *s){
2663     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2664         if(s->partitioned_frame){
2665             ff_mpeg4_merge_partitions(s);
2666         }
2667
2668         ff_mpeg4_stuffing(&s->pb);
2669     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2670         ff_mjpeg_encode_stuffing(s);
2671     }
2672
2673     avpriv_align_put_bits(&s->pb);
2674     flush_put_bits(&s->pb);
2675
2676     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2677         s->misc_bits+= get_bits_diff(s);
2678 }
2679
2680 static void write_mb_info(MpegEncContext *s)
2681 {
2682     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2683     int offset = put_bits_count(&s->pb);
2684     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2685     int gobn = s->mb_y / s->gob_index;
2686     int pred_x, pred_y;
2687     if (CONFIG_H263_ENCODER)
2688         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2689     bytestream_put_le32(&ptr, offset);
2690     bytestream_put_byte(&ptr, s->qscale);
2691     bytestream_put_byte(&ptr, gobn);
2692     bytestream_put_le16(&ptr, mba);
2693     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2694     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2695     /* 4MV not implemented */
2696     bytestream_put_byte(&ptr, 0); /* hmv2 */
2697     bytestream_put_byte(&ptr, 0); /* vmv2 */
2698 }
2699
2700 static void update_mb_info(MpegEncContext *s, int startcode)
2701 {
2702     if (!s->mb_info)
2703         return;
2704     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2705         s->mb_info_size += 12;
2706         s->prev_mb_info = s->last_mb_info;
2707     }
2708     if (startcode) {
2709         s->prev_mb_info = put_bits_count(&s->pb)/8;
2710         /* This might have incremented mb_info_size above, and we return without
2711          * actually writing any info into that slot yet. But in that case,
2712          * this will be called again at the start of the after writing the
2713          * start code, actually writing the mb info. */
2714         return;
2715     }
2716
2717     s->last_mb_info = put_bits_count(&s->pb)/8;
2718     if (!s->mb_info_size)
2719         s->mb_info_size += 12;
2720     write_mb_info(s);
2721 }
2722
2723 static int encode_thread(AVCodecContext *c, void *arg){
2724     MpegEncContext *s= *(void**)arg;
2725     int mb_x, mb_y, pdif = 0;
2726     int chr_h= 16>>s->chroma_y_shift;
2727     int i, j;
2728     MpegEncContext best_s = { 0 }, backup_s;
2729     uint8_t bit_buf[2][MAX_MB_BYTES];
2730     uint8_t bit_buf2[2][MAX_MB_BYTES];
2731     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2732     PutBitContext pb[2], pb2[2], tex_pb[2];
2733
2734     ff_check_alignment();
2735
2736     for(i=0; i<2; i++){
2737         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2738         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2739         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2740     }
2741
2742     s->last_bits= put_bits_count(&s->pb);
2743     s->mv_bits=0;
2744     s->misc_bits=0;
2745     s->i_tex_bits=0;
2746     s->p_tex_bits=0;
2747     s->i_count=0;
2748     s->f_count=0;
2749     s->b_count=0;
2750     s->skip_count=0;
2751
2752     for(i=0; i<3; i++){
2753         /* init last dc values */
2754         /* note: quant matrix value (8) is implied here */
2755         s->last_dc[i] = 128 << s->intra_dc_precision;
2756
2757         s->current_picture.error[i] = 0;
2758     }
2759     if(s->codec_id==AV_CODEC_ID_AMV){
2760         s->last_dc[0] = 128*8/13;
2761         s->last_dc[1] = 128*8/14;
2762         s->last_dc[2] = 128*8/14;
2763     }
2764     s->mb_skip_run = 0;
2765     memset(s->last_mv, 0, sizeof(s->last_mv));
2766
2767     s->last_mv_dir = 0;
2768
2769     switch(s->codec_id){
2770     case AV_CODEC_ID_H263:
2771     case AV_CODEC_ID_H263P:
2772     case AV_CODEC_ID_FLV1:
2773         if (CONFIG_H263_ENCODER)
2774             s->gob_index = ff_h263_get_gob_height(s);
2775         break;
2776     case AV_CODEC_ID_MPEG4:
2777         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2778             ff_mpeg4_init_partitions(s);
2779         break;
2780     }
2781
2782     s->resync_mb_x=0;
2783     s->resync_mb_y=0;
2784     s->first_slice_line = 1;
2785     s->ptr_lastgob = s->pb.buf;
2786     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2787         s->mb_x=0;
2788         s->mb_y= mb_y;
2789
2790         ff_set_qscale(s, s->qscale);
2791         ff_init_block_index(s);
2792
2793         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2794             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2795             int mb_type= s->mb_type[xy];
2796 //            int d;
2797             int dmin= INT_MAX;
2798             int dir;
2799
2800             if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES
2801                 && s->slice_context_count == 1
2802                 && s->pb.buf == s->avctx->internal->byte_buffer) {
2803                 int new_size =  s->avctx->internal->byte_buffer_size
2804                               + s->avctx->internal->byte_buffer_size/4
2805                               + s->mb_width*MAX_MB_BYTES;
2806                 int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2807                 int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2808
2809                 uint8_t *new_buffer = NULL;
2810                 int new_buffer_size = 0;
2811
2812                 av_fast_padded_malloc(&new_buffer, &new_buffer_size, new_size);
2813                 if (new_buffer) {
2814                     memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2815                     av_free(s->avctx->internal->byte_buffer);
2816                     s->avctx->internal->byte_buffer      = new_buffer;
2817                     s->avctx->internal->byte_buffer_size = new_buffer_size;
2818                     rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2819                     s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2820                     s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2821                 }
2822             }
2823             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2824                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2825                 return -1;
2826             }
2827             if(s->data_partitioning){
2828                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2829                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2830                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2831                     return -1;
2832                 }
2833             }
2834
2835             s->mb_x = mb_x;
2836             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2837             ff_update_block_index(s);
2838
2839             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2840                 ff_h261_reorder_mb_index(s);
2841                 xy= s->mb_y*s->mb_stride + s->mb_x;
2842                 mb_type= s->mb_type[xy];
2843             }
2844
2845             /* write gob / video packet header  */
2846             if(s->rtp_mode){
2847                 int current_packet_size, is_gob_start;
2848
2849                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2850
2851                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2852
2853                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2854
2855                 switch(s->codec_id){
2856                 case AV_CODEC_ID_H263:
2857                 case AV_CODEC_ID_H263P:
2858                     if(!s->h263_slice_structured)
2859                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2860                     break;
2861                 case AV_CODEC_ID_MPEG2VIDEO:
2862                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2863                 case AV_CODEC_ID_MPEG1VIDEO:
2864                     if(s->mb_skip_run) is_gob_start=0;
2865                     break;
2866                 case AV_CODEC_ID_MJPEG:
2867                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2868                     break;
2869                 }
2870
2871                 if(is_gob_start){
2872                     if(s->start_mb_y != mb_y || mb_x!=0){
2873                         write_slice_end(s);
2874
2875                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2876                             ff_mpeg4_init_partitions(s);
2877                         }
2878                     }
2879
2880                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2881                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2882
2883                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2884                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2885                         int d = 100 / s->error_rate;
2886                         if(r % d == 0){
2887                             current_packet_size=0;
2888                             s->pb.buf_ptr= s->ptr_lastgob;
2889                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2890                         }
2891                     }
2892
2893                     if (s->avctx->rtp_callback){
2894                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2895                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2896                     }
2897                     update_mb_info(s, 1);
2898
2899                     switch(s->codec_id){
2900                     case AV_CODEC_ID_MPEG4:
2901                         if (CONFIG_MPEG4_ENCODER) {
2902                             ff_mpeg4_encode_video_packet_header(s);
2903                             ff_mpeg4_clean_buffers(s);
2904                         }
2905                     break;
2906                     case AV_CODEC_ID_MPEG1VIDEO:
2907                     case AV_CODEC_ID_MPEG2VIDEO:
2908                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2909                             ff_mpeg1_encode_slice_header(s);
2910                             ff_mpeg1_clean_buffers(s);
2911                         }
2912                     break;
2913                     case AV_CODEC_ID_H263:
2914                     case AV_CODEC_ID_H263P:
2915                         if (CONFIG_H263_ENCODER)
2916                             ff_h263_encode_gob_header(s, mb_y);
2917                     break;
2918                     }
2919
2920                     if(s->flags&CODEC_FLAG_PASS1){
2921                         int bits= put_bits_count(&s->pb);
2922                         s->misc_bits+= bits - s->last_bits;
2923                         s->last_bits= bits;
2924                     }
2925
2926                     s->ptr_lastgob += current_packet_size;
2927                     s->first_slice_line=1;
2928                     s->resync_mb_x=mb_x;
2929                     s->resync_mb_y=mb_y;
2930                 }
2931             }
2932
2933             if(  (s->resync_mb_x   == s->mb_x)
2934                && s->resync_mb_y+1 == s->mb_y){
2935                 s->first_slice_line=0;
2936             }
2937
2938             s->mb_skipped=0;
2939             s->dquant=0; //only for QP_RD
2940
2941             update_mb_info(s, 0);
2942
2943             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2944                 int next_block=0;
2945                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2946
2947                 copy_context_before_encode(&backup_s, s, -1);
2948                 backup_s.pb= s->pb;
2949                 best_s.data_partitioning= s->data_partitioning;
2950                 best_s.partitioned_frame= s->partitioned_frame;
2951                 if(s->data_partitioning){
2952                     backup_s.pb2= s->pb2;
2953                     backup_s.tex_pb= s->tex_pb;
2954                 }
2955
2956                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2957                     s->mv_dir = MV_DIR_FORWARD;
2958                     s->mv_type = MV_TYPE_16X16;
2959                     s->mb_intra= 0;
2960                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2961                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2962                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2963                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2964                 }
2965                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2966                     s->mv_dir = MV_DIR_FORWARD;
2967                     s->mv_type = MV_TYPE_FIELD;
2968                     s->mb_intra= 0;
2969                     for(i=0; i<2; i++){
2970                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2971                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2972                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2973                     }
2974                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2975                                  &dmin, &next_block, 0, 0);
2976                 }
2977                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2978                     s->mv_dir = MV_DIR_FORWARD;
2979                     s->mv_type = MV_TYPE_16X16;
2980                     s->mb_intra= 0;
2981                     s->mv[0][0][0] = 0;
2982                     s->mv[0][0][1] = 0;
2983                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2984                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2985                 }
2986                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2987                     s->mv_dir = MV_DIR_FORWARD;
2988                     s->mv_type = MV_TYPE_8X8;
2989                     s->mb_intra= 0;
2990                     for(i=0; i<4; i++){
2991                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2992                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2993                     }
2994                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2995                                  &dmin, &next_block, 0, 0);
2996                 }
2997                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2998                     s->mv_dir = MV_DIR_FORWARD;
2999                     s->mv_type = MV_TYPE_16X16;
3000                     s->mb_intra= 0;
3001                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3002                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3003                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3004                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3005                 }
3006                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3007                     s->mv_dir = MV_DIR_BACKWARD;
3008                     s->mv_type = MV_TYPE_16X16;
3009                     s->mb_intra= 0;
3010                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3011                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3012                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3013                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3014                 }
3015                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3016                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3017                     s->mv_type = MV_TYPE_16X16;
3018                     s->mb_intra= 0;
3019                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3020                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3021                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3022                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3023                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3024                                  &dmin, &next_block, 0, 0);
3025                 }
3026                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3027                     s->mv_dir = MV_DIR_FORWARD;
3028                     s->mv_type = MV_TYPE_FIELD;
3029                     s->mb_intra= 0;
3030                     for(i=0; i<2; i++){
3031                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3032                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3033                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3034                     }
3035                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3036                                  &dmin, &next_block, 0, 0);
3037                 }
3038                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3039                     s->mv_dir = MV_DIR_BACKWARD;
3040                     s->mv_type = MV_TYPE_FIELD;
3041                     s->mb_intra= 0;
3042                     for(i=0; i<2; i++){
3043                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3044                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3045                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3046                     }
3047                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3048                                  &dmin, &next_block, 0, 0);
3049                 }
3050                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3051                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3052                     s->mv_type = MV_TYPE_FIELD;
3053                     s->mb_intra= 0;
3054                     for(dir=0; dir<2; dir++){
3055                         for(i=0; i<2; i++){
3056                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3057                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3058                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3059                         }
3060                     }
3061                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3062                                  &dmin, &next_block, 0, 0);
3063                 }
3064                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3065                     s->mv_dir = 0;
3066                     s->mv_type = MV_TYPE_16X16;
3067                     s->mb_intra= 1;
3068                     s->mv[0][0][0] = 0;
3069                     s->mv[0][0][1] = 0;
3070                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3071                                  &dmin, &next_block, 0, 0);
3072                     if(s->h263_pred || s->h263_aic){
3073                         if(best_s.mb_intra)
3074                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3075                         else
3076                             ff_clean_intra_table_entries(s); //old mode?
3077                     }
3078                 }
3079
3080                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3081                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3082                         const int last_qp= backup_s.qscale;
3083                         int qpi, qp, dc[6];
3084                         int16_t ac[6][16];
3085                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3086                         static const int dquant_tab[4]={-1,1,-2,2};
3087                         int storecoefs = s->mb_intra && s->dc_val[0];
3088
3089                         av_assert2(backup_s.dquant == 0);
3090
3091                         //FIXME intra
3092                         s->mv_dir= best_s.mv_dir;
3093                         s->mv_type = MV_TYPE_16X16;
3094                         s->mb_intra= best_s.mb_intra;
3095                         s->mv[0][0][0] = best_s.mv[0][0][0];
3096                         s->mv[0][0][1] = best_s.mv[0][0][1];
3097                         s->mv[1][0][0] = best_s.mv[1][0][0];
3098                         s->mv[1][0][1] = best_s.mv[1][0][1];
3099
3100                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3101                         for(; qpi<4; qpi++){
3102                             int dquant= dquant_tab[qpi];
3103                             qp= last_qp + dquant;
3104                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3105                                 continue;
3106                             backup_s.dquant= dquant;
3107                             if(storecoefs){
3108                                 for(i=0; i<6; i++){
3109                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3110                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3111                                 }
3112                             }
3113
3114                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3115                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3116                             if(best_s.qscale != qp){
3117                                 if(storecoefs){
3118                                     for(i=0; i<6; i++){
3119                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3120                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3121                                     }
3122                                 }
3123                             }
3124                         }
3125                     }
3126                 }
3127                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3128                     int mx= s->b_direct_mv_table[xy][0];
3129                     int my= s->b_direct_mv_table[xy][1];
3130
3131                     backup_s.dquant = 0;
3132                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3133                     s->mb_intra= 0;
3134                     ff_mpeg4_set_direct_mv(s, mx, my);
3135                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3136                                  &dmin, &next_block, mx, my);
3137                 }
3138                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3139                     backup_s.dquant = 0;
3140                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3141                     s->mb_intra= 0;
3142                     ff_mpeg4_set_direct_mv(s, 0, 0);
3143                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3144                                  &dmin, &next_block, 0, 0);
3145                 }
3146                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3147                     int coded=0;
3148                     for(i=0; i<6; i++)
3149                         coded |= s->block_last_index[i];
3150                     if(coded){
3151                         int mx,my;
3152                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3153                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3154                             mx=my=0; //FIXME find the one we actually used
3155                             ff_mpeg4_set_direct_mv(s, mx, my);
3156                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3157                             mx= s->mv[1][0][0];
3158                             my= s->mv[1][0][1];
3159                         }else{
3160                             mx= s->mv[0][0][0];
3161                             my= s->mv[0][0][1];
3162                         }
3163
3164                         s->mv_dir= best_s.mv_dir;
3165                         s->mv_type = best_s.mv_type;
3166                         s->mb_intra= 0;
3167 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3168                         s->mv[0][0][1] = best_s.mv[0][0][1];
3169                         s->mv[1][0][0] = best_s.mv[1][0][0];
3170                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3171                         backup_s.dquant= 0;
3172                         s->skipdct=1;
3173                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3174                                         &dmin, &next_block, mx, my);
3175                         s->skipdct=0;
3176                     }
3177                 }
3178
3179                 s->current_picture.qscale_table[xy] = best_s.qscale;
3180
3181                 copy_context_after_encode(s, &best_s, -1);
3182
3183                 pb_bits_count= put_bits_count(&s->pb);
3184                 flush_put_bits(&s->pb);
3185                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3186                 s->pb= backup_s.pb;
3187
3188                 if(s->data_partitioning){
3189                     pb2_bits_count= put_bits_count(&s->pb2);
3190                     flush_put_bits(&s->pb2);
3191                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3192                     s->pb2= backup_s.pb2;
3193
3194                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3195                     flush_put_bits(&s->tex_pb);
3196                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3197                     s->tex_pb= backup_s.tex_pb;
3198                 }
3199                 s->last_bits= put_bits_count(&s->pb);
3200
3201                 if (CONFIG_H263_ENCODER &&
3202                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3203                     ff_h263_update_motion_val(s);
3204
3205                 if(next_block==0){ //FIXME 16 vs linesize16
3206                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3207                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3208                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3209                 }
3210
3211                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3212                     ff_mpv_decode_mb(s, s->block);
3213             } else {
3214                 int motion_x = 0, motion_y = 0;
3215                 s->mv_type=MV_TYPE_16X16;
3216                 // only one MB-Type possible
3217
3218                 switch(mb_type){
3219                 case CANDIDATE_MB_TYPE_INTRA:
3220                     s->mv_dir = 0;
3221                     s->mb_intra= 1;
3222                     motion_x= s->mv[0][0][0] = 0;
3223                     motion_y= s->mv[0][0][1] = 0;
3224                     break;
3225                 case CANDIDATE_MB_TYPE_INTER:
3226                     s->mv_dir = MV_DIR_FORWARD;
3227                     s->mb_intra= 0;
3228                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3229                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3230                     break;
3231                 case CANDIDATE_MB_TYPE_INTER_I:
3232                     s->mv_dir = MV_DIR_FORWARD;
3233                     s->mv_type = MV_TYPE_FIELD;
3234                     s->mb_intra= 0;
3235                     for(i=0; i<2; i++){
3236                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3237                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3238                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3239                     }
3240                     break;
3241                 case CANDIDATE_MB_TYPE_INTER4V:
3242                     s->mv_dir = MV_DIR_FORWARD;
3243                     s->mv_type = MV_TYPE_8X8;
3244                     s->mb_intra= 0;
3245                     for(i=0; i<4; i++){
3246                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3247                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3248                     }
3249                     break;
3250                 case CANDIDATE_MB_TYPE_DIRECT:
3251                     if (CONFIG_MPEG4_ENCODER) {
3252                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3253                         s->mb_intra= 0;
3254                         motion_x=s->b_direct_mv_table[xy][0];
3255                         motion_y=s->b_direct_mv_table[xy][1];
3256                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3257                     }
3258                     break;
3259                 case CANDIDATE_MB_TYPE_DIRECT0:
3260                     if (CONFIG_MPEG4_ENCODER) {
3261                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3262                         s->mb_intra= 0;
3263                         ff_mpeg4_set_direct_mv(s, 0, 0);
3264                     }
3265                     break;
3266                 case CANDIDATE_MB_TYPE_BIDIR:
3267                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3268                     s->mb_intra= 0;
3269                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3270                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3271                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3272                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3273                     break;
3274                 case CANDIDATE_MB_TYPE_BACKWARD:
3275                     s->mv_dir = MV_DIR_BACKWARD;
3276                     s->mb_intra= 0;
3277                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3278                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3279                     break;
3280                 case CANDIDATE_MB_TYPE_FORWARD:
3281                     s->mv_dir = MV_DIR_FORWARD;
3282                     s->mb_intra= 0;
3283                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3284                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3285                     break;
3286                 case CANDIDATE_MB_TYPE_FORWARD_I:
3287                     s->mv_dir = MV_DIR_FORWARD;
3288                     s->mv_type = MV_TYPE_FIELD;
3289                     s->mb_intra= 0;
3290                     for(i=0; i<2; i++){
3291                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3292                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3293                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3294                     }
3295                     break;
3296                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3297                     s->mv_dir = MV_DIR_BACKWARD;
3298                     s->mv_type = MV_TYPE_FIELD;
3299                     s->mb_intra= 0;
3300                     for(i=0; i<2; i++){
3301                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3302                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3303                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3304                     }
3305                     break;
3306                 case CANDIDATE_MB_TYPE_BIDIR_I:
3307                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3308                     s->mv_type = MV_TYPE_FIELD;
3309                     s->mb_intra= 0;
3310                     for(dir=0; dir<2; dir++){
3311                         for(i=0; i<2; i++){
3312                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3313                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3314                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3315                         }
3316                     }
3317                     break;
3318                 default:
3319                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3320                 }
3321
3322                 encode_mb(s, motion_x, motion_y);
3323
3324                 // RAL: Update last macroblock type
3325                 s->last_mv_dir = s->mv_dir;
3326
3327                 if (CONFIG_H263_ENCODER &&
3328                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3329                     ff_h263_update_motion_val(s);
3330
3331                 ff_mpv_decode_mb(s, s->block);
3332             }
3333
3334             /* clean the MV table in IPS frames for direct mode in B frames */
3335             if(s->mb_intra /* && I,P,S_TYPE */){
3336                 s->p_mv_table[xy][0]=0;
3337                 s->p_mv_table[xy][1]=0;
3338             }
3339
3340             if(s->flags&CODEC_FLAG_PSNR){
3341                 int w= 16;
3342                 int h= 16;
3343
3344                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3345                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3346
3347                 s->current_picture.error[0] += sse(
3348                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3349                     s->dest[0], w, h, s->linesize);
3350                 s->current_picture.error[1] += sse(
3351                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3352                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3353                 s->current_picture.error[2] += sse(
3354                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3355                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3356             }
3357             if(s->loop_filter){
3358                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3359                     ff_h263_loop_filter(s);
3360             }
3361             av_dlog(s->avctx, "MB %d %d bits\n",
3362                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3363         }
3364     }
3365
3366     //not beautiful here but we must write it before flushing so it has to be here
3367     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3368         ff_msmpeg4_encode_ext_header(s);
3369
3370     write_slice_end(s);
3371
3372     /* Send the last GOB if RTP */
3373     if (s->avctx->rtp_callback) {
3374         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3375         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3376         /* Call the RTP callback to send the last GOB */
3377         emms_c();
3378         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3379     }
3380
3381     return 0;
3382 }
3383
3384 #define MERGE(field) dst->field += src->field; src->field=0
3385 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3386     MERGE(me.scene_change_score);
3387     MERGE(me.mc_mb_var_sum_temp);
3388     MERGE(me.mb_var_sum_temp);
3389 }
3390
3391 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3392     int i;
3393
3394     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3395     MERGE(dct_count[1]);
3396     MERGE(mv_bits);
3397     MERGE(i_tex_bits);
3398     MERGE(p_tex_bits);
3399     MERGE(i_count);
3400     MERGE(f_count);
3401     MERGE(b_count);
3402     MERGE(skip_count);
3403     MERGE(misc_bits);
3404     MERGE(er.error_count);
3405     MERGE(padding_bug_score);
3406     MERGE(current_picture.error[0]);
3407     MERGE(current_picture.error[1]);
3408     MERGE(current_picture.error[2]);
3409
3410     if(dst->avctx->noise_reduction){
3411         for(i=0; i<64; i++){
3412             MERGE(dct_error_sum[0][i]);
3413             MERGE(dct_error_sum[1][i]);
3414         }
3415     }
3416
3417     assert(put_bits_count(&src->pb) % 8 ==0);
3418     assert(put_bits_count(&dst->pb) % 8 ==0);
3419     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3420     flush_put_bits(&dst->pb);
3421 }
3422
3423 static int estimate_qp(MpegEncContext *s, int dry_run){
3424     if (s->next_lambda){
3425         s->current_picture_ptr->f->quality =
3426         s->current_picture.f->quality = s->next_lambda;
3427         if(!dry_run) s->next_lambda= 0;
3428     } else if (!s->fixed_qscale) {
3429         s->current_picture_ptr->f->quality =
3430         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3431         if (s->current_picture.f->quality < 0)
3432             return -1;
3433     }
3434
3435     if(s->adaptive_quant){
3436         switch(s->codec_id){
3437         case AV_CODEC_ID_MPEG4:
3438             if (CONFIG_MPEG4_ENCODER)
3439                 ff_clean_mpeg4_qscales(s);
3440             break;
3441         case AV_CODEC_ID_H263:
3442         case AV_CODEC_ID_H263P:
3443         case AV_CODEC_ID_FLV1:
3444             if (CONFIG_H263_ENCODER)
3445                 ff_clean_h263_qscales(s);
3446             break;
3447         default:
3448             ff_init_qscale_tab(s);
3449         }
3450
3451         s->lambda= s->lambda_table[0];
3452         //FIXME broken
3453     }else
3454         s->lambda = s->current_picture.f->quality;
3455     update_qscale(s);
3456     return 0;
3457 }
3458
3459 /* must be called before writing the header */
3460 static void set_frame_distances(MpegEncContext * s){
3461     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3462     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3463
3464     if(s->pict_type==AV_PICTURE_TYPE_B){
3465         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3466         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3467     }else{
3468         s->pp_time= s->time - s->last_non_b_time;
3469         s->last_non_b_time= s->time;
3470         assert(s->picture_number==0 || s->pp_time > 0);
3471     }
3472 }
3473
/**
 * Encode one picture using the main context and its slice contexts.
 *
 * Visible steps, in order: set up timing and rounding state, pick a lambda
 * for motion estimation, run motion estimation through avctx->execute() (or
 * mark every macroblock intra for I-pictures), merge the ME statistics,
 * re-type a P-picture as I on scene change, select f_code/b_code and run the
 * ff_fix_long_*mvs() helpers, estimate the final quality, build the
 * MJPEG/AMV quantisation matrices, write the format-specific picture header,
 * then run encode_thread on all slice contexts and merge their results.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3474 static int encode_picture(MpegEncContext *s, int picture_number)
3475 {
3476     int i, ret;
3477     int bits;
3478     int context_count = s->slice_context_count;
3479
3480     s->picture_number = picture_number;
3481
3482     /* Reset the average MB variance */
3483     s->me.mb_var_sum_temp    =
3484     s->me.mc_mb_var_sum_temp = 0;
3485
3486     /* we need to initialize some time vars before we can encode b-frames */
3487     // RAL: Condition added for MPEG1VIDEO
3488     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3489         set_frame_distances(s);
3490     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3491         ff_set_mpeg4_time(s);
3492
3493     s->me.scene_change_score=0;
3494
3495 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3496
         /* Rounding mode: set from msmpeg4_version on I-pictures, toggled on
          * other non-B pictures for flipflop_rounding/H263P/MPEG4, and left
          * untouched on B-pictures. */
3497     if(s->pict_type==AV_PICTURE_TYPE_I){
3498         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3499         else                        s->no_rounding=0;
3500     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3501         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3502             s->no_rounding ^= 1;
3503     }
3504
         /* Lambda used during motion estimation: a dry-run quality estimate in
          * 2-pass mode, otherwise (unless CODEC_FLAG_QSCALE is set) the last
          * lambda recorded for this kind of picture. */
3505     if(s->flags & CODEC_FLAG_PASS2){
3506         if (estimate_qp(s,1) < 0)
3507             return -1;
3508         ff_get_2pass_fcode(s);
3509     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3510         if(s->pict_type==AV_PICTURE_TYPE_B)
3511             s->lambda= s->last_lambda_for[s->pict_type];
3512         else
3513             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3514         update_qscale(s);
3515     }
3516
         /* Outside AMV/MJPEG the chroma intra matrices alias the luma ones;
          * pointers that differ from the luma matrices are freed first. */
3517     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3518         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3519         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3520         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3521         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3522     }
3523
3524     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* update the slice contexts with index >= 1 from s */
3525     for(i=1; i<context_count; i++){
3526         ret = ff_update_duplicate_context(s->thread_context[i], s);
3527         if (ret < 0)
3528             return ret;
3529     }
3530
3531     if(ff_init_me(s)<0)
3532         return -1;
3533
3534     /* Estimate motion for every MB */
3535     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3536         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3537         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3538         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass: when pre_me is set and the previous non-B picture
                  * was an I-picture, or always when pre_me == 2 */
3539             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3540                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3541             }
3542         }
3543
3544         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3545     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3546         /* I-Frame */
3547         for(i=0; i<s->mb_stride*s->mb_height; i++)
3548             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3549
3550         if(!s->fixed_qscale){
3551             /* finding spatial complexity for I-frame rate control */
3552             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3553         }
3554     }
         /* add the ME statistics of the other slice contexts to s */
3555     for(i=1; i<context_count; i++){
3556         merge_context_after_me(s, s->thread_context[i]);
3557     }
3558     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3559     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3560     emms_c();
3561
         /* a P-picture whose scene change score exceeds the threshold is
          * re-typed as an I-picture with every macroblock marked intra */
3562     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3563         s->pict_type= AV_PICTURE_TYPE_I;
3564         for(i=0; i<s->mb_stride*s->mb_height; i++)
3565             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3566         if(s->msmpeg4_version >= 3)
3567             s->no_rounding=1;
3568         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3569                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3570     }
3571
         /* f_code/b_code selection and the ff_fix_long_*mvs() calls; the whole
          * section is skipped when umvplus is set */
3572     if(!s->umvplus){
3573         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3574             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3575
3576             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3577                 int a,b;
3578                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3579                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3580                 s->f_code= FFMAX3(s->f_code, a, b);
3581             }
3582
3583             ff_fix_long_p_mvs(s);
3584             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3585             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3586                 int j;
3587                 for(i=0; i<2; i++){
3588                     for(j=0; j<2; j++)
3589                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3590                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3591                 }
3592             }
3593         }
3594
3595         if(s->pict_type==AV_PICTURE_TYPE_B){
3596             int a, b;
3597
                 /* f_code covers the forward tables, b_code the backward ones */
3598             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3599             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3600             s->f_code = FFMAX(a, b);
3601
3602             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3603             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3604             s->b_code = FFMAX(a, b);
3605
3606             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3607             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3608             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3609             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3610             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3611                 int dir, j;
3612                 for(dir=0; dir<2; dir++){
3613                     for(i=0; i<2; i++){
3614                         for(j=0; j<2; j++){
3615                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3616                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3617                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3618                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3619                         }
3620                     }
3621                 }
3622             }
3623         }
3624     }
3625
         /* final (non dry-run) quality estimate for this picture */
3626     if (estimate_qp(s, 0) < 0)
3627         return -1;
3628
3629     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3630         s->qscale= 3; //reduce clipping problems
3631
3632     if (s->out_format == FMT_MJPEG) {
             /* defaults, overridden by the user supplied matrices if present;
              * intra_matrix applies to both planes unless chroma_intra_matrix
              * is given as well */
3633         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3634         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3635
3636         if (s->avctx->intra_matrix) {
3637             chroma_matrix =
3638             luma_matrix = s->avctx->intra_matrix;
3639         }
3640         if (s->avctx->chroma_intra_matrix)
3641             chroma_matrix = s->avctx->chroma_intra_matrix;
3642
3643         /* for mjpeg, we do include qscale in the matrix */
3644         for(i=1;i<64;i++){
3645             int j = s->idsp.idct_permutation[i];
3646
3647             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3648             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3649         }
             /* entry 0 is not scaled by qscale; it is taken from the DC scale table */
3650         s->y_dc_scale_table=
3651         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3652         s->chroma_intra_matrix[0] =
3653         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3654         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3655                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3656         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3657                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3658         s->qscale= 8;
3659     }
3660     if(s->codec_id == AV_CODEC_ID_AMV){
             /* constant DC scale tables: 13 for luma, 14 for chroma */
3661         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3662         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3663         for(i=1;i<64;i++){
3664             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3665
3666             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3667             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3668         }
3669         s->y_dc_scale_table= y;
3670         s->c_dc_scale_table= c;
3671         s->intra_matrix[0] = 13;
3672         s->chroma_intra_matrix[0] = 14;
3673         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3674                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3675         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3676                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3677         s->qscale= 8;
3678     }
3679
3680     //FIXME var duplication
3681     s->current_picture_ptr->f->key_frame =
3682     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3683     s->current_picture_ptr->f->pict_type =
3684     s->current_picture.f->pict_type = s->pict_type;
3685
3686     if (s->current_picture.f->key_frame)
3687         s->picture_in_gop_number=0;
3688
3689     s->mb_x = s->mb_y = 0;
         /* remember the bit position so the header size can be measured below */
3690     s->last_bits= put_bits_count(&s->pb);
3691     switch(s->out_format) {
3692     case FMT_MJPEG:
3693         if (CONFIG_MJPEG_ENCODER)
3694             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3695                                            s->intra_matrix, s->chroma_intra_matrix);
3696         break;
3697     case FMT_H261:
3698         if (CONFIG_H261_ENCODER)
3699             ff_h261_encode_picture_header(s, picture_number);
3700         break;
3701     case FMT_H263:
             /* several codecs share FMT_H263; the first matching writer is used */
3702         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3703             ff_wmv2_encode_picture_header(s, picture_number);
3704         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3705             ff_msmpeg4_encode_picture_header(s, picture_number);
3706         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3707             ff_mpeg4_encode_picture_header(s, picture_number);
3708         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3709             ff_rv10_encode_picture_header(s, picture_number);
3710         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3711             ff_rv20_encode_picture_header(s, picture_number);
3712         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3713             ff_flv_encode_picture_header(s, picture_number);
3714         else if (CONFIG_H263_ENCODER)
3715             ff_h263_encode_picture_header(s, picture_number);
3716         break;
3717     case FMT_MPEG1:
3718         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3719             ff_mpeg1_encode_picture_header(s, picture_number);
3720         break;
3721     default:
3722         av_assert0(0);
3723     }
         /* number of bits written by the picture header alone */
3724     bits= put_bits_count(&s->pb);
3725     s->header_bits= bits - s->last_bits;
3726
         /* update the other slice contexts from s, run encode_thread on all
          * contexts, then merge each of the others back into s */
3727     for(i=1; i<context_count; i++){
3728         update_duplicate_context_after_me(s->thread_context[i], s);
3729     }
3730     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3731     for(i=1; i<context_count; i++){
3732         merge_context_after_encode(s, s->thread_context[i]);
3733     }
3734     emms_c();
3735     return 0;
3736 }
3737
3738 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3739     const int intra= s->mb_intra;
3740     int i;
3741
3742     s->dct_count[intra]++;
3743
3744     for(i=0; i<64; i++){
3745         int level= block[i];
3746
3747         if(level){
3748             if(level>0){
3749                 s->dct_error_sum[intra][i] += level;
3750                 level -= s->dct_offset[intra][i];
3751                 if(level<0) level=0;
3752             }else{
3753                 s->dct_error_sum[intra][i] -= level;
3754                 level += s->dct_offset[intra][i];
3755                 if(level>0) level=0;
3756             }
3757             block[i]= level;
3758         }
3759     }
3760 }
3761
/**
 * Transform and quantize one block, choosing the quantized levels with a
 * rate-distortion (trellis) search.
 *
 * Visible steps: the block is transformed in place by s->fdsp.fdct() and
 * optionally passed through s->denoise_dct(); for intra blocks block[0] is
 * quantized separately with the DC scale; for every other scan position up
 * to two candidate levels are collected; a dynamic-programming pass then
 * picks, per position, the (run, level) pair minimizing
 * distortion + lambda * VLC length; finally the chosen levels are written
 * back through the permuted scantable.
 *
 * @param s        encoder context
 * @param block    64 values, transformed and quantized in place
 * @param n        block index; n < 4 selects the luma DC scale / intra
 *                 matrix, other values select the chroma ones
 * @param qscale   quantizer scale, also used to index the q_*_matrix tables
 * @param overflow receives nonzero when a candidate magnitude may exceed
 *                 s->max_qcoeff
 * @return scan index of the last coded coefficient; a value below the first
 *         searched position (0 for intra, -1 for inter) means that no
 *         coefficient of the searched range is coded
 */
3762 static int dct_quantize_trellis_c(MpegEncContext *s,
3763                                   int16_t *block, int n,
3764                                   int qscale, int *overflow){
3765     const int *qmat;
3766     const uint8_t *scantable= s->intra_scantable.scantable;
3767     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3768     int max=0;
3769     unsigned int threshold1, threshold2;
3770     int bias=0;
3771     int run_tab[65];
3772     int level_tab[65];
3773     int score_tab[65];
3774     int survivor[65];
3775     int survivor_count;
3776     int last_run=0;
3777     int last_level=0;
3778     int last_score= 0;
3779     int last_i;
3780     int coeff[2][64];
3781     int coeff_count[64];
3782     int qmul, qadd, start_i, last_non_zero, i, dc;
3783     const int esc_length= s->ac_esc_length;
3784     uint8_t * length;
3785     uint8_t * last_length;
         /* weight applied to VLC lengths when they are added to distortions */
3786     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3787
         /* transform in place, then optionally denoise the coefficients */
3788     s->fdsp.fdct(block);
3789
3790     if(s->dct_error_sum)
3791         s->denoise_dct(s, block);
         /* factors of the H.263/H.261 style reconstruction alevel*qmul + qadd used below */
3792     qmul= qscale*16;
3793     qadd= ((qscale-1)|1)*8;
3794
3795     if (s->mb_intra) {
3796         int q;
3797         if (!s->h263_aic) {
3798             if (n < 4)
3799                 q = s->y_dc_scale;
3800             else
3801                 q = s->c_dc_scale;
3802             q = q << 3;
3803         } else{
3804             /* For AIC we skip quant/dequant of INTRADC */
3805             q = 1 << 3;
3806             qadd=0;
3807         }
3808
3809         /* note: block[0] is assumed to be positive */
3810         block[0] = (block[0] + (q >> 1)) / q;
             /* DC has been handled above, so the search starts at scan position 1 */
3811         start_i = 1;
3812         last_non_zero = 0;
3813         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
             /* rounding bias of 1<<(QMAT_SHIFT-1), i.e. half of the 1<<QMAT_SHIFT scale */
3814         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3815             bias= 1<<(QMAT_SHIFT-1);
3816         length     = s->intra_ac_vlc_length;
3817         last_length= s->intra_ac_vlc_last_length;
3818     } else {
3819         start_i = 0;
3820         last_non_zero = -1;
3821         qmat = s->q_inter_matrix[qscale];
3822         length     = s->inter_ac_vlc_length;
3823         last_length= s->inter_ac_vlc_last_length;
3824     }
3825     last_i= start_i;
3826
         /* (unsigned)(level + threshold1) > threshold2 is a single-compare
          * form of: level > threshold1 || level < -threshold1 */
3827     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3828     threshold2= (threshold1<<1);
3829
         /* scan backwards for the last position whose scaled coefficient
          * exceeds the threshold */
3830     for(i=63; i>=start_i; i--) {
3831         const int j = scantable[i];
3832         int level = block[j] * qmat[j];
3833
3834         if(((unsigned)(level+threshold1))>threshold2){
3835             last_non_zero = i;
3836             break;
3837         }
3838     }
3839
         /* collect the candidate levels of every searched position:
          * coeff[0][i] is the rounded level, coeff[1][i] the level one step
          * closer to zero; coeff_count[i] says how many of them are usable */
3840     for(i=start_i; i<=last_non_zero; i++) {
3841         const int j = scantable[i];
3842         int level = block[j] * qmat[j];
3843
3844 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3845 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3846         if(((unsigned)(level+threshold1))>threshold2){
3847             if(level>0){
3848                 level= (bias + level)>>QMAT_SHIFT;
3849                 coeff[0][i]= level;
3850                 coeff[1][i]= level-1;
3851 //                coeff[2][k]= level-2;
3852             }else{
3853                 level= (bias - level)>>QMAT_SHIFT;
3854                 coeff[0][i]= -level;
3855                 coeff[1][i]= -level+1;
3856 //                coeff[2][k]= -level+2;
3857             }
                 /* a magnitude of 1 has only one nonzero candidate */
3858             coeff_count[i]= FFMIN(level, 2);
3859             av_assert2(coeff_count[i]);
                 /* OR of all magnitudes: never smaller than the largest one */
3860             max |=level;
3861         }else{
                 /* below the threshold: the single candidate is +1 or -1,
                  * taken from the sign bit (presumably relies on an
                  * arithmetic right shift of a 32-bit int) */
3862             coeff[0][i]= (level>>31)|1;
3863             coeff_count[i]= 1;
3864         }
3865     }
3866
3867     *overflow= s->max_qcoeff < max; //overflow might have happened
3868
         /* nothing exceeded the threshold: clear the searched range and
          * return the initial index (0 for intra, -1 for inter) */
3869     if(last_non_zero < start_i){
3870         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3871         return last_non_zero;
3872     }
3873
         /* score_tab[i] holds the best score for everything before scan
          * position i; survivor[] lists the positions still considered as
          * the point where the next run starts */
3874     score_tab[start_i]= 0;
3875     survivor[0]= start_i;
3876     survivor_count= 1;
3877
3878     for(i=start_i; i<=last_non_zero; i++){
3879         int level_index, j, zero_distortion;
3880         int dct_coeff= FFABS(block[ scantable[i] ]);
             /* large sentinel, any real score is smaller */
3881         int best_score=256*256*256*120;
3882
             /* with the ifast transform the coefficient is rescaled through ff_inv_aanscales */
3883         if (s->fdsp.fdct == ff_fdct_ifast)
3884             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
             /* squared error of coding this coefficient as zero; the
              * distortions below are expressed relative to it */
3885         zero_distortion= dct_coeff*dct_coeff;
3886
3887         for(level_index=0; level_index < coeff_count[i]; level_index++){
3888             int distortion;
3889             int level= coeff[level_index][i];
3890             const int alevel= FFABS(level);
3891             int unquant_coeff;
3892
3893             av_assert2(level);
3894
                 /* reconstruct the magnitude this candidate would dequantize to */
3895             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3896                 unquant_coeff= alevel*qmul + qadd;
3897             }else{ //MPEG1
3898                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3899                 if(s->mb_intra){
3900                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                             /* (x - 1) | 1 forces the value odd */
3901                         unquant_coeff =   (unquant_coeff - 1) | 1;
3902                 }else{
3903                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3904                         unquant_coeff =   (unquant_coeff - 1) | 1;
3905                 }
3906                 unquant_coeff<<= 3;
3907             }
3908
3909             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* shift the level so that [-64, 63] maps to [0, 127]; only
                  * that range is looked up in the VLC length tables, anything
                  * else is charged esc_length */
3910             level+=64;
3911             if((level&(~127)) == 0){
                     /* try every survivor as the start of the run ending here */
3912                 for(j=survivor_count-1; j>=0; j--){
3913                     int run= i - survivor[j];
3914                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3915                     score += score_tab[i-run];
3916
3917                     if(score < best_score){
3918                         best_score= score;
3919                         run_tab[i+1]= run;
3920                         level_tab[i+1]= level-64;
3921                     }
3922                 }
3923
                     /* for H.263/H.261 the same candidates are also scored
                      * with last_length[] to track the best path that ends
                      * at this coefficient */
3924                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3925                     for(j=survivor_count-1; j>=0; j--){
3926                         int run= i - survivor[j];
3927                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3928                         score += score_tab[i-run];
3929                         if(score < last_score){
3930                             last_score= score;
3931                             last_run= run;
3932                             last_level= level-64;
3933                             last_i= i+1;
3934                         }
3935                     }
3936                 }
3937             }else{
                     /* level outside the table range: charge the escape length instead */
3938                 distortion += esc_length*lambda;
3939                 for(j=survivor_count-1; j>=0; j--){
3940                     int run= i - survivor[j];
3941                     int score= distortion + score_tab[i-run];
3942
3943                     if(score < best_score){
3944                         best_score= score;
3945                         run_tab[i+1]= run;
3946                         level_tab[i+1]= level-64;
3947                     }
3948                 }
3949
3950                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3951                   for(j=survivor_count-1; j>=0; j--){
3952                         int run= i - survivor[j];
3953                         int score= distortion + score_tab[i-run];
3954                         if(score < last_score){
3955                             last_score= score;
3956                             last_run= run;
3957                             last_level= level-64;
3958                             last_i= i+1;
3959                         }
3960                     }
3961                 }
3962             }
3963         }
3964
3965         score_tab[i+1]= best_score;
3966
             /* drop trailing survivors that score worse than the new best;
              * when last_non_zero > 27 a slack of lambda is allowed */
3967         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3968         if(last_non_zero <= 27){
3969             for(; survivor_count; survivor_count--){
3970                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3971                     break;
3972             }
3973         }else{
3974             for(; survivor_count; survivor_count--){
3975                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3976                     break;
3977             }
3978         }
3979
3980         survivor[ survivor_count++ ]= i+1;
3981     }
3982
         /* the other formats did not track an end position during the search:
          * choose it now, charging 2*lambda for every end position except 0 */
3983     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3984         last_score= 256*256*256*120;
3985         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3986             int score= score_tab[i];
3987             if(i) score += lambda*2; //FIXME exacter?
3988
3989             if(score < last_score){
3990                 last_score= score;
3991                 last_i= i;
3992                 last_level= level_tab[i];
3993                 last_run= run_tab[i];
3994             }
3995         }
3996     }
3997
3998     s->coded_score[n] = last_score;
3999
         /* keep |block[0]| before the searched range is cleared; it is used
          * by the single-coefficient case below */
4000     dc= FFABS(block[0]);
4001     last_non_zero= last_i - 1;
4002     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4003
4004     if(last_non_zero < start_i)
4005         return last_non_zero;
4006
         /* only reachable with start_i == 0 (the inter path above): the
          * single chosen coefficient sits at scan position 0, so its level is
          * decided again on its own against dc; -1 is returned when zero wins */
4007     if(last_non_zero == 0 && start_i == 0){
4008         int best_level= 0;
4009         int best_score= dc * dc;
4010
4011         for(i=0; i<coeff_count[0]; i++){
4012             int level= coeff[i][0];
4013             int alevel= FFABS(level);
4014             int unquant_coeff, score, distortion;
4015
4016             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4017                     unquant_coeff= (alevel*qmul + qadd)>>3;
4018             }else{ //MPEG1
4019                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4020                     unquant_coeff =   (unquant_coeff - 1) | 1;
4021             }
4022             unquant_coeff = (unquant_coeff + 4) >> 3;
4023             unquant_coeff<<= 3 + 3;
4024
4025             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4026             level+=64;
4027             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4028             else                    score= distortion + esc_length*lambda;
4029
4030             if(score < best_score){
4031                 best_score= score;
4032                 best_level= level - 64;
4033             }
4034         }
4035         block[0]= best_level;
4036         s->coded_score[n] = best_score - dc*dc;
4037         if(best_level == 0) return -1;
4038         else                return last_non_zero;
4039     }
4040
         /* write the chosen levels back, walking run_tab/level_tab from the
          * last coded position towards start_i */
4041     i= last_i;
4042     av_assert2(last_level);
4043
4044     block[ perm_scantable[last_non_zero] ]= last_level;
4045     i -= last_run + 1;
4046
4047     for(; i>start_i; i -= run_tab[i] + 1){
4048         block[ perm_scantable[i-1] ]= level_tab[i];
4049     }
4050
4051     return last_non_zero;
4052 }
4053
4054 //#define REFINE_STATS 1
/* Table filled by build_basis(): first index is a (permuted) coefficient
 * index, second index a position inside the 8x8 block.
 * dct_quantize_refine() treats basis[0][0] == 0 as "not built yet". */
4055 static int16_t basis[64][64];
4056
4057 static void build_basis(uint8_t *perm){
4058     int i, j, x, y;
4059     emms_c();
4060     for(i=0; i<8; i++){
4061         for(j=0; j<8; j++){
4062             for(y=0; y<8; y++){
4063                 for(x=0; x<8; x++){
4064                     double s= 0.25*(1<<BASIS_SHIFT);
4065                     int index= 8*i + j;
4066                     int perm_index= perm[index];
4067                     if(i==0) s*= sqrt(0.5);
4068                     if(j==0) s*= sqrt(0.5);
4069                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4070                 }
4071             }
4072         }
4073     }
4074 }
4075
4076 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4077                         int16_t *block, int16_t *weight, int16_t *orig,
4078                         int n, int qscale){
4079     int16_t rem[64];
4080     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4081     const uint8_t *scantable= s->intra_scantable.scantable;
4082     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4083 //    unsigned int threshold1, threshold2;
4084 //    int bias=0;
4085     int run_tab[65];
4086     int prev_run=0;
4087     int prev_level=0;
4088     int qmul, qadd, start_i, last_non_zero, i, dc;
4089     uint8_t * length;
4090     uint8_t * last_length;
4091     int lambda;
4092     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4093 #ifdef REFINE_STATS
4094 static int count=0;
4095 static int after_last=0;
4096 static int to_zero=0;
4097 static int from_zero=0;
4098 static int raise=0;
4099 static int lower=0;
4100 static int messed_sign=0;
4101 #endif
4102
4103     if(basis[0][0] == 0)
4104         build_basis(s->idsp.idct_permutation);
4105
4106     qmul= qscale*2;
4107     qadd= (qscale-1)|1;
4108     if (s->mb_intra) {
4109         if (!s->h263_aic) {
4110             if (n < 4)
4111                 q = s->y_dc_scale;
4112             else
4113                 q = s->c_dc_scale;
4114         } else{
4115             /* For AIC we skip quant/dequant of INTRADC */
4116             q = 1;
4117             qadd=0;
4118         }
4119         q <<= RECON_SHIFT-3;
4120         /* note: block[0] is assumed to be positive */
4121         dc= block[0]*q;
4122 //        block[0] = (block[0] + (q >> 1)) / q;
4123         start_i = 1;
4124 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4125 //            bias= 1<<(QMAT_SHIFT-1);
4126         length     = s->intra_ac_vlc_length;
4127         last_length= s->intra_ac_vlc_last_length;
4128     } else {
4129         dc= 0;
4130         start_i = 0;
4131         length     = s->inter_ac_vlc_length;
4132         last_length= s->inter_ac_vlc_last_length;
4133     }
4134     last_non_zero = s->block_last_index[n];
4135
4136 #ifdef REFINE_STATS
4137 {START_TIMER
4138 #endif
4139     dc += (1<<(RECON_SHIFT-1));
4140     for(i=0; i<64; i++){
4141         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4142     }
4143 #ifdef REFINE_STATS
4144 STOP_TIMER("memset rem[]")}
4145 #endif
4146     sum=0;
4147     for(i=0; i<64; i++){
4148         int one= 36;
4149         int qns=4;
4150         int w;
4151
4152         w= FFABS(weight[i]) + qns*one;
4153         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4154
4155         weight[i] = w;
4156 //        w=weight[i] = (63*qns + (w/2)) / w;
4157
4158         av_assert2(w>0);
4159         av_assert2(w<(1<<6));
4160         sum += w*w;
4161     }
4162     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4163 #ifdef REFINE_STATS
4164 {START_TIMER
4165 #endif
4166     run=0;
4167     rle_index=0;
4168     for(i=start_i; i<=last_non_zero; i++){
4169         int j= perm_scantable[i];
4170         const int level= block[j];
4171         int coeff;
4172
4173         if(level){
4174             if(level<0) coeff= qmul*level - qadd;
4175             else        coeff= qmul*level + qadd;
4176             run_tab[rle_index++]=run;
4177             run=0;
4178
4179             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4180         }else{
4181             run++;
4182         }
4183     }
4184 #ifdef REFINE_STATS
4185 if(last_non_zero>0){
4186 STOP_TIMER("init rem[]")
4187 }
4188 }
4189
4190 {START_TIMER
4191 #endif
4192     for(;;){
4193         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4194         int best_coeff=0;
4195         int best_change=0;
4196         int run2, best_unquant_change=0, analyze_gradient;
4197 #ifdef REFINE_STATS
4198 {START_TIMER
4199 #endif
4200         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4201
4202         if(analyze_gradient){
4203 #ifdef REFINE_STATS
4204 {START_TIMER
4205 #endif
4206             for(i=0; i<64; i++){
4207                 int w= weight[i];
4208
4209                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4210             }
4211 #ifdef REFINE_STATS
4212 STOP_TIMER("rem*w*w")}
4213 {START_TIMER
4214 #endif
4215             s->fdsp.fdct(d1);
4216 #ifdef REFINE_STATS
4217 STOP_TIMER("dct")}
4218 #endif
4219         }
4220
4221         if(start_i){
4222             const int level= block[0];
4223             int change, old_coeff;
4224
4225             av_assert2(s->mb_intra);
4226
4227             old_coeff= q*level;
4228
4229             for(change=-1; change<=1; change+=2){
4230                 int new_level= level + change;
4231                 int score, new_coeff;
4232
4233                 new_coeff= q*new_level;
4234                 if(new_coeff >= 2048 || new_coeff < 0)
4235                     continue;
4236
4237                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4238                                                   new_coeff - old_coeff);
4239                 if(score<best_score){
4240                     best_score= score;
4241                     best_coeff= 0;
4242                     best_change= change;
4243                     best_unquant_change= new_coeff - old_coeff;
4244                 }
4245             }
4246         }
4247
4248         run=0;
4249         rle_index=0;
4250         run2= run_tab[rle_index++];
4251         prev_level=0;
4252         prev_run=0;
4253
4254         for(i=start_i; i<64; i++){
4255             int j= perm_scantable[i];
4256             const int level= block[j];
4257             int change, old_coeff;
4258
4259             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4260                 break;
4261
4262             if(level){
4263                 if(level<0) old_coeff= qmul*level - qadd;
4264                 else        old_coeff= qmul*level + qadd;
4265                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4266             }else{
4267                 old_coeff=0;
4268                 run2--;
4269                 av_assert2(run2>=0 || i >= last_non_zero );
4270             }
4271
4272             for(change=-1; change<=1; change+=2){
4273                 int new_level= level + change;
4274                 int score, new_coeff, unquant_change;
4275
4276                 score=0;
4277                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4278                    continue;
4279
4280                 if(new_level){
4281                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4282                     else            new_coeff= qmul*new_level + qadd;
4283                     if(new_coeff >= 2048 || new_coeff <= -2048)
4284                         continue;
4285                     //FIXME check for overflow
4286
4287                     if(level){
4288                         if(level < 63 && level > -63){
4289                             if(i < last_non_zero)
4290                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4291                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4292                             else
4293                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4294                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4295                         }
4296                     }else{
4297                         av_assert2(FFABS(new_level)==1);
4298
4299                         if(analyze_gradient){
4300                             int g= d1[ scantable[i] ];
4301                             if(g && (g^new_level) >= 0)
4302                                 continue;
4303                         }
4304
4305                         if(i < last_non_zero){
4306                             int next_i= i + run2 + 1;
4307                             int next_level= block[ perm_scantable[next_i] ] + 64;
4308
4309                             if(next_level&(~127))
4310                                 next_level= 0;
4311
4312                             if(next_i < last_non_zero)
4313                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4314                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4315                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4316                             else
4317                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4318                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4319                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4320                         }else{
4321                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4322                             if(prev_level){
4323                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4324                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4325                             }
4326                         }
4327                     }
4328                 }else{
4329                     new_coeff=0;
4330                     av_assert2(FFABS(level)==1);
4331
4332                     if(i < last_non_zero){
4333                         int next_i= i + run2 + 1;
4334                         int next_level= block[ perm_scantable[next_i] ] + 64;
4335
4336                         if(next_level&(~127))
4337                             next_level= 0;
4338
4339                         if(next_i < last_non_zero)
4340                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4341                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4342                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4343                         else
4344                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4345                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4346                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4347                     }else{
4348                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4349                         if(prev_level){
4350                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4351                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4352                         }
4353                     }
4354                 }
4355
4356                 score *= lambda;
4357
4358                 unquant_change= new_coeff - old_coeff;
4359                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4360
4361                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4362                                                    unquant_change);
4363                 if(score<best_score){
4364                     best_score= score;
4365                     best_coeff= i;
4366                     best_change= change;
4367                     best_unquant_change= unquant_change;
4368                 }
4369             }
4370             if(level){
4371                 prev_level= level + 64;
4372                 if(prev_level&(~127))
4373                     prev_level= 0;
4374                 prev_run= run;
4375                 run=0;
4376             }else{
4377                 run++;
4378             }
4379         }
4380 #ifdef REFINE_STATS
4381 STOP_TIMER("iterative step")}
4382 #endif
4383
4384         if(best_change){
4385             int j= perm_scantable[ best_coeff ];
4386
4387             block[j] += best_change;
4388
4389             if(best_coeff > last_non_zero){
4390                 last_non_zero= best_coeff;
4391                 av_assert2(block[j]);
4392 #ifdef REFINE_STATS
4393 after_last++;
4394 #endif
4395             }else{
4396 #ifdef REFINE_STATS
4397 if(block[j]){
4398     if(block[j] - best_change){
4399         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4400             raise++;
4401         }else{
4402             lower++;
4403         }
4404     }else{
4405         from_zero++;
4406     }
4407 }else{
4408     to_zero++;
4409 }
4410 #endif
4411                 for(; last_non_zero>=start_i; last_non_zero--){
4412                     if(block[perm_scantable[last_non_zero]])
4413                         break;
4414                 }
4415             }
4416 #ifdef REFINE_STATS
4417 count++;
4418 if(256*256*256*64 % count == 0){
4419     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4420 }
4421 #endif
4422             run=0;
4423             rle_index=0;
4424             for(i=start_i; i<=last_non_zero; i++){
4425                 int j= perm_scantable[i];
4426                 const int level= block[j];
4427
4428                  if(level){
4429                      run_tab[rle_index++]=run;
4430                      run=0;
4431                  }else{
4432                      run++;
4433                  }
4434             }
4435
4436             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4437         }else{
4438             break;
4439         }
4440     }
4441 #ifdef REFINE_STATS
4442 if(last_non_zero>0){
4443 STOP_TIMER("iterative search")
4444 }
4445 }
4446 #endif
4447
4448     return last_non_zero;
4449 }
4450
4451 int ff_dct_quantize_c(MpegEncContext *s,
4452                         int16_t *block, int n,
4453                         int qscale, int *overflow)
4454 {
4455     int i, j, level, last_non_zero, q, start_i;
4456     const int *qmat;
4457     const uint8_t *scantable= s->intra_scantable.scantable;
4458     int bias;
4459     int max=0;
4460     unsigned int threshold1, threshold2;
4461
4462     s->fdsp.fdct(block);
4463
4464     if(s->dct_error_sum)
4465         s->denoise_dct(s, block);
4466
4467     if (s->mb_intra) {
4468         if (!s->h263_aic) {
4469             if (n < 4)
4470                 q = s->y_dc_scale;
4471             else
4472                 q = s->c_dc_scale;
4473             q = q << 3;
4474         } else
4475             /* For AIC we skip quant/dequant of INTRADC */
4476             q = 1 << 3;
4477
4478         /* note: block[0] is assumed to be positive */
4479         block[0] = (block[0] + (q >> 1)) / q;
4480         start_i = 1;
4481         last_non_zero = 0;
4482         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4483         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4484     } else {
4485         start_i = 0;
4486         last_non_zero = -1;
4487         qmat = s->q_inter_matrix[qscale];
4488         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4489     }
4490     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4491     threshold2= (threshold1<<1);
4492     for(i=63;i>=start_i;i--) {
4493         j = scantable[i];
4494         level = block[j] * qmat[j];
4495
4496         if(((unsigned)(level+threshold1))>threshold2){
4497             last_non_zero = i;
4498             break;
4499         }else{
4500             block[j]=0;
4501         }
4502     }
4503     for(i=start_i; i<=last_non_zero; i++) {
4504         j = scantable[i];
4505         level = block[j] * qmat[j];
4506
4507 //        if(   bias+level >= (1<<QMAT_SHIFT)
4508 //           || bias-level >= (1<<QMAT_SHIFT)){
4509         if(((unsigned)(level+threshold1))>threshold2){
4510             if(level>0){
4511                 level= (bias + level)>>QMAT_SHIFT;
4512                 block[j]= level;
4513             }else{
4514                 level= (bias - level)>>QMAT_SHIFT;
4515                 block[j]= -level;
4516             }
4517             max |=level;
4518         }else{
4519             block[j]=0;
4520         }
4521     }
4522     *overflow= s->max_qcoeff < max; //overflow might have happened
4523
4524     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4525     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4526         ff_block_permute(block, s->idsp.idct_permutation,
4527                          scantable, last_non_zero);
4528
4529     return last_non_zero;
4530 }
4531
/* OFFSET(x): byte offset of field x inside MpegEncContext, used by the
 * option tables to locate the field an option writes to. */
4532 #define OFFSET(x) offsetof(MpegEncContext, x)
/* VE: flags marking an option as a video encoding parameter. */
4533 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private options of the H.263 encoder: three encoder-specific integer
 * options (default 0) followed by the shared FF_MPV_COMMON_OPTS entries.
 * The { NULL } entry terminates the table. */
4534 static const AVOption h263_options[] = {
4535     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4536     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4537     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4538     FF_MPV_COMMON_OPTS
4539     { NULL },
4540 };
4541
/* AVClass of the H.263 encoder's private context; it exposes h263_options. */
4542 static const AVClass h263_class = {
4543     .class_name = "H.263 encoder",
4544     .item_name  = av_default_item_name,
4545     .option     = h263_options,
4546     .version    = LIBAVUTIL_VERSION_INT,
4547 };
4548
4549 AVCodec ff_h263_encoder = { /* Codec entry for the "h263" video encoder (AV_CODEC_ID_H263) */
4550     .name           = "h263",
4551     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4552     .type           = AVMEDIA_TYPE_VIDEO,
4553     .id             = AV_CODEC_ID_H263,
4554     .priv_data_size = sizeof(MpegEncContext), /* private data is sized as an MpegEncContext, the struct OFFSET() indexes into */
4555     .init           = ff_mpv_encode_init, /* init/encode2/close use the same ff_mpv_encode_* functions as the other encoders in this section */
4556     .encode2        = ff_mpv_encode_picture,
4557     .close          = ff_mpv_encode_end,
4558     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}, /* only AV_PIX_FMT_YUV420P is listed; AV_PIX_FMT_NONE ends the list */
4559     .priv_class     = &h263_class, /* exposes the options from h263_options */
4560 };
4561
4562 static const AVOption h263p_options[] = { /* Option table of the H.263+ encoder, referenced by h263p_class.option. NOTE(review): entries look like { name, help, field offset, type, default, min, max, flags } -- confirm against the AVOption declaration. */
4563     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, /* stored in MpegEncContext.umvplus (field name differs from the option name) */
4564     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, /* stored in MpegEncContext.alt_inter_vlc */
4565     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, /* same entry as in h263_options */
4566     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE}, /* same entry as in h263_options; there is no "mb_info" entry in this table */
4567     FF_MPV_COMMON_OPTS /* macro defined outside this section; presumably expands to further option entries shared between encoders -- confirm */
4568     { NULL }, /* final all-NULL entry; presumably the end-of-table sentinel */
4569 };
4570 static const AVClass h263p_class = { /* AVClass of the H.263+ encoder; installed as ff_h263p_encoder.priv_class */
4571     .class_name = "H.263p encoder",
4572     .item_name  = av_default_item_name,
4573     .option     = h263p_options, /* option table defined just before this class */
4574     .version    = LIBAVUTIL_VERSION_INT,
4575 };
4576
4577 AVCodec ff_h263p_encoder = { /* Codec entry for the "h263p" video encoder (AV_CODEC_ID_H263P) */
4578     .name           = "h263p",
4579     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4580     .type           = AVMEDIA_TYPE_VIDEO,
4581     .id             = AV_CODEC_ID_H263P,
4582     .priv_data_size = sizeof(MpegEncContext), /* private data is sized as an MpegEncContext, the struct OFFSET() indexes into */
4583     .init           = ff_mpv_encode_init, /* init/encode2/close use the same ff_mpv_encode_* functions as the other encoders in this section */
4584     .encode2        = ff_mpv_encode_picture,
4585     .close          = ff_mpv_encode_end,
4586     .capabilities   = CODEC_CAP_SLICE_THREADS, /* the only encoder entry in this section that sets .capabilities */
4587     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE }, /* only AV_PIX_FMT_YUV420P is listed; AV_PIX_FMT_NONE ends the list */
4588     .priv_class     = &h263p_class, /* exposes the options from h263p_options */
4589 };
4590
4591 FF_MPV_GENERIC_CLASS(msmpeg4v2) /* macro defined outside this section; presumably defines the msmpeg4v2_class referenced below -- confirm */
4592
4593 AVCodec ff_msmpeg4v2_encoder = { /* Codec entry for the "msmpeg4v2" video encoder (AV_CODEC_ID_MSMPEG4V2) */
4594     .name           = "msmpeg4v2",
4595     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4596     .type           = AVMEDIA_TYPE_VIDEO,
4597     .id             = AV_CODEC_ID_MSMPEG4V2,
4598     .priv_data_size = sizeof(MpegEncContext), /* private data is sized as an MpegEncContext */
4599     .init           = ff_mpv_encode_init, /* init/encode2/close use the same ff_mpv_encode_* functions as the other encoders in this section */
4600     .encode2        = ff_mpv_encode_picture,
4601     .close          = ff_mpv_encode_end,
4602     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE }, /* only AV_PIX_FMT_YUV420P is listed; AV_PIX_FMT_NONE ends the list */
4603     .priv_class     = &msmpeg4v2_class,
4604 };
4605
4606 FF_MPV_GENERIC_CLASS(msmpeg4v3) /* macro defined outside this section; presumably defines the msmpeg4v3_class referenced below -- confirm */
4607
4608 AVCodec ff_msmpeg4v3_encoder = { /* Codec entry for the Microsoft MPEG-4 version 3 video encoder (AV_CODEC_ID_MSMPEG4V3) */
4609     .name           = "msmpeg4", /* registered name carries no "v3" suffix, unlike the C identifier and the codec id */
4610     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4611     .type           = AVMEDIA_TYPE_VIDEO,
4612     .id             = AV_CODEC_ID_MSMPEG4V3,
4613     .priv_data_size = sizeof(MpegEncContext), /* private data is sized as an MpegEncContext */
4614     .init           = ff_mpv_encode_init, /* init/encode2/close use the same ff_mpv_encode_* functions as the other encoders in this section */
4615     .encode2        = ff_mpv_encode_picture,
4616     .close          = ff_mpv_encode_end,
4617     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE }, /* only AV_PIX_FMT_YUV420P is listed; AV_PIX_FMT_NONE ends the list */
4618     .priv_class     = &msmpeg4v3_class,
4619 };
4620
4621 FF_MPV_GENERIC_CLASS(wmv1) /* macro defined outside this section; presumably defines the wmv1_class referenced below -- confirm */
4622
4623 AVCodec ff_wmv1_encoder = { /* Codec entry for the "wmv1" video encoder (AV_CODEC_ID_WMV1) */
4624     .name           = "wmv1",
4625     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4626     .type           = AVMEDIA_TYPE_VIDEO,
4627     .id             = AV_CODEC_ID_WMV1,
4628     .priv_data_size = sizeof(MpegEncContext), /* private data is sized as an MpegEncContext */
4629     .init           = ff_mpv_encode_init, /* init/encode2/close use the same ff_mpv_encode_* functions as the other encoders in this section */
4630     .encode2        = ff_mpv_encode_picture,
4631     .close          = ff_mpv_encode_end,
4632     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE }, /* only AV_PIX_FMT_YUV420P is listed; AV_PIX_FMT_NONE ends the list */
4633     .priv_class     = &wmv1_class,
4634 };