git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mjpegenc_common.h"
  46 #include "mathops.h"
  47 #include "mpegutils.h"
  48 #include "mjpegenc.h"
  49 #include "msmpeg4.h"
  50 #include "pixblockdsp.h"
  51 #include "qpeldsp.h"
  52 #include "faandct.h"
  53 #include "thread.h"
  54 #include "aandcttab.h"
  55 #include "flv.h"
  56 #include "mpeg4video.h"
  57 #include "internal.h"
  58 #include "bytestream.h"
  59 #include <limits.h>
  60 #include "sp5x.h"
  61
/* Shift applied to the quantizer bias values used in this file (see
 * ff_convert_matrix() and the intra/inter_quant_bias defaults in
 * ff_mpv_encode_init()). */
#define QUANT_BIAS_SHIFT 8

/* Numerator shifts for the reciprocal tables built by ff_convert_matrix():
 * QMAT_SHIFT_MMX for the 16-bit qmat16 table, QMAT_SHIFT for the int qmat
 * table. */
#define QMAT_SHIFT_MMX 16
#define QMAT_SHIFT 21

/* Forward declarations of file-local helpers that are referenced before
 * their definitions. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Shared default tables; mpv_encode_defaults() points s->me.mv_penalty and
 * s->fcode_tab at these. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table consisting of the common mpegvideo encoder options
 * (FF_MPV_COMMON_OPTS) followed by the terminating NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  80
  81 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  82                        uint16_t (*qmat16)[2][64],
  83                        const uint16_t *quant_matrix,
  84                        int bias, int qmin, int qmax, int intra)
  85 {
  86     FDCTDSPContext *fdsp = &s->fdsp;
  87     int qscale;
  88     int shift = 0;
  89
  90     for (qscale = qmin; qscale <= qmax; qscale++) {
  91         int i;
  92         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  93 #if CONFIG_FAANDCT
  94             fdsp->fdct == ff_faandct            ||
  95 #endif /* CONFIG_FAANDCT */
  96             fdsp->fdct == ff_jpeg_fdct_islow_10) {
  97             for (i = 0; i < 64; i++) {
  98                 const int j = s->idsp.idct_permutation[i];
  99                 int64_t den = (int64_t) qscale * quant_matrix[j];
 100                 /* 16 <= qscale * quant_matrix[i] <= 7905
 101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 102                  *             19952 <=              x  <= 249205026
 103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 104                  *           3444240 >= (1 << 36) / (x) >= 275 */
 105
 106                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 107             }
 108         } else if (fdsp->fdct == ff_fdct_ifast) {
 109             for (i = 0; i < 64; i++) {
 110                 const int j = s->idsp.idct_permutation[i];
 111                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 112                 /* 16 <= qscale * quant_matrix[i] <= 7905
 113                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 114                  *             19952 <=              x  <= 249205026
 115                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 116                  *           3444240 >= (1 << 36) / (x) >= 275 */
 117
 118                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 119             }
 120         } else {
 121             for (i = 0; i < 64; i++) {
 122                 const int j = s->idsp.idct_permutation[i];
 123                 int64_t den = (int64_t) qscale * quant_matrix[j];
 124                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 125                  * Assume x = qscale * quant_matrix[i]
 126                  * So             16 <=              x  <= 7905
 127                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 128                  * so          32768 >= (1 << 19) / (x) >= 67 */
 129                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 131                 //                    (qscale * quant_matrix[i]);
 132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 133
 134                 if (qmat16[qscale][0][i] == 0 ||
 135                     qmat16[qscale][0][i] == 128 * 256)
 136                     qmat16[qscale][0][i] = 128 * 256 - 1;
 137                 qmat16[qscale][1][i] =
 138                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 139                                 qmat16[qscale][0][i]);
 140             }
 141         }
 142
 143         for (i = intra; i < 64; i++) {
 144             int64_t max = 8191;
 145             if (fdsp->fdct == ff_fdct_ifast) {
 146                 max = (8191LL * ff_aanscales[i]) >> 14;
 147             }
 148             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 149                 shift++;
 150             }
 151         }
 152     }
 153     if (shift) {
 154         av_log(NULL, AV_LOG_INFO,
 155                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 156                QMAT_SHIFT - shift);
 157     }
 158 }
 159
 160 static inline void update_qscale(MpegEncContext *s)
 161 {
 162     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 163                 (FF_LAMBDA_SHIFT + 7);
 164     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 165
 166     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 167                  FF_LAMBDA_SHIFT;
 168 }
 169
 170 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 171 {
 172     int i;
 173
 174     if (matrix) {
 175         put_bits(pb, 1, 1);
 176         for (i = 0; i < 64; i++) {
 177             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 178         }
 179     } else
 180         put_bits(pb, 1, 0);
 181 }
 182
 183 /**
 184  * init s->current_picture.qscale_table from s->lambda_table
 185  */
 186 void ff_init_qscale_tab(MpegEncContext *s)
 187 {
 188     int8_t * const qscale_table = s->current_picture.qscale_table;
 189     int i;
 190
 191     for (i = 0; i < s->mb_num; i++) {
 192         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 193         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 194         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 195                                                   s->avctx->qmax);
 196     }
 197 }
 198
 199 static void update_duplicate_context_after_me(MpegEncContext *dst,
 200                                               MpegEncContext *src)
 201 {
 202 #define COPY(a) dst->a= src->a
 203     COPY(pict_type);
 204     COPY(current_picture);
 205     COPY(f_code);
 206     COPY(b_code);
 207     COPY(qscale);
 208     COPY(lambda);
 209     COPY(lambda2);
 210     COPY(picture_in_gop_number);
 211     COPY(gop_picture_number);
 212     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 213     COPY(progressive_frame);    // FIXME don't set in encode_header
 214     COPY(partitioned_frame);    // FIXME don't set in encode_header
 215 #undef COPY
 216 }
 217
 218 /**
 219  * Set the given MpegEncContext to defaults for encoding.
 220  * the changed fields will not depend upon the prior state of the MpegEncContext.
 221  */
 222 static void mpv_encode_defaults(MpegEncContext *s)
 223 {
 224     int i;
 225     ff_mpv_common_defaults(s);
 226
 227     for (i = -16; i < 16; i++) {
 228         default_fcode_tab[i + MAX_MV] = 1;
 229     }
 230     s->me.mv_penalty = default_mv_penalty;
 231     s->fcode_tab     = default_fcode_tab;
 232
 233     s->input_picture_number  = 0;
 234     s->picture_in_gop_number = 0;
 235 }
 236
 237 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 238     if (ARCH_X86)
 239         ff_dct_encode_init_x86(s);
 240
 241     if (CONFIG_H263_ENCODER)
 242         ff_h263dsp_init(&s->h263dsp);
 243     if (!s->dct_quantize)
 244         s->dct_quantize = ff_dct_quantize_c;
 245     if (!s->denoise_dct)
 246         s->denoise_dct  = denoise_dct_c;
 247     s->fast_dct_quantize = s->dct_quantize;
 248     if (s->avctx->trellis)
 249         s->dct_quantize  = dct_quantize_trellis_c;
 250
 251     return 0;
 252 }
 253
 254 /* init video encoder */
 255 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 256 {
 257     MpegEncContext *s = avctx->priv_data;
 258     int i, ret, format_supported;
 259
 260     mpv_encode_defaults(s);
 261
 262     switch (avctx->codec_id) {
 263     case AV_CODEC_ID_MPEG2VIDEO:
 264         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 265             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 266             av_log(avctx, AV_LOG_ERROR,
 267                    "only YUV420 and YUV422 are supported\n");
 268             return -1;
 269         }
 270         break;
 271     case AV_CODEC_ID_MJPEG:
 272     case AV_CODEC_ID_AMV:
 273         format_supported = 0;
 274         /* JPEG color space */
 275         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 276             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 277             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 278             (avctx->color_range == AVCOL_RANGE_JPEG &&
 279              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 280               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 281               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 282             format_supported = 1;
 283         /* MPEG color space */
 284         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 285                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 286                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 287                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 288             format_supported = 1;
 289
 290         if (!format_supported) {
 291             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 292             return -1;
 293         }
 294         break;
 295     default:
 296         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 297             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 298             return -1;
 299         }
 300     }
 301
 302     switch (avctx->pix_fmt) {
 303     case AV_PIX_FMT_YUVJ444P:
 304     case AV_PIX_FMT_YUV444P:
 305         s->chroma_format = CHROMA_444;
 306         break;
 307     case AV_PIX_FMT_YUVJ422P:
 308     case AV_PIX_FMT_YUV422P:
 309         s->chroma_format = CHROMA_422;
 310         break;
 311     case AV_PIX_FMT_YUVJ420P:
 312     case AV_PIX_FMT_YUV420P:
 313     default:
 314         s->chroma_format = CHROMA_420;
 315         break;
 316     }
 317
 318     s->bit_rate = avctx->bit_rate;
 319     s->width    = avctx->width;
 320     s->height   = avctx->height;
 321     if (avctx->gop_size > 600 &&
 322         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 323         av_log(avctx, AV_LOG_WARNING,
 324                "keyframe interval too large!, reducing it from %d to %d\n",
 325                avctx->gop_size, 600);
 326         avctx->gop_size = 600;
 327     }
 328     s->gop_size     = avctx->gop_size;
 329     s->avctx        = avctx;
 330     s->flags        = avctx->flags;
 331     s->flags2       = avctx->flags2;
 332     if (avctx->max_b_frames > MAX_B_FRAMES) {
 333         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 334                "is %d.\n", MAX_B_FRAMES);
 335         avctx->max_b_frames = MAX_B_FRAMES;
 336     }
 337     s->max_b_frames = avctx->max_b_frames;
 338     s->codec_id     = avctx->codec->id;
 339     s->strict_std_compliance = avctx->strict_std_compliance;
 340     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 341     s->mpeg_quant         = avctx->mpeg_quant;
 342     s->rtp_mode           = !!avctx->rtp_payload_size;
 343     s->intra_dc_precision = avctx->intra_dc_precision;
 344
 345     // workaround some differences between how applications specify dc precision
 346     if (s->intra_dc_precision < 0) {
 347         s->intra_dc_precision += 8;
 348     } else if (s->intra_dc_precision >= 8)
 349         s->intra_dc_precision -= 8;
 350
 351     if (s->intra_dc_precision < 0) {
 352         av_log(avctx, AV_LOG_ERROR,
 353                 "intra dc precision must be positive, note some applications use"
 354                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 355         return AVERROR(EINVAL);
 356     }
 357
 358     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 359         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 360         return AVERROR(EINVAL);
 361     }
 362     s->user_specified_pts = AV_NOPTS_VALUE;
 363
 364     if (s->gop_size <= 1) {
 365         s->intra_only = 1;
 366         s->gop_size   = 12;
 367     } else {
 368         s->intra_only = 0;
 369     }
 370
 371     s->me_method = avctx->me_method;
 372
 373     /* Fixed QSCALE */
 374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 375
 376 #if FF_API_MPV_OPT
 377     FF_DISABLE_DEPRECATION_WARNINGS
 378     if (avctx->border_masking != 0.0)
 379         s->border_masking = avctx->border_masking;
 380     FF_ENABLE_DEPRECATION_WARNINGS
 381 #endif
 382
 383     s->adaptive_quant = (s->avctx->lumi_masking ||
 384                          s->avctx->dark_masking ||
 385                          s->avctx->temporal_cplx_masking ||
 386                          s->avctx->spatial_cplx_masking  ||
 387                          s->avctx->p_masking      ||
 388                          s->border_masking ||
 389                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 390                         !s->fixed_qscale;
 391
 392     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 393
 394     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 395         switch(avctx->codec_id) {
 396         case AV_CODEC_ID_MPEG1VIDEO:
 397         case AV_CODEC_ID_MPEG2VIDEO:
 398             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
 399             break;
 400         case AV_CODEC_ID_MPEG4:
 401         case AV_CODEC_ID_MSMPEG4V1:
 402         case AV_CODEC_ID_MSMPEG4V2:
 403         case AV_CODEC_ID_MSMPEG4V3:
 404             if       (avctx->rc_max_rate >= 15000000) {
 405                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
 406             } else if(avctx->rc_max_rate >=  2000000) {
 407                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
 408             } else if(avctx->rc_max_rate >=   384000) {
 409                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
 410             } else
 411                 avctx->rc_buffer_size = 40;
 412             avctx->rc_buffer_size *= 16384;
 413             break;
 414         }
 415         if (avctx->rc_buffer_size) {
 416             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 417         }
 418     }
 419
 420     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 421         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 422         return -1;
 423     }
 424
 425     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 426         av_log(avctx, AV_LOG_INFO,
 427                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 428     }
 429
 430     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 431         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 432         return -1;
 433     }
 434
 435     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 436         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 437         return -1;
 438     }
 439
 440     if (avctx->rc_max_rate &&
 441         avctx->rc_max_rate == avctx->bit_rate &&
 442         avctx->rc_max_rate != avctx->rc_min_rate) {
 443         av_log(avctx, AV_LOG_INFO,
 444                "impossible bitrate constraints, this will fail\n");
 445     }
 446
 447     if (avctx->rc_buffer_size &&
 448         avctx->bit_rate * (int64_t)avctx->time_base.num >
 449             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 450         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 451         return -1;
 452     }
 453
 454     if (!s->fixed_qscale &&
 455         avctx->bit_rate * av_q2d(avctx->time_base) >
 456             avctx->bit_rate_tolerance) {
 457         av_log(avctx, AV_LOG_WARNING,
 458                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 459         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 460     }
 461
 462     if (s->avctx->rc_max_rate &&
 463         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 464         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 465          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 466         90000LL * (avctx->rc_buffer_size - 1) >
 467             s->avctx->rc_max_rate * 0xFFFFLL) {
 468         av_log(avctx, AV_LOG_INFO,
 469                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 470                "specified vbv buffer is too large for the given bitrate!\n");
 471     }
 472
 473     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 474         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 475         s->codec_id != AV_CODEC_ID_FLV1) {
 476         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 477         return -1;
 478     }
 479
 480     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 481         av_log(avctx, AV_LOG_ERROR,
 482                "OBMC is only supported with simple mb decision\n");
 483         return -1;
 484     }
 485
 486     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 487         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 488         return -1;
 489     }
 490
 491     if (s->max_b_frames                    &&
 492         s->codec_id != AV_CODEC_ID_MPEG4      &&
 493         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 494         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 495         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 496         return -1;
 497     }
 498     if (s->max_b_frames < 0) {
 499         av_log(avctx, AV_LOG_ERROR,
 500                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 501         return -1;
 502     }
 503
 504     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 505          s->codec_id == AV_CODEC_ID_H263  ||
 506          s->codec_id == AV_CODEC_ID_H263P) &&
 507         (avctx->sample_aspect_ratio.num > 255 ||
 508          avctx->sample_aspect_ratio.den > 255)) {
 509         av_log(avctx, AV_LOG_WARNING,
 510                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 511                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 512         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 513                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 514     }
 515
 516     if ((s->codec_id == AV_CODEC_ID_H263  ||
 517          s->codec_id == AV_CODEC_ID_H263P) &&
 518         (avctx->width  > 2048 ||
 519          avctx->height > 1152 )) {
 520         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 521         return -1;
 522     }
 523     if ((s->codec_id == AV_CODEC_ID_H263  ||
 524          s->codec_id == AV_CODEC_ID_H263P) &&
 525         ((avctx->width &3) ||
 526          (avctx->height&3) )) {
 527         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 528         return -1;
 529     }
 530
 531     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 532         (avctx->width  > 4095 ||
 533          avctx->height > 4095 )) {
 534         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 535         return -1;
 536     }
 537
 538     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 539         (avctx->width  > 16383 ||
 540          avctx->height > 16383 )) {
 541         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 542         return -1;
 543     }
 544
 545     if (s->codec_id == AV_CODEC_ID_RV10 &&
 546         (avctx->width &15 ||
 547          avctx->height&15 )) {
 548         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 549         return AVERROR(EINVAL);
 550     }
 551
 552     if (s->codec_id == AV_CODEC_ID_RV20 &&
 553         (avctx->width &3 ||
 554          avctx->height&3 )) {
 555         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 556         return AVERROR(EINVAL);
 557     }
 558
 559     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 560          s->codec_id == AV_CODEC_ID_WMV2) &&
 561          avctx->width & 1) {
 562          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 563          return -1;
 564     }
 565
 566     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 567         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 568         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 569         return -1;
 570     }
 571
 572     // FIXME mpeg2 uses that too
 573     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 574                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 575         av_log(avctx, AV_LOG_ERROR,
 576                "mpeg2 style quantization not supported by codec\n");
 577         return -1;
 578     }
 579
 580     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 581         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 582         return -1;
 583     }
 584
 585     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 586         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 587         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 588         return -1;
 589     }
 590
 591     if (s->avctx->scenechange_threshold < 1000000000 &&
 592         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 593         av_log(avctx, AV_LOG_ERROR,
 594                "closed gop with scene change detection are not supported yet, "
 595                "set threshold to 1000000000\n");
 596         return -1;
 597     }
 598
 599     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 600         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 601             av_log(avctx, AV_LOG_ERROR,
 602                   "low delay forcing is only available for mpeg2\n");
 603             return -1;
 604         }
 605         if (s->max_b_frames != 0) {
 606             av_log(avctx, AV_LOG_ERROR,
 607                    "b frames cannot be used with low delay\n");
 608             return -1;
 609         }
 610     }
 611
 612     if (s->q_scale_type == 1) {
 613         if (avctx->qmax > 12) {
 614             av_log(avctx, AV_LOG_ERROR,
 615                    "non linear quant only supports qmax <= 12 currently\n");
 616             return -1;
 617         }
 618     }
 619
 620     if (s->avctx->thread_count > 1         &&
 621         s->codec_id != AV_CODEC_ID_MPEG4      &&
 622         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 623         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 624         s->codec_id != AV_CODEC_ID_MJPEG      &&
 625         (s->codec_id != AV_CODEC_ID_H263P)) {
 626         av_log(avctx, AV_LOG_ERROR,
 627                "multi threaded encoding not supported by codec\n");
 628         return -1;
 629     }
 630
 631     if (s->avctx->thread_count < 1) {
 632         av_log(avctx, AV_LOG_ERROR,
 633                "automatic thread number detection not supported by codec, "
 634                "patch welcome\n");
 635         return -1;
 636     }
 637
 638     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 639         s->rtp_mode = 1;
 640
 641     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 642         s->h263_slice_structured = 1;
 643
 644     if (!avctx->time_base.den || !avctx->time_base.num) {
 645         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 646         return -1;
 647     }
 648
 649     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 650         av_log(avctx, AV_LOG_INFO,
 651                "notice: b_frame_strategy only affects the first pass\n");
 652         avctx->b_frame_strategy = 0;
 653     }
 654
 655     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 656     if (i > 1) {
 657         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 658         avctx->time_base.den /= i;
 659         avctx->time_base.num /= i;
 660         //return -1;
 661     }
 662
 663     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 664         // (a + x * 3 / 8) / x
 665         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 666         s->inter_quant_bias = 0;
 667     } else {
 668         s->intra_quant_bias = 0;
 669         // (a - x / 4) / x
 670         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 671     }
 672
 673     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 674         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 675         return AVERROR(EINVAL);
 676     }
 677
 678     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 679         s->intra_quant_bias = avctx->intra_quant_bias;
 680     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 681         s->inter_quant_bias = avctx->inter_quant_bias;
 682
 683     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 684
 685     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 686         s->avctx->time_base.den > (1 << 16) - 1) {
 687         av_log(avctx, AV_LOG_ERROR,
 688                "timebase %d/%d not supported by MPEG 4 standard, "
 689                "the maximum admitted value for the timebase denominator "
 690                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 691                (1 << 16) - 1);
 692         return -1;
 693     }
 694     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 695
 696     switch (avctx->codec->id) {
 697     case AV_CODEC_ID_MPEG1VIDEO:
 698         s->out_format = FMT_MPEG1;
 699         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 700         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 701         break;
 702     case AV_CODEC_ID_MPEG2VIDEO:
 703         s->out_format = FMT_MPEG1;
 704         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 705         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 706         s->rtp_mode   = 1;
 707         break;
 708     case AV_CODEC_ID_MJPEG:
 709     case AV_CODEC_ID_AMV:
 710         s->out_format = FMT_MJPEG;
 711         s->intra_only = 1; /* force intra only for jpeg */
 712         if (!CONFIG_MJPEG_ENCODER ||
 713             ff_mjpeg_encode_init(s) < 0)
 714             return -1;
 715         avctx->delay = 0;
 716         s->low_delay = 1;
 717         break;
 718     case AV_CODEC_ID_H261:
 719         if (!CONFIG_H261_ENCODER)
 720             return -1;
 721         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 722             av_log(avctx, AV_LOG_ERROR,
 723                    "The specified picture size of %dx%d is not valid for the "
 724                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 725                     s->width, s->height);
 726             return -1;
 727         }
 728         s->out_format = FMT_H261;
 729         avctx->delay  = 0;
 730         s->low_delay  = 1;
 731         s->rtp_mode   = 0; /* Sliced encoding not supported */
 732         break;
 733     case AV_CODEC_ID_H263:
 734         if (!CONFIG_H263_ENCODER)
 735             return -1;
 736         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 737                              s->width, s->height) == 8) {
 738             av_log(avctx, AV_LOG_ERROR,
 739                    "The specified picture size of %dx%d is not valid for "
 740                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 741                    "352x288, 704x576, and 1408x1152. "
 742                    "Try H.263+.\n", s->width, s->height);
 743             return -1;
 744         }
 745         s->out_format = FMT_H263;
 746         avctx->delay  = 0;
 747         s->low_delay  = 1;
 748         break;
 749     case AV_CODEC_ID_H263P:
 750         s->out_format = FMT_H263;
 751         s->h263_plus  = 1;
 752         /* Fx */
 753         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 754         s->modified_quant  = s->h263_aic;
 755         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 756         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 757
 758         /* /Fx */
 759         /* These are just to be sure */
 760         avctx->delay = 0;
 761         s->low_delay = 1;
 762         break;
 763     case AV_CODEC_ID_FLV1:
 764         s->out_format      = FMT_H263;
 765         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 766         s->unrestricted_mv = 1;
 767         s->rtp_mode  = 0; /* don't allow GOB */
 768         avctx->delay = 0;
 769         s->low_delay = 1;
 770         break;
 771     case AV_CODEC_ID_RV10:
 772         s->out_format = FMT_H263;
 773         avctx->delay  = 0;
 774         s->low_delay  = 1;
 775         break;
 776     case AV_CODEC_ID_RV20:
 777         s->out_format      = FMT_H263;
 778         avctx->delay       = 0;
 779         s->low_delay       = 1;
 780         s->modified_quant  = 1;
 781         s->h263_aic        = 1;
 782         s->h263_plus       = 1;
 783         s->loop_filter     = 1;
 784         s->unrestricted_mv = 0;
 785         break;
 786     case AV_CODEC_ID_MPEG4:
 787         s->out_format      = FMT_H263;
 788         s->h263_pred       = 1;
 789         s->unrestricted_mv = 1;
 790         s->low_delay       = s->max_b_frames ? 0 : 1;
 791         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 792         break;
 793     case AV_CODEC_ID_MSMPEG4V2:
 794         s->out_format      = FMT_H263;
 795         s->h263_pred       = 1;
 796         s->unrestricted_mv = 1;
 797         s->msmpeg4_version = 2;
 798         avctx->delay       = 0;
 799         s->low_delay       = 1;
 800         break;
 801     case AV_CODEC_ID_MSMPEG4V3:
 802         s->out_format        = FMT_H263;
 803         s->h263_pred         = 1;
 804         s->unrestricted_mv   = 1;
 805         s->msmpeg4_version   = 3;
 806         s->flipflop_rounding = 1;
 807         avctx->delay         = 0;
 808         s->low_delay         = 1;
 809         break;
 810     case AV_CODEC_ID_WMV1:
 811         s->out_format        = FMT_H263;
 812         s->h263_pred         = 1;
 813         s->unrestricted_mv   = 1;
 814         s->msmpeg4_version   = 4;
 815         s->flipflop_rounding = 1;
 816         avctx->delay         = 0;
 817         s->low_delay         = 1;
 818         break;
 819     case AV_CODEC_ID_WMV2:
 820         s->out_format        = FMT_H263;
 821         s->h263_pred         = 1;
 822         s->unrestricted_mv   = 1;
 823         s->msmpeg4_version   = 5;
 824         s->flipflop_rounding = 1;
 825         avctx->delay         = 0;
 826         s->low_delay         = 1;
 827         break;
 828     default:
 829         return -1;
 830     }
 831
 832     avctx->has_b_frames = !s->low_delay;
 833
 834     s->encoding = 1;
 835
 836     s->progressive_frame    =
 837     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 838                                                 CODEC_FLAG_INTERLACED_ME) ||
 839                                 s->alternate_scan);
 840
 841     /* init */
 842     ff_mpv_idct_init(s);
 843     if (ff_mpv_common_init(s) < 0)
 844         return -1;
 845
 846     ff_fdctdsp_init(&s->fdsp, avctx);
 847     ff_me_cmp_init(&s->mecc, avctx);
 848     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 849     ff_pixblockdsp_init(&s->pdsp, avctx);
 850     ff_qpeldsp_init(&s->qdsp);
 851
 852     s->avctx->coded_frame = s->current_picture.f;
 853
 854     if (s->msmpeg4_version) {
 855         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 856                           2 * 2 * (MAX_LEVEL + 1) *
 857                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 858     }
 859     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 860
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 866     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 868                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 870                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 871
 872     if (s->avctx->noise_reduction) {
 873         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 874                           2 * 64 * sizeof(uint16_t), fail);
 875     }
 876
 877     ff_dct_encode_init(s);
 878
 879     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 880         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 881
 882     s->quant_precision = 5;
 883
 884     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 885     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 886
 887     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 888         ff_h261_encode_init(s);
 889     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 890         ff_h263_encode_init(s);
 891     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 892         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 893             return ret;
 894     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 895         && s->out_format == FMT_MPEG1)
 896         ff_mpeg1_encode_init(s);
 897
 898     /* init q matrix */
 899     for (i = 0; i < 64; i++) {
 900         int j = s->idsp.idct_permutation[i];
 901         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 902             s->mpeg_quant) {
 903             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 904             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 905         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 906             s->intra_matrix[j] =
 907             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 908         } else {
 909             /* mpeg1/2 */
 910             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 911             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 912         }
 913         if (s->avctx->intra_matrix)
 914             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 915         if (s->avctx->inter_matrix)
 916             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 917     }
 918
 919     /* precompute matrix */
 920     /* for mjpeg, we do include qscale in the matrix */
 921     if (s->out_format != FMT_MJPEG) {
 922         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 923                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 924                           31, 1);
 925         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 926                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 927                           31, 0);
 928     }
 929
 930     if (ff_rate_control_init(s) < 0)
 931         return -1;
 932
 933 #if FF_API_ERROR_RATE
 934     FF_DISABLE_DEPRECATION_WARNINGS
 935     if (avctx->error_rate)
 936         s->error_rate = avctx->error_rate;
 937     FF_ENABLE_DEPRECATION_WARNINGS;
 938 #endif
 939
 940 #if FF_API_NORMALIZE_AQP
 941     FF_DISABLE_DEPRECATION_WARNINGS
 942     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 943         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 944     FF_ENABLE_DEPRECATION_WARNINGS;
 945 #endif
 946
 947 #if FF_API_MV0
 948     FF_DISABLE_DEPRECATION_WARNINGS
 949     if (avctx->flags & CODEC_FLAG_MV0)
 950         s->mpv_flags |= FF_MPV_FLAG_MV0;
 951     FF_ENABLE_DEPRECATION_WARNINGS
 952 #endif
 953
 954 #if FF_API_MPV_OPT
 955     FF_DISABLE_DEPRECATION_WARNINGS
 956     if (avctx->rc_qsquish != 0.0)
 957         s->rc_qsquish = avctx->rc_qsquish;
 958     if (avctx->rc_qmod_amp != 0.0)
 959         s->rc_qmod_amp = avctx->rc_qmod_amp;
 960     if (avctx->rc_qmod_freq)
 961         s->rc_qmod_freq = avctx->rc_qmod_freq;
 962     if (avctx->rc_buffer_aggressivity != 1.0)
 963         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 964     if (avctx->rc_initial_cplx != 0.0)
 965         s->rc_initial_cplx = avctx->rc_initial_cplx;
 966     if (avctx->lmin)
 967         s->lmin = avctx->lmin;
 968     if (avctx->lmax)
 969         s->lmax = avctx->lmax;
 970
 971     if (avctx->rc_eq) {
 972         av_freep(&s->rc_eq);
 973         s->rc_eq = av_strdup(avctx->rc_eq);
 974         if (!s->rc_eq)
 975             return AVERROR(ENOMEM);
 976     }
 977     FF_ENABLE_DEPRECATION_WARNINGS
 978 #endif
 979
 980     if (avctx->b_frame_strategy == 2) {
 981         for (i = 0; i < s->max_b_frames + 2; i++) {
 982             s->tmp_frames[i] = av_frame_alloc();
 983             if (!s->tmp_frames[i])
 984                 return AVERROR(ENOMEM);
 985
 986             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 987             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 988             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 989
 990             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 991             if (ret < 0)
 992                 return ret;
 993         }
 994     }
 995
 996     return 0;
 997 fail:
 998     ff_mpv_encode_end(avctx);
 999     return AVERROR_UNKNOWN;
1000 }
1001
1002 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1003 {
1004     MpegEncContext *s = avctx->priv_data;
1005     int i;
1006
1007     ff_rate_control_uninit(s);
1008
1009     ff_mpv_common_end(s);
1010     if (CONFIG_MJPEG_ENCODER &&
1011         s->out_format == FMT_MJPEG)
1012         ff_mjpeg_encode_close(s);
1013
1014     av_freep(&avctx->extradata);
1015
1016     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1017         av_frame_free(&s->tmp_frames[i]);
1018
1019     ff_free_picture_tables(&s->new_picture);
1020     ff_mpeg_unref_picture(s, &s->new_picture);
1021
1022     av_freep(&s->avctx->stats_out);
1023     av_freep(&s->ac_stats);
1024
1025     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1026     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1027     s->q_chroma_intra_matrix=   NULL;
1028     s->q_chroma_intra_matrix16= NULL;
1029     av_freep(&s->q_intra_matrix);
1030     av_freep(&s->q_inter_matrix);
1031     av_freep(&s->q_intra_matrix16);
1032     av_freep(&s->q_inter_matrix16);
1033     av_freep(&s->input_picture);
1034     av_freep(&s->reordered_input_picture);
1035     av_freep(&s->dct_offset);
1036
1037     return 0;
1038 }
1039
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff < 0 ? -diff : diff;
        }
    }

    return total;
}
1053
1054 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1055                            uint8_t *ref, int stride)
1056 {
1057     int x, y, w, h;
1058     int acc = 0;
1059
1060     w = s->width  & ~15;
1061     h = s->height & ~15;
1062
1063     for (y = 0; y < h; y += 16) {
1064         for (x = 0; x < w; x += 16) {
1065             int offset = x + y * stride;
1066             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1067                                       stride, 16);
1068             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1069             int sae  = get_sae(src + offset, mean, stride);
1070
1071             acc += sae + 500 < sad;
1072         }
1073     }
1074     return acc;
1075 }
1076
1077
1078 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1079 {
1080     Picture *pic = NULL;
1081     int64_t pts;
1082     int i, display_picture_number = 0, ret;
1083     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1084                                                  (s->low_delay ? 0 : 1);
1085     int direct = 1;
1086
1087     if (pic_arg) {
1088         pts = pic_arg->pts;
1089         display_picture_number = s->input_picture_number++;
1090
1091         if (pts != AV_NOPTS_VALUE) {
1092             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1093                 int64_t last = s->user_specified_pts;
1094
1095                 if (pts <= last) {
1096                     av_log(s->avctx, AV_LOG_ERROR,
1097                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1098                            pts, last);
1099                     return AVERROR(EINVAL);
1100                 }
1101
1102                 if (!s->low_delay && display_picture_number == 1)
1103                     s->dts_delta = pts - last;
1104             }
1105             s->user_specified_pts = pts;
1106         } else {
1107             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1108                 s->user_specified_pts =
1109                 pts = s->user_specified_pts + 1;
1110                 av_log(s->avctx, AV_LOG_INFO,
1111                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1112                        pts);
1113             } else {
1114                 pts = display_picture_number;
1115             }
1116         }
1117     }
1118
1119     if (pic_arg) {
1120         if (!pic_arg->buf[0] ||
1121             pic_arg->linesize[0] != s->linesize ||
1122             pic_arg->linesize[1] != s->uvlinesize ||
1123             pic_arg->linesize[2] != s->uvlinesize)
1124             direct = 0;
1125         if ((s->width & 15) || (s->height & 15))
1126             direct = 0;
1127         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1128             direct = 0;
1129         if (s->linesize & (STRIDE_ALIGN-1))
1130             direct = 0;
1131
1132         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1133                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1134
1135         i = ff_find_unused_picture(s, direct);
1136         if (i < 0)
1137             return i;
1138
1139         pic = &s->picture[i];
1140         pic->reference = 3;
1141
1142         if (direct) {
1143             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1144                 return ret;
1145             if (ff_alloc_picture(s, pic, 1) < 0) {
1146                 return -1;
1147             }
1148         } else {
1149             if (ff_alloc_picture(s, pic, 0) < 0) {
1150                 return -1;
1151             }
1152
1153             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1154                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1155                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1156                 // empty
1157             } else {
1158                 int h_chroma_shift, v_chroma_shift;
1159                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1160                                                  &h_chroma_shift,
1161                                                  &v_chroma_shift);
1162
1163                 for (i = 0; i < 3; i++) {
1164                     int src_stride = pic_arg->linesize[i];
1165                     int dst_stride = i ? s->uvlinesize : s->linesize;
1166                     int h_shift = i ? h_chroma_shift : 0;
1167                     int v_shift = i ? v_chroma_shift : 0;
1168                     int w = s->width  >> h_shift;
1169                     int h = s->height >> v_shift;
1170                     uint8_t *src = pic_arg->data[i];
1171                     uint8_t *dst = pic->f->data[i];
1172                     int vpad = 16;
1173
1174                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1175                         && !s->progressive_sequence
1176                         && FFALIGN(s->height, 32) - s->height > 16)
1177                         vpad = 32;
1178
1179                     if (!s->avctx->rc_buffer_size)
1180                         dst += INPLACE_OFFSET;
1181
1182                     if (src_stride == dst_stride)
1183                         memcpy(dst, src, src_stride * h);
1184                     else {
1185                         int h2 = h;
1186                         uint8_t *dst2 = dst;
1187                         while (h2--) {
1188                             memcpy(dst2, src, w);
1189                             dst2 += dst_stride;
1190                             src += src_stride;
1191                         }
1192                     }
1193                     if ((s->width & 15) || (s->height & (vpad-1))) {
1194                         s->mpvencdsp.draw_edges(dst, dst_stride,
1195                                                 w, h,
1196                                                 16 >> h_shift,
1197                                                 vpad >> v_shift,
1198                                                 EDGE_BOTTOM);
1199                     }
1200                 }
1201             }
1202         }
1203         ret = av_frame_copy_props(pic->f, pic_arg);
1204         if (ret < 0)
1205             return ret;
1206
1207         pic->f->display_picture_number = display_picture_number;
1208         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1209     }
1210
1211     /* shift buffer entries */
1212     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1213         s->input_picture[i - 1] = s->input_picture[i];
1214
1215     s->input_picture[encoding_delay] = (Picture*) pic;
1216
1217     return 0;
1218 }
1219
1220 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1221 {
1222     int x, y, plane;
1223     int score = 0;
1224     int64_t score64 = 0;
1225
1226     for (plane = 0; plane < 3; plane++) {
1227         const int stride = p->f->linesize[plane];
1228         const int bw = plane ? 1 : 2;
1229         for (y = 0; y < s->mb_height * bw; y++) {
1230             for (x = 0; x < s->mb_width * bw; x++) {
1231                 int off = p->shared ? 0 : 16;
1232                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1233                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1234                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1235
1236                 switch (FFABS(s->avctx->frame_skip_exp)) {
1237                 case 0: score    =  FFMAX(score, v);          break;
1238                 case 1: score   += FFABS(v);                  break;
1239                 case 2: score64 += v * (int64_t)v;                       break;
1240                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1241                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1242                 }
1243             }
1244         }
1245     }
1246     emms_c();
1247
1248     if (score)
1249         score64 = score;
1250     if (s->avctx->frame_skip_exp < 0)
1251         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1252                       -1.0/s->avctx->frame_skip_exp);
1253
1254     if (score64 < s->avctx->frame_skip_threshold)
1255         return 1;
1256     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1257         return 1;
1258     return 0;
1259 }
1260
1261 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1262 {
1263     AVPacket pkt = { 0 };
1264     int ret, got_output;
1265
1266     av_init_packet(&pkt);
1267     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1268     if (ret < 0)
1269         return ret;
1270
1271     ret = pkt.size;
1272     av_free_packet(&pkt);
1273     return ret;
1274 }
1275
1276 static int estimate_best_b_count(MpegEncContext *s)
1277 {
1278     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1279     AVCodecContext *c = avcodec_alloc_context3(NULL);
1280     const int scale = s->avctx->brd_scale;
1281     int i, j, out_size, p_lambda, b_lambda, lambda2;
1282     int64_t best_rd  = INT64_MAX;
1283     int best_b_count = -1;
1284
1285     av_assert0(scale >= 0 && scale <= 3);
1286
1287     //emms_c();
1288     //s->next_picture_ptr->quality;
1289     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1290     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1291     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1292     if (!b_lambda) // FIXME we should do this somewhere else
1293         b_lambda = p_lambda;
1294     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1295                FF_LAMBDA_SHIFT;
1296
1297     c->width        = s->width  >> scale;
1298     c->height       = s->height >> scale;
1299     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1300     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1301     c->mb_decision  = s->avctx->mb_decision;
1302     c->me_cmp       = s->avctx->me_cmp;
1303     c->mb_cmp       = s->avctx->mb_cmp;
1304     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1305     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1306     c->time_base    = s->avctx->time_base;
1307     c->max_b_frames = s->max_b_frames;
1308
1309     if (avcodec_open2(c, codec, NULL) < 0)
1310         return -1;
1311
1312     for (i = 0; i < s->max_b_frames + 2; i++) {
1313         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1314                                                 s->next_picture_ptr;
1315         uint8_t *data[4];
1316
1317         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1318             pre_input = *pre_input_ptr;
1319             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1320
1321             if (!pre_input.shared && i) {
1322                 data[0] += INPLACE_OFFSET;
1323                 data[1] += INPLACE_OFFSET;
1324                 data[2] += INPLACE_OFFSET;
1325             }
1326
1327             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1328                                        s->tmp_frames[i]->linesize[0],
1329                                        data[0],
1330                                        pre_input.f->linesize[0],
1331                                        c->width, c->height);
1332             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1333                                        s->tmp_frames[i]->linesize[1],
1334                                        data[1],
1335                                        pre_input.f->linesize[1],
1336                                        c->width >> 1, c->height >> 1);
1337             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1338                                        s->tmp_frames[i]->linesize[2],
1339                                        data[2],
1340                                        pre_input.f->linesize[2],
1341                                        c->width >> 1, c->height >> 1);
1342         }
1343     }
1344
1345     for (j = 0; j < s->max_b_frames + 1; j++) {
1346         int64_t rd = 0;
1347
1348         if (!s->input_picture[j])
1349             break;
1350
1351         c->error[0] = c->error[1] = c->error[2] = 0;
1352
1353         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1354         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1355
1356         out_size = encode_frame(c, s->tmp_frames[0]);
1357
1358         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1359
1360         for (i = 0; i < s->max_b_frames + 1; i++) {
1361             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1362
1363             s->tmp_frames[i + 1]->pict_type = is_p ?
1364                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1365             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1366
1367             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1368
1369             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1370         }
1371
1372         /* get the delayed frames */
1373         while (out_size) {
1374             out_size = encode_frame(c, NULL);
1375             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1376         }
1377
1378         rd += c->error[0] + c->error[1] + c->error[2];
1379
1380         if (rd < best_rd) {
1381             best_rd = rd;
1382             best_b_count = j;
1383         }
1384     }
1385
1386     avcodec_close(c);
1387     av_freep(&c);
1388
1389     return best_b_count;
1390 }
1391
1392 static int select_input_picture(MpegEncContext *s)
1393 {
1394     int i, ret;
1395
1396     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1397         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1398     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1399
1400     /* set next picture type & ordering */
1401     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1402         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1403             if (s->picture_in_gop_number < s->gop_size &&
1404                 s->next_picture_ptr &&
1405                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1406                 // FIXME check that te gop check above is +-1 correct
1407                 av_frame_unref(s->input_picture[0]->f);
1408
1409                 ff_vbv_update(s, 0);
1410
1411                 goto no_output_pic;
1412             }
1413         }
1414
1415         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1416             !s->next_picture_ptr || s->intra_only) {
1417             s->reordered_input_picture[0] = s->input_picture[0];
1418             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1419             s->reordered_input_picture[0]->f->coded_picture_number =
1420                 s->coded_picture_number++;
1421         } else {
1422             int b_frames;
1423
1424             if (s->flags & CODEC_FLAG_PASS2) {
1425                 for (i = 0; i < s->max_b_frames + 1; i++) {
1426                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1427
1428                     if (pict_num >= s->rc_context.num_entries)
1429                         break;
1430                     if (!s->input_picture[i]) {
1431                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1432                         break;
1433                     }
1434
1435                     s->input_picture[i]->f->pict_type =
1436                         s->rc_context.entry[pict_num].new_pict_type;
1437                 }
1438             }
1439
1440             if (s->avctx->b_frame_strategy == 0) {
1441                 b_frames = s->max_b_frames;
1442                 while (b_frames && !s->input_picture[b_frames])
1443                     b_frames--;
1444             } else if (s->avctx->b_frame_strategy == 1) {
1445                 for (i = 1; i < s->max_b_frames + 1; i++) {
1446                     if (s->input_picture[i] &&
1447                         s->input_picture[i]->b_frame_score == 0) {
1448                         s->input_picture[i]->b_frame_score =
1449                             get_intra_count(s,
1450                                             s->input_picture[i    ]->f->data[0],
1451                                             s->input_picture[i - 1]->f->data[0],
1452                                             s->linesize) + 1;
1453                     }
1454                 }
1455                 for (i = 0; i < s->max_b_frames + 1; i++) {
1456                     if (!s->input_picture[i] ||
1457                         s->input_picture[i]->b_frame_score - 1 >
1458                             s->mb_num / s->avctx->b_sensitivity)
1459                         break;
1460                 }
1461
1462                 b_frames = FFMAX(0, i - 1);
1463
1464                 /* reset scores */
1465                 for (i = 0; i < b_frames + 1; i++) {
1466                     s->input_picture[i]->b_frame_score = 0;
1467                 }
1468             } else if (s->avctx->b_frame_strategy == 2) {
1469                 b_frames = estimate_best_b_count(s);
1470             } else {
1471                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1472                 b_frames = 0;
1473             }
1474
1475             emms_c();
1476
1477             for (i = b_frames - 1; i >= 0; i--) {
1478                 int type = s->input_picture[i]->f->pict_type;
1479                 if (type && type != AV_PICTURE_TYPE_B)
1480                     b_frames = i;
1481             }
1482             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1483                 b_frames == s->max_b_frames) {
1484                 av_log(s->avctx, AV_LOG_ERROR,
1485                        "warning, too many b frames in a row\n");
1486             }
1487
1488             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1489                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1490                     s->gop_size > s->picture_in_gop_number) {
1491                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1492                 } else {
1493                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1494                         b_frames = 0;
1495                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1496                 }
1497             }
1498
1499             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1500                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1501                 b_frames--;
1502
1503             s->reordered_input_picture[0] = s->input_picture[b_frames];
1504             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1505                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1506             s->reordered_input_picture[0]->f->coded_picture_number =
1507                 s->coded_picture_number++;
1508             for (i = 0; i < b_frames; i++) {
1509                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1510                 s->reordered_input_picture[i + 1]->f->pict_type =
1511                     AV_PICTURE_TYPE_B;
1512                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1513                     s->coded_picture_number++;
1514             }
1515         }
1516     }
1517 no_output_pic:
1518     if (s->reordered_input_picture[0]) {
1519         s->reordered_input_picture[0]->reference =
1520            s->reordered_input_picture[0]->f->pict_type !=
1521                AV_PICTURE_TYPE_B ? 3 : 0;
1522
1523         ff_mpeg_unref_picture(s, &s->new_picture);
1524         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1525             return ret;
1526
1527         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1528             // input is a shared pix, so we can't modifiy it -> alloc a new
1529             // one & ensure that the shared one is reuseable
1530
1531             Picture *pic;
1532             int i = ff_find_unused_picture(s, 0);
1533             if (i < 0)
1534                 return i;
1535             pic = &s->picture[i];
1536
1537             pic->reference = s->reordered_input_picture[0]->reference;
1538             if (ff_alloc_picture(s, pic, 0) < 0) {
1539                 return -1;
1540             }
1541
1542             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1543             if (ret < 0)
1544                 return ret;
1545
1546             /* mark us unused / free shared pic */
1547             av_frame_unref(s->reordered_input_picture[0]->f);
1548             s->reordered_input_picture[0]->shared = 0;
1549
1550             s->current_picture_ptr = pic;
1551         } else {
1552             // input is not a shared pix -> reuse buffer for current_pix
1553             s->current_picture_ptr = s->reordered_input_picture[0];
1554             for (i = 0; i < 4; i++) {
1555                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1556             }
1557         }
1558         ff_mpeg_unref_picture(s, &s->current_picture);
1559         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1560                                        s->current_picture_ptr)) < 0)
1561             return ret;
1562
1563         s->picture_number = s->new_picture.f->display_picture_number;
1564     } else {
1565         ff_mpeg_unref_picture(s, &s->new_picture);
1566     }
1567     return 0;
1568 }
1569
1570 static void frame_end(MpegEncContext *s)
1571 {
1572     if (s->unrestricted_mv &&
1573         s->current_picture.reference &&
1574         !s->intra_only) {
1575         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1576         int hshift = desc->log2_chroma_w;
1577         int vshift = desc->log2_chroma_h;
1578         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1579                                 s->current_picture.f->linesize[0],
1580                                 s->h_edge_pos, s->v_edge_pos,
1581                                 EDGE_WIDTH, EDGE_WIDTH,
1582                                 EDGE_TOP | EDGE_BOTTOM);
1583         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1584                                 s->current_picture.f->linesize[1],
1585                                 s->h_edge_pos >> hshift,
1586                                 s->v_edge_pos >> vshift,
1587                                 EDGE_WIDTH >> hshift,
1588                                 EDGE_WIDTH >> vshift,
1589                                 EDGE_TOP | EDGE_BOTTOM);
1590         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1591                                 s->current_picture.f->linesize[2],
1592                                 s->h_edge_pos >> hshift,
1593                                 s->v_edge_pos >> vshift,
1594                                 EDGE_WIDTH >> hshift,
1595                                 EDGE_WIDTH >> vshift,
1596                                 EDGE_TOP | EDGE_BOTTOM);
1597     }
1598
1599     emms_c();
1600
1601     s->last_pict_type                 = s->pict_type;
1602     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1603     if (s->pict_type!= AV_PICTURE_TYPE_B)
1604         s->last_non_b_pict_type = s->pict_type;
1605
1606     s->avctx->coded_frame = s->current_picture_ptr->f;
1607
1608 }
1609
1610 static void update_noise_reduction(MpegEncContext *s)
1611 {
1612     int intra, i;
1613
1614     for (intra = 0; intra < 2; intra++) {
1615         if (s->dct_count[intra] > (1 << 16)) {
1616             for (i = 0; i < 64; i++) {
1617                 s->dct_error_sum[intra][i] >>= 1;
1618             }
1619             s->dct_count[intra] >>= 1;
1620         }
1621
1622         for (i = 0; i < 64; i++) {
1623             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1624                                        s->dct_count[intra] +
1625                                        s->dct_error_sum[intra][i] / 2) /
1626                                       (s->dct_error_sum[intra][i] + 1);
1627         }
1628     }
1629 }
1630
1631 static int frame_start(MpegEncContext *s)
1632 {
1633     int ret;
1634
1635     /* mark & release old frames */
1636     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1637         s->last_picture_ptr != s->next_picture_ptr &&
1638         s->last_picture_ptr->f->buf[0]) {
1639         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1640     }
1641
1642     s->current_picture_ptr->f->pict_type = s->pict_type;
1643     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1644
1645     ff_mpeg_unref_picture(s, &s->current_picture);
1646     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1647                                    s->current_picture_ptr)) < 0)
1648         return ret;
1649
1650     if (s->pict_type != AV_PICTURE_TYPE_B) {
1651         s->last_picture_ptr = s->next_picture_ptr;
1652         if (!s->droppable)
1653             s->next_picture_ptr = s->current_picture_ptr;
1654     }
1655
1656     if (s->last_picture_ptr) {
1657         ff_mpeg_unref_picture(s, &s->last_picture);
1658         if (s->last_picture_ptr->f->buf[0] &&
1659             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1660                                        s->last_picture_ptr)) < 0)
1661             return ret;
1662     }
1663     if (s->next_picture_ptr) {
1664         ff_mpeg_unref_picture(s, &s->next_picture);
1665         if (s->next_picture_ptr->f->buf[0] &&
1666             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1667                                        s->next_picture_ptr)) < 0)
1668             return ret;
1669     }
1670
1671     if (s->picture_structure!= PICT_FRAME) {
1672         int i;
1673         for (i = 0; i < 4; i++) {
1674             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1675                 s->current_picture.f->data[i] +=
1676                     s->current_picture.f->linesize[i];
1677             }
1678             s->current_picture.f->linesize[i] *= 2;
1679             s->last_picture.f->linesize[i]    *= 2;
1680             s->next_picture.f->linesize[i]    *= 2;
1681         }
1682     }
1683
1684     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1685         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1686         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1687     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1688         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1689         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1690     } else {
1691         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1692         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1693     }
1694
1695     if (s->dct_error_sum) {
1696         av_assert2(s->avctx->noise_reduction && s->encoding);
1697         update_noise_reduction(s);
1698     }
1699
1700     return 0;
1701 }
1702
1703 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1704                           const AVFrame *pic_arg, int *got_packet)
1705 {
1706     MpegEncContext *s = avctx->priv_data;
1707     int i, stuffing_count, ret;
1708     int context_count = s->slice_context_count;
1709
1710     s->picture_in_gop_number++;
1711
1712     if (load_input_picture(s, pic_arg) < 0)
1713         return -1;
1714
1715     if (select_input_picture(s) < 0) {
1716         return -1;
1717     }
1718
1719     /* output? */
1720     if (s->new_picture.f->data[0]) {
1721         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1722         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1723                                               :
1724                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1725         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1726             return ret;
1727         if (s->mb_info) {
1728             s->mb_info_ptr = av_packet_new_side_data(pkt,
1729                                  AV_PKT_DATA_H263_MB_INFO,
1730                                  s->mb_width*s->mb_height*12);
1731             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1732         }
1733
1734         for (i = 0; i < context_count; i++) {
1735             int start_y = s->thread_context[i]->start_mb_y;
1736             int   end_y = s->thread_context[i]->  end_mb_y;
1737             int h       = s->mb_height;
1738             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1739             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1740
1741             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1742         }
1743
1744         s->pict_type = s->new_picture.f->pict_type;
1745         //emms_c();
1746         ret = frame_start(s);
1747         if (ret < 0)
1748             return ret;
1749 vbv_retry:
1750         ret = encode_picture(s, s->picture_number);
1751         if (growing_buffer) {
1752             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1753             pkt->data = s->pb.buf;
1754             pkt->size = avctx->internal->byte_buffer_size;
1755         }
1756         if (ret < 0)
1757             return -1;
1758
1759         avctx->header_bits = s->header_bits;
1760         avctx->mv_bits     = s->mv_bits;
1761         avctx->misc_bits   = s->misc_bits;
1762         avctx->i_tex_bits  = s->i_tex_bits;
1763         avctx->p_tex_bits  = s->p_tex_bits;
1764         avctx->i_count     = s->i_count;
1765         // FIXME f/b_count in avctx
1766         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1767         avctx->skip_count  = s->skip_count;
1768
1769         frame_end(s);
1770
1771         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1772             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1773
1774         if (avctx->rc_buffer_size) {
1775             RateControlContext *rcc = &s->rc_context;
1776             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1777
1778             if (put_bits_count(&s->pb) > max_size &&
1779                 s->lambda < s->lmax) {
1780                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1781                                        (s->qscale + 1) / s->qscale);
1782                 if (s->adaptive_quant) {
1783                     int i;
1784                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1785                         s->lambda_table[i] =
1786                             FFMAX(s->lambda_table[i] + 1,
1787                                   s->lambda_table[i] * (s->qscale + 1) /
1788                                   s->qscale);
1789                 }
1790                 s->mb_skipped = 0;        // done in frame_start()
1791                 // done in encode_picture() so we must undo it
1792                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1793                     if (s->flipflop_rounding          ||
1794                         s->codec_id == AV_CODEC_ID_H263P ||
1795                         s->codec_id == AV_CODEC_ID_MPEG4)
1796                         s->no_rounding ^= 1;
1797                 }
1798                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1799                     s->time_base       = s->last_time_base;
1800                     s->last_non_b_time = s->time - s->pp_time;
1801                 }
1802                 for (i = 0; i < context_count; i++) {
1803                     PutBitContext *pb = &s->thread_context[i]->pb;
1804                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1805                 }
1806                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1807                 goto vbv_retry;
1808             }
1809
1810             av_assert0(s->avctx->rc_max_rate);
1811         }
1812
1813         if (s->flags & CODEC_FLAG_PASS1)
1814             ff_write_pass1_stats(s);
1815
1816         for (i = 0; i < 4; i++) {
1817             s->current_picture_ptr->f->error[i] =
1818             s->current_picture.f->error[i] =
1819                 s->current_picture.error[i];
1820             avctx->error[i] += s->current_picture_ptr->f->error[i];
1821         }
1822
1823         if (s->flags & CODEC_FLAG_PASS1)
1824             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1825                    avctx->i_tex_bits + avctx->p_tex_bits ==
1826                        put_bits_count(&s->pb));
1827         flush_put_bits(&s->pb);
1828         s->frame_bits  = put_bits_count(&s->pb);
1829
1830         stuffing_count = ff_vbv_update(s, s->frame_bits);
1831         s->stuffing_bits = 8*stuffing_count;
1832         if (stuffing_count) {
1833             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1834                     stuffing_count + 50) {
1835                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1836                 return -1;
1837             }
1838
1839             switch (s->codec_id) {
1840             case AV_CODEC_ID_MPEG1VIDEO:
1841             case AV_CODEC_ID_MPEG2VIDEO:
1842                 while (stuffing_count--) {
1843                     put_bits(&s->pb, 8, 0);
1844                 }
1845             break;
1846             case AV_CODEC_ID_MPEG4:
1847                 put_bits(&s->pb, 16, 0);
1848                 put_bits(&s->pb, 16, 0x1C3);
1849                 stuffing_count -= 4;
1850                 while (stuffing_count--) {
1851                     put_bits(&s->pb, 8, 0xFF);
1852                 }
1853             break;
1854             default:
1855                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1856             }
1857             flush_put_bits(&s->pb);
1858             s->frame_bits  = put_bits_count(&s->pb);
1859         }
1860
1861         /* update mpeg1/2 vbv_delay for CBR */
1862         if (s->avctx->rc_max_rate                          &&
1863             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1864             s->out_format == FMT_MPEG1                     &&
1865             90000LL * (avctx->rc_buffer_size - 1) <=
1866                 s->avctx->rc_max_rate * 0xFFFFLL) {
1867             int vbv_delay, min_delay;
1868             double inbits  = s->avctx->rc_max_rate *
1869                              av_q2d(s->avctx->time_base);
1870             int    minbits = s->frame_bits - 8 *
1871                              (s->vbv_delay_ptr - s->pb.buf - 1);
1872             double bits    = s->rc_context.buffer_index + minbits - inbits;
1873
1874             if (bits < 0)
1875                 av_log(s->avctx, AV_LOG_ERROR,
1876                        "Internal error, negative bits\n");
1877
1878             assert(s->repeat_first_field == 0);
1879
1880             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1881             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1882                         s->avctx->rc_max_rate;
1883
1884             vbv_delay = FFMAX(vbv_delay, min_delay);
1885
1886             av_assert0(vbv_delay < 0xFFFF);
1887
1888             s->vbv_delay_ptr[0] &= 0xF8;
1889             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1890             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1891             s->vbv_delay_ptr[2] &= 0x07;
1892             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1893             avctx->vbv_delay     = vbv_delay * 300;
1894         }
1895         s->total_bits     += s->frame_bits;
1896         avctx->frame_bits  = s->frame_bits;
1897
1898         pkt->pts = s->current_picture.f->pts;
1899         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1900             if (!s->current_picture.f->coded_picture_number)
1901                 pkt->dts = pkt->pts - s->dts_delta;
1902             else
1903                 pkt->dts = s->reordered_pts;
1904             s->reordered_pts = pkt->pts;
1905         } else
1906             pkt->dts = pkt->pts;
1907         if (s->current_picture.f->key_frame)
1908             pkt->flags |= AV_PKT_FLAG_KEY;
1909         if (s->mb_info)
1910             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1911     } else {
1912         s->frame_bits = 0;
1913     }
1914
1915     /* release non-reference frames */
1916     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1917         if (!s->picture[i].reference)
1918             ff_mpeg_unref_picture(s, &s->picture[i]);
1919     }
1920
1921     av_assert1((s->frame_bits & 7) == 0);
1922
1923     pkt->size = s->frame_bits / 8;
1924     *got_packet = !!pkt->size;
1925     return 0;
1926 }
1927
1928 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1929                                                 int n, int threshold)
1930 {
1931     static const char tab[64] = {
1932         3, 2, 2, 1, 1, 1, 1, 1,
1933         1, 1, 1, 1, 1, 1, 1, 1,
1934         1, 1, 1, 1, 1, 1, 1, 1,
1935         0, 0, 0, 0, 0, 0, 0, 0,
1936         0, 0, 0, 0, 0, 0, 0, 0,
1937         0, 0, 0, 0, 0, 0, 0, 0,
1938         0, 0, 0, 0, 0, 0, 0, 0,
1939         0, 0, 0, 0, 0, 0, 0, 0
1940     };
1941     int score = 0;
1942     int run = 0;
1943     int i;
1944     int16_t *block = s->block[n];
1945     const int last_index = s->block_last_index[n];
1946     int skip_dc;
1947
1948     if (threshold < 0) {
1949         skip_dc = 0;
1950         threshold = -threshold;
1951     } else
1952         skip_dc = 1;
1953
1954     /* Are all we could set to zero already zero? */
1955     if (last_index <= skip_dc - 1)
1956         return;
1957
1958     for (i = 0; i <= last_index; i++) {
1959         const int j = s->intra_scantable.permutated[i];
1960         const int level = FFABS(block[j]);
1961         if (level == 1) {
1962             if (skip_dc && i == 0)
1963                 continue;
1964             score += tab[run];
1965             run = 0;
1966         } else if (level > 1) {
1967             return;
1968         } else {
1969             run++;
1970         }
1971     }
1972     if (score >= threshold)
1973         return;
1974     for (i = skip_dc; i <= last_index; i++) {
1975         const int j = s->intra_scantable.permutated[i];
1976         block[j] = 0;
1977     }
1978     if (block[0])
1979         s->block_last_index[n] = 0;
1980     else
1981         s->block_last_index[n] = -1;
1982 }
1983
1984 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1985                                int last_index)
1986 {
1987     int i;
1988     const int maxlevel = s->max_qcoeff;
1989     const int minlevel = s->min_qcoeff;
1990     int overflow = 0;
1991
1992     if (s->mb_intra) {
1993         i = 1; // skip clipping of intra dc
1994     } else
1995         i = 0;
1996
1997     for (; i <= last_index; i++) {
1998         const int j = s->intra_scantable.permutated[i];
1999         int level = block[j];
2000
2001         if (level > maxlevel) {
2002             level = maxlevel;
2003             overflow++;
2004         } else if (level < minlevel) {
2005             level = minlevel;
2006             overflow++;
2007         }
2008
2009         block[j] = level;
2010     }
2011
2012     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2013         av_log(s->avctx, AV_LOG_INFO,
2014                "warning, clipping %d dct coefficients to %d..%d\n",
2015                overflow, minlevel, maxlevel);
2016 }
2017
/**
 * Compute a weight for each sample of an 8x8 block from the local
 * variation of the source pixels.
 *
 * For every position the samples of the surrounding 3x3 window (cropped to
 * the 8x8 block) are accumulated and the weight is set to
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output array of 64 weights, stored row by row
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of samples inside the cropped window */
            int count   = (bottom - top) * (right - left);
            int total   = 0;
            int squares = 0;
            int wx, wy;

            for (wy = top; wy < bottom; wy++) {
                for (wx = left; wx < right; wx++) {
                    int v = ptr[wx + wy * stride];

                    total   += v;
                    squares += v * v;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * squares - total * total)) / count;
        }
    }
}
2041
2042 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2043                                                 int motion_x, int motion_y,
2044                                                 int mb_block_height,
2045                                                 int mb_block_width,
2046                                                 int mb_block_count)
2047 {
2048     int16_t weight[12][64];
2049     int16_t orig[12][64];
2050     const int mb_x = s->mb_x;
2051     const int mb_y = s->mb_y;
2052     int i;
2053     int skip_dct[12];
2054     int dct_offset = s->linesize * 8; // default for progressive frames
2055     int uv_dct_offset = s->uvlinesize * 8;
2056     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2057     ptrdiff_t wrap_y, wrap_c;
2058
2059     for (i = 0; i < mb_block_count; i++)
2060         skip_dct[i] = s->skipdct;
2061
2062     if (s->adaptive_quant) {
2063         const int last_qp = s->qscale;
2064         const int mb_xy = mb_x + mb_y * s->mb_stride;
2065
2066         s->lambda = s->lambda_table[mb_xy];
2067         update_qscale(s);
2068
2069         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2070             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2071             s->dquant = s->qscale - last_qp;
2072
2073             if (s->out_format == FMT_H263) {
2074                 s->dquant = av_clip(s->dquant, -2, 2);
2075
2076                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2077                     if (!s->mb_intra) {
2078                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2079                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2080                                 s->dquant = 0;
2081                         }
2082                         if (s->mv_type == MV_TYPE_8X8)
2083                             s->dquant = 0;
2084                     }
2085                 }
2086             }
2087         }
2088         ff_set_qscale(s, last_qp + s->dquant);
2089     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2090         ff_set_qscale(s, s->qscale + s->dquant);
2091
2092     wrap_y = s->linesize;
2093     wrap_c = s->uvlinesize;
2094     ptr_y  = s->new_picture.f->data[0] +
2095              (mb_y * 16 * wrap_y)              + mb_x * 16;
2096     ptr_cb = s->new_picture.f->data[1] +
2097              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2098     ptr_cr = s->new_picture.f->data[2] +
2099              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2100
2101     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2102         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2103         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2104         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2105         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2106                                  wrap_y, wrap_y,
2107                                  16, 16, mb_x * 16, mb_y * 16,
2108                                  s->width, s->height);
2109         ptr_y = ebuf;
2110         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2111                                  wrap_c, wrap_c,
2112                                  mb_block_width, mb_block_height,
2113                                  mb_x * mb_block_width, mb_y * mb_block_height,
2114                                  cw, ch);
2115         ptr_cb = ebuf + 16 * wrap_y;
2116         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2117                                  wrap_c, wrap_c,
2118                                  mb_block_width, mb_block_height,
2119                                  mb_x * mb_block_width, mb_y * mb_block_height,
2120                                  cw, ch);
2121         ptr_cr = ebuf + 16 * wrap_y + 16;
2122     }
2123
2124     if (s->mb_intra) {
2125         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2126             int progressive_score, interlaced_score;
2127
2128             s->interlaced_dct = 0;
2129             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2130                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2131                                                      NULL, wrap_y, 8) - 400;
2132
2133             if (progressive_score > 0) {
2134                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2135                                                         NULL, wrap_y * 2, 8) +
2136                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2137                                                         NULL, wrap_y * 2, 8);
2138                 if (progressive_score > interlaced_score) {
2139                     s->interlaced_dct = 1;
2140
2141                     dct_offset = wrap_y;
2142                     uv_dct_offset = wrap_c;
2143                     wrap_y <<= 1;
2144                     if (s->chroma_format == CHROMA_422 ||
2145                         s->chroma_format == CHROMA_444)
2146                         wrap_c <<= 1;
2147                 }
2148             }
2149         }
2150
2151         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2152         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2153         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2154         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2155
2156         if (s->flags & CODEC_FLAG_GRAY) {
2157             skip_dct[4] = 1;
2158             skip_dct[5] = 1;
2159         } else {
2160             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2161             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2162             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2163                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2164                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2165             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2166                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2167                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2168                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2169                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2170                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2171                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2172             }
2173         }
2174     } else {
2175         op_pixels_func (*op_pix)[4];
2176         qpel_mc_func (*op_qpix)[16];
2177         uint8_t *dest_y, *dest_cb, *dest_cr;
2178
2179         dest_y  = s->dest[0];
2180         dest_cb = s->dest[1];
2181         dest_cr = s->dest[2];
2182
2183         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2184             op_pix  = s->hdsp.put_pixels_tab;
2185             op_qpix = s->qdsp.put_qpel_pixels_tab;
2186         } else {
2187             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2188             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2189         }
2190
2191         if (s->mv_dir & MV_DIR_FORWARD) {
2192             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2193                           s->last_picture.f->data,
2194                           op_pix, op_qpix);
2195             op_pix  = s->hdsp.avg_pixels_tab;
2196             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2197         }
2198         if (s->mv_dir & MV_DIR_BACKWARD) {
2199             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2200                           s->next_picture.f->data,
2201                           op_pix, op_qpix);
2202         }
2203
2204         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2205             int progressive_score, interlaced_score;
2206
2207             s->interlaced_dct = 0;
2208             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2209                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2210                                                      ptr_y + wrap_y * 8,
2211                                                      wrap_y, 8) - 400;
2212
2213             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2214                 progressive_score -= 400;
2215
2216             if (progressive_score > 0) {
2217                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2218                                                         wrap_y * 2, 8) +
2219                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2220                                                         ptr_y + wrap_y,
2221                                                         wrap_y * 2, 8);
2222
2223                 if (progressive_score > interlaced_score) {
2224                     s->interlaced_dct = 1;
2225
2226                     dct_offset = wrap_y;
2227                     uv_dct_offset = wrap_c;
2228                     wrap_y <<= 1;
2229                     if (s->chroma_format == CHROMA_422)
2230                         wrap_c <<= 1;
2231                 }
2232             }
2233         }
2234
2235         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2236         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2237         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2238                             dest_y + dct_offset, wrap_y);
2239         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2240                             dest_y + dct_offset + 8, wrap_y);
2241
2242         if (s->flags & CODEC_FLAG_GRAY) {
2243             skip_dct[4] = 1;
2244             skip_dct[5] = 1;
2245         } else {
2246             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2247             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2248             if (!s->chroma_y_shift) { /* 422 */
2249                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2250                                     dest_cb + uv_dct_offset, wrap_c);
2251                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2252                                     dest_cr + uv_dct_offset, wrap_c);
2253             }
2254         }
2255         /* pre quantization */
2256         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2257                 2 * s->qscale * s->qscale) {
2258             // FIXME optimize
2259             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2260                 skip_dct[0] = 1;
2261             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2262                 skip_dct[1] = 1;
2263             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2264                                wrap_y, 8) < 20 * s->qscale)
2265                 skip_dct[2] = 1;
2266             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2267                                wrap_y, 8) < 20 * s->qscale)
2268                 skip_dct[3] = 1;
2269             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2270                 skip_dct[4] = 1;
2271             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2272                 skip_dct[5] = 1;
2273             if (!s->chroma_y_shift) { /* 422 */
2274                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2275                                    dest_cb + uv_dct_offset,
2276                                    wrap_c, 8) < 20 * s->qscale)
2277                     skip_dct[6] = 1;
2278                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2279                                    dest_cr + uv_dct_offset,
2280                                    wrap_c, 8) < 20 * s->qscale)
2281                     skip_dct[7] = 1;
2282             }
2283         }
2284     }
2285
2286     if (s->quantizer_noise_shaping) {
2287         if (!skip_dct[0])
2288             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2289         if (!skip_dct[1])
2290             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2291         if (!skip_dct[2])
2292             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2293         if (!skip_dct[3])
2294             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2295         if (!skip_dct[4])
2296             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2297         if (!skip_dct[5])
2298             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2299         if (!s->chroma_y_shift) { /* 422 */
2300             if (!skip_dct[6])
2301                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2302                                   wrap_c);
2303             if (!skip_dct[7])
2304                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2305                                   wrap_c);
2306         }
2307         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2308     }
2309
2310     /* DCT & quantize */
2311     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2312     {
2313         for (i = 0; i < mb_block_count; i++) {
2314             if (!skip_dct[i]) {
2315                 int overflow;
2316                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2317                 // FIXME we could decide to change to quantizer instead of
2318                 // clipping
2319                 // JS: I don't think that would be a good idea it could lower
2320                 //     quality instead of improve it. Just INTRADC clipping
2321                 //     deserves changes in quantizer
2322                 if (overflow)
2323                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2324             } else
2325                 s->block_last_index[i] = -1;
2326         }
2327         if (s->quantizer_noise_shaping) {
2328             for (i = 0; i < mb_block_count; i++) {
2329                 if (!skip_dct[i]) {
2330                     s->block_last_index[i] =
2331                         dct_quantize_refine(s, s->block[i], weight[i],
2332                                             orig[i], i, s->qscale);
2333                 }
2334             }
2335         }
2336
2337         if (s->luma_elim_threshold && !s->mb_intra)
2338             for (i = 0; i < 4; i++)
2339                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2340         if (s->chroma_elim_threshold && !s->mb_intra)
2341             for (i = 4; i < mb_block_count; i++)
2342                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2343
2344         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2345             for (i = 0; i < mb_block_count; i++) {
2346                 if (s->block_last_index[i] == -1)
2347                     s->coded_score[i] = INT_MAX / 256;
2348             }
2349         }
2350     }
2351
2352     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2353         s->block_last_index[4] =
2354         s->block_last_index[5] = 0;
2355         s->block[4][0] =
2356         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2357         if (!s->chroma_y_shift) { /* 422 / 444 */
2358             for (i=6; i<12; i++) {
2359                 s->block_last_index[i] = 0;
2360                 s->block[i][0] = s->block[4][0];
2361             }
2362         }
2363     }
2364
2365     // non c quantize code returns incorrect block_last_index FIXME
2366     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2367         for (i = 0; i < mb_block_count; i++) {
2368             int j;
2369             if (s->block_last_index[i] > 0) {
2370                 for (j = 63; j > 0; j--) {
2371                     if (s->block[i][s->intra_scantable.permutated[j]])
2372                         break;
2373                 }
2374                 s->block_last_index[i] = j;
2375             }
2376         }
2377     }
2378
2379     /* huffman encode */
2380     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2381     case AV_CODEC_ID_MPEG1VIDEO:
2382     case AV_CODEC_ID_MPEG2VIDEO:
2383         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2384             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2385         break;
2386     case AV_CODEC_ID_MPEG4:
2387         if (CONFIG_MPEG4_ENCODER)
2388             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2389         break;
2390     case AV_CODEC_ID_MSMPEG4V2:
2391     case AV_CODEC_ID_MSMPEG4V3:
2392     case AV_CODEC_ID_WMV1:
2393         if (CONFIG_MSMPEG4_ENCODER)
2394             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2395         break;
2396     case AV_CODEC_ID_WMV2:
2397         if (CONFIG_WMV2_ENCODER)
2398             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2399         break;
2400     case AV_CODEC_ID_H261:
2401         if (CONFIG_H261_ENCODER)
2402             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2403         break;
2404     case AV_CODEC_ID_H263:
2405     case AV_CODEC_ID_H263P:
2406     case AV_CODEC_ID_FLV1:
2407     case AV_CODEC_ID_RV10:
2408     case AV_CODEC_ID_RV20:
2409         if (CONFIG_H263_ENCODER)
2410             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2411         break;
2412     case AV_CODEC_ID_MJPEG:
2413     case AV_CODEC_ID_AMV:
2414         if (CONFIG_MJPEG_ENCODER)
2415             ff_mjpeg_encode_mb(s, s->block);
2416         break;
2417     default:
2418         av_assert1(0);
2419     }
2420 }
2421
/**
 * Encode the current macroblock by dispatching on the chroma format.
 *
 * Forwards to encode_mb_internal() with three constant trailing arguments:
 * (8, 8, 6) for CHROMA_420, (16, 8, 8) for CHROMA_422 and (16, 16, 12) for
 * every other format.
 * NOTE(review): these presumably are the chroma block width, chroma block
 * height and number of blocks per macroblock; the signature of
 * encode_mb_internal() is not visible from here - confirm.
 *
 * @param s        encoder context
 * @param motion_x passed through unchanged to encode_mb_internal()
 * @param motion_y passed through unchanged to encode_mb_internal()
 */
2422 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2423 {
2424     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2425     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2426     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2427 }
2428
/**
 * Copy the state that a trial macroblock encode starts from.
 *
 * Copies from s into d: last_mv, mb_skip_run, last_dc[0..2], the bit and
 * macroblock statistics counters, qscale, dquant and esc3_level_length.
 * In addition d->last_bits and d->mb_skipped are reset to 0 rather than
 * copied.
 *
 * @param d    destination context
 * @param s    source context
 * @param type not used by this function
 */
2429 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2430     int i;
2431
2432     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2433
2434     /* mpeg1 */
2435     d->mb_skip_run= s->mb_skip_run;
2436     for(i=0; i<3; i++)
2437         d->last_dc[i] = s->last_dc[i];
2438
2439     /* statistics */
2440     d->mv_bits= s->mv_bits;
2441     d->i_tex_bits= s->i_tex_bits;
2442     d->p_tex_bits= s->p_tex_bits;
2443     d->i_count= s->i_count;
2444     d->f_count= s->f_count;
2445     d->b_count= s->b_count;
2446     d->skip_count= s->skip_count;
2447     d->misc_bits= s->misc_bits;
         /* reset, not copied from s */
2448     d->last_bits= 0;
2449
2450     d->mb_skipped= 0;
2451     d->qscale= s->qscale;
2452     d->dquant= s->dquant;
2453
2454     d->esc3_level_length= s->esc3_level_length;
2455 }
2456
/**
 * Copy the state produced by a macroblock encode from s into d.
 *
 * Copies the motion vectors (mv, last_mv), mb_skip_run, last_dc[0..2], the
 * statistics counters, the macroblock mode fields (mb_intra, mb_skipped,
 * mv_type, mv_dir), the bit writer pb, the block pointer, the first eight
 * block_last_index entries, interlaced_dct, qscale and esc3_level_length.
 * pb2 and tex_pb are copied only when s->data_partitioning is set.
 *
 * @param d    destination context
 * @param s    source context (the one that was just encoded with)
 * @param type not used by this function
 */
2457 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2458     int i;
2459
2460     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2461     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2462
2463     /* mpeg1 */
2464     d->mb_skip_run= s->mb_skip_run;
2465     for(i=0; i<3; i++)
2466         d->last_dc[i] = s->last_dc[i];
2467
2468     /* statistics */
2469     d->mv_bits= s->mv_bits;
2470     d->i_tex_bits= s->i_tex_bits;
2471     d->p_tex_bits= s->p_tex_bits;
2472     d->i_count= s->i_count;
2473     d->f_count= s->f_count;
2474     d->b_count= s->b_count;
2475     d->skip_count= s->skip_count;
2476     d->misc_bits= s->misc_bits;
2477
2478     d->mb_intra= s->mb_intra;
2479     d->mb_skipped= s->mb_skipped;
2480     d->mv_type= s->mv_type;
2481     d->mv_dir= s->mv_dir;
         /* the bit writers are copied by value, including their current position */
2482     d->pb= s->pb;
2483     if(s->data_partitioning){
2484         d->pb2= s->pb2;
2485         d->tex_pb= s->tex_pb;
2486     }
2487     d->block= s->block;
         /* NOTE(review): only entries 0..7 are copied, while encode_mb() passes
          * 12 as its last argument for formats other than 420/422 - confirm
          * whether entries 8..11 ever need to be carried over. */
2488     for(i=0; i<8; i++)
2489         d->block_last_index[i]= s->block_last_index[i];
2490     d->interlaced_dct= s->interlaced_dct;
2491     d->qscale= s->qscale;
2492
2493     d->esc3_level_length= s->esc3_level_length;
2494 }
2495
/**
 * Trial-encode the current macroblock with the mode already set up in s and
 * keep the result if it scores lower than the best candidate so far.
 *
 * The starting state is first restored from backup. Output goes to the
 * block buffer and bit writers selected by *next_block. The score is the
 * number of bits written; when avctx->mb_decision is FF_MB_DECISION_RD the
 * macroblock is additionally decoded and the score becomes
 * bits * lambda2 + (sse_mb(s) << FF_LAMBDA_SHIFT).
 *
 * @param s          encoder context used for the trial
 * @param backup     state to restore into s before encoding
 * @param best       receives the post-encode state when this trial wins
 * @param type       forwarded to the copy_context_* helpers
 * @param pb         pair of bit writers; pb[*next_block] is copied into s->pb
 * @param pb2        same as pb, used only when s->data_partitioning is set
 * @param tex_pb     same as pb, used only when s->data_partitioning is set
 * @param dmin       in/out: lowest score so far, updated when this trial wins
 * @param next_block in/out: index (0 or 1) of the buffers to use; toggled
 *                   when this trial wins so that the winner is not overwritten
 * @param motion_x   passed to encode_mb()
 * @param motion_y   passed to encode_mb()
 */
2496 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2497                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2498                            int *dmin, int *next_block, int motion_x, int motion_y)
2499 {
2500     int score;
2501     uint8_t *dest_backup[3];
2502
2503     copy_context_before_encode(s, backup, type);
2504
2505     s->block= s->blocks[*next_block];
2506     s->pb= pb[*next_block];
2507     if(s->data_partitioning){
2508         s->pb2   = pb2   [*next_block];
2509         s->tex_pb= tex_pb[*next_block];
2510     }
2511
         /* when using buffer set 1, point the destination planes into
          * rd_scratchpad for the duration of this trial */
2512     if(*next_block){
2513         memcpy(dest_backup, s->dest, sizeof(s->dest));
2514         s->dest[0] = s->rd_scratchpad;
2515         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2516         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2517         av_assert0(s->linesize >= 32); //FIXME
2518     }
2519
2520     encode_mb(s, motion_x, motion_y);
2521
         /* rate part of the score: bits written to all active bit writers */
2522     score= put_bits_count(&s->pb);
2523     if(s->data_partitioning){
2524         score+= put_bits_count(&s->pb2);
2525         score+= put_bits_count(&s->tex_pb);
2526     }
2527
2528     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2529         ff_mpv_decode_mb(s, s->block);
2530
2531         score *= s->lambda2;
2532         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2533     }
2534
         /* undo the scratchpad redirection; *next_block is still unchanged here,
          * so dest_backup was filled above whenever this branch runs */
2535     if(*next_block){
2536         memcpy(s->dest, dest_backup, sizeof(s->dest));
2537     }
2538
2539     if(score<*dmin){
2540         *dmin= score;
2541         *next_block^=1;
2542
2543         copy_context_after_encode(best, s, type);
2544     }
2545 }
2546
/**
 * Sum of squared differences between two w x h pixel regions.
 *
 * 16x16 and 8x8 regions are handed to s->mecc.sse[0] and s->mecc.sse[1]
 * respectively; every other size is handled by the plain loop below.
 *
 * @param s      encoder context (supplies the mecc function table)
 * @param src1   top-left sample of the first region
 * @param src2   top-left sample of the second region
 * @param w      region width in samples
 * @param h      region height in samples
 * @param stride distance in bytes between rows, used for both regions
 * @return       the accumulated squared error
 */
2547 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
         /* offset by 256 so that the table can be indexed with a signed
          * difference of two uint8_t samples (-255..255) */
2548     uint32_t *sq = ff_square_tab + 256;
2549     int acc=0;
2550     int x,y;
2551
2552     if(w==16 && h==16)
2553         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2554     else if(w==8 && h==8)
2555         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2556
2557     for(y=0; y<h; y++){
2558         for(x=0; x<w; x++){
2559             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2560         }
2561     }
2562
2563     av_assert2(acc>=0);
2564
2565     return acc;
2566 }
2567
/**
 * Distortion of the current macroblock: compares the source picture
 * (s->new_picture) against the data at s->dest[0..2] and returns the sum
 * over the three planes.
 *
 * The compared area is clipped to the picture size, so macroblocks on the
 * right or bottom edge may cover fewer than 16x16 luma samples. For a full
 * 16x16 macroblock the mecc.nsse functions are used when avctx->mb_cmp is
 * FF_CMP_NSSE and the mecc.sse functions otherwise; partial macroblocks go
 * through sse() with the chroma size halved in both directions.
 *
 * NOTE(review): chroma is always addressed as mb_x*8 / mb_y*uvlinesize*8 and
 * compared as 8x8 (or w>>1 x h>>1), which looks like it assumes 4:2:0
 * subsampling - confirm for other chroma formats.
 *
 * @param s encoder context; mb_x and mb_y select the macroblock
 * @return  summed distortion of the luma and both chroma planes
 */
2568 static int sse_mb(MpegEncContext *s){
2569     int w= 16;
2570     int h= 16;
2571
         /* clip the compared area at the right and bottom picture borders */
2572     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2573     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2574
2575     if(w==16 && h==16)
2576       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2577         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2578                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2579                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2580       }else{
2581         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2582                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2583                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2584       }
2585     else
2586         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2587                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2588                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2589 }
2590
/**
 * Worker that runs the motion estimation pre-pass over one slice context.
 *
 * Visits the macroblocks of rows start_mb_y..end_mb_y-1 in reverse order
 * (last row first, and within each row from the last column to the first)
 * and calls ff_pre_estimate_p_frame_motion() for each one. me.pre_pass is
 * set to 1 for the duration of the pass and cleared afterwards; the search
 * diamond size is taken from avctx->pre_dia_size.
 *
 * @param c   not used by this function
 * @param arg points to a MpegEncContext pointer
 * @return    always 0
 */
2591 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2592     MpegEncContext *s= *(void**)arg;
2593
2594
2595     s->me.pre_pass=1;
2596     s->me.dia_size= s->avctx->pre_dia_size;
2597     s->first_slice_line=1;
2598     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2599         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2600             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2601         }
             /* only the first row processed (the last row of the slice) has the flag set */
2602         s->first_slice_line=0;
2603     }
2604
2605     s->me.pre_pass=0;
2606
2607     return 0;
2608 }
2609
/**
 * Worker that runs motion estimation over one slice context.
 *
 * Walks rows start_mb_y..end_mb_y-1 top to bottom and each row left to
 * right. For every macroblock it calls ff_estimate_b_frame_motion() when
 * the picture type is AV_PICTURE_TYPE_B and ff_estimate_p_frame_motion()
 * otherwise. The search diamond size is taken from avctx->dia_size.
 *
 * @param c   not used by this function
 * @param arg points to a MpegEncContext pointer
 * @return    always 0
 */
2610 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2611     MpegEncContext *s= *(void**)arg;
2612
2613     ff_check_alignment();
2614
2615     s->me.dia_size= s->avctx->dia_size;
2616     s->first_slice_line=1;
2617     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2618         s->mb_x=0; //for block init below
2619         ff_init_block_index(s);
2620         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
                 /* advance the four luma block indices to the current macroblock */
2621             s->block_index[0]+=2;
2622             s->block_index[1]+=2;
2623             s->block_index[2]+=2;
2624             s->block_index[3]+=2;
2625
2626             /* compute motion vector & mb_type and store in context */
2627             if(s->pict_type==AV_PICTURE_TYPE_B)
2628                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2629             else
2630                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2631         }
2632         s->first_slice_line=0;
2633     }
2634     return 0;
2635 }
2636
/**
 * Worker that fills the per-macroblock variance and mean tables for the
 * rows start_mb_y..end_mb_y-1 of the source picture's first plane.
 *
 * For each macroblock the values returned by mpvencdsp.pix_sum() and
 * mpvencdsp.pix_norm1() are combined into
 *   varc = (norm1 - ((sum * sum) >> 8) + 500 + 128) >> 8
 *   mean = (sum + 128) >> 8
 * which are stored in current_picture.mb_var / mb_mean; varc is also added
 * to me.mb_var_sum_temp.
 * NOTE(review): pix_sum / pix_norm1 are presumably the sum of the 256 samples
 * of a 16x16 block and the sum of their squares - confirm against the DSP
 * implementation.
 *
 * @param c   not used by this function
 * @param arg points to a MpegEncContext pointer
 * @return    always 0
 */
2637 static int mb_var_thread(AVCodecContext *c, void *arg){
2638     MpegEncContext *s= *(void**)arg;
2639     int mb_x, mb_y;
2640
2641     ff_check_alignment();
2642
2643     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2644         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2645             int xx = mb_x * 16;
2646             int yy = mb_y * 16;
2647             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2648             int varc;
2649             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2650
                 /* the square of sum is computed in unsigned arithmetic;
                  * presumably because it can exceed INT_MAX - TODO confirm */
2651             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2652                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2653
2654             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2655             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2656             s->me.mb_var_sum_temp    += varc;
2657         }
2658     }
2659     return 0;
2660 }
2661
/**
 * Finish the slice currently being written to s->pb.
 *
 * For MPEG-4 the partitions are merged first when s->partitioned_frame is
 * set, then stuffing is written; for the MJPEG output format the MJPEG
 * stuffing routine is called instead. In all cases the bit writer is then
 * aligned and flushed. When the CODEC_FLAG_PASS1 flag is set and the frame
 * is not partitioned, the bits written since the last measurement are
 * added to s->misc_bits.
 *
 * @param s encoder context
 */
2662 static void write_slice_end(MpegEncContext *s){
2663     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2664         if(s->partitioned_frame){
2665             ff_mpeg4_merge_partitions(s);
2666         }
2667
2668         ff_mpeg4_stuffing(&s->pb);
2669     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2670         ff_mjpeg_encode_stuffing(s);
2671     }
2672
2673     avpriv_align_put_bits(&s->pb);
2674     flush_put_bits(&s->pb);
2675
2676     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2677         s->misc_bits+= get_bits_diff(s);
2678 }
2679
/**
 * Write one 12-byte macroblock info record into the last slot of the
 * mb_info buffer (s->mb_info_ptr + s->mb_info_size - 12).
 *
 * Record layout, in write order:
 *   le32  current bit position in s->pb
 *   byte  qscale
 *   byte  gobn  = mb_y / gob_index
 *   le16  mba   = mb_x + mb_width * (mb_y % gob_index)
 *   byte  pred_x (hmv1)
 *   byte  pred_y (vmv1)
 *   byte  0 (hmv2)
 *   byte  0 (vmv2)
 *
 * @param s encoder context; s->mb_info_size must already include the slot
 *          being written
 */
2680 static void write_mb_info(MpegEncContext *s)
2681 {
2682     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2683     int offset = put_bits_count(&s->pb);
2684     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2685     int gobn = s->mb_y / s->gob_index;
2686     int pred_x, pred_y;
         /* NOTE(review): pred_x / pred_y are assigned only when
          * CONFIG_H263_ENCODER is non-zero; presumably this function is never
          * reached otherwise - confirm. */
2687     if (CONFIG_H263_ENCODER)
2688         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2689     bytestream_put_le32(&ptr, offset);
2690     bytestream_put_byte(&ptr, s->qscale);
2691     bytestream_put_byte(&ptr, gobn);
2692     bytestream_put_le16(&ptr, mba);
2693     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2694     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2695     /* 4MV not implemented */
2696     bytestream_put_byte(&ptr, 0); /* hmv2 */
2697     bytestream_put_byte(&ptr, 0); /* vmv2 */
2698 }
2699
/**
 * Maintain the macroblock info records while the bitstream is written.
 *
 * Does nothing when s->mb_info is 0. Otherwise a new 12-byte slot is
 * reserved once the stream has advanced at least s->mb_info bytes past
 * s->prev_mb_info. With startcode set, only the current byte position is
 * remembered in s->prev_mb_info and no record is written. With startcode
 * clear, the current byte position is stored in s->last_mb_info and
 * write_mb_info() (re)writes the most recent slot.
 *
 * @param s         encoder context
 * @param startcode non-zero when called just before a start code / header
 *                  is written, zero when called at the start of a macroblock
 */
2700 static void update_mb_info(MpegEncContext *s, int startcode)
2701 {
2702     if (!s->mb_info)
2703         return;
2704     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2705         s->mb_info_size += 12;
2706         s->prev_mb_info = s->last_mb_info;
2707     }
2708     if (startcode) {
2709         s->prev_mb_info = put_bits_count(&s->pb)/8;
2710         /* This might have incremented mb_info_size above, and we return without
2711          * actually writing any info into that slot yet. But in that case,
2712          * this will be called again after the start code has been written,
2713          * and that call actually writes the mb info. */
2714         return;
2715     }
2716
2717     s->last_mb_info = put_bits_count(&s->pb)/8;
         /* make sure at least one slot exists before writing into it */
2718     if (!s->mb_info_size)
2719         s->mb_info_size += 12;
2720     write_mb_info(s);
2721 }
2722
2723 static int encode_thread(AVCodecContext *c, void *arg){
2724     MpegEncContext *s= *(void**)arg;
2725     int mb_x, mb_y, pdif = 0;
2726     int chr_h= 16>>s->chroma_y_shift;
2727     int i, j;
2728     MpegEncContext best_s = { 0 }, backup_s;
2729     uint8_t bit_buf[2][MAX_MB_BYTES];
2730     uint8_t bit_buf2[2][MAX_MB_BYTES];
2731     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2732     PutBitContext pb[2], pb2[2], tex_pb[2];
2733
2734     ff_check_alignment();
2735
2736     for(i=0; i<2; i++){
2737         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2738         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2739         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2740     }
2741
2742     s->last_bits= put_bits_count(&s->pb);
2743     s->mv_bits=0;
2744     s->misc_bits=0;
2745     s->i_tex_bits=0;
2746     s->p_tex_bits=0;
2747     s->i_count=0;
2748     s->f_count=0;
2749     s->b_count=0;
2750     s->skip_count=0;
2751
2752     for(i=0; i<3; i++){
2753         /* init last dc values */
2754         /* note: quant matrix value (8) is implied here */
2755         s->last_dc[i] = 128 << s->intra_dc_precision;
2756
2757         s->current_picture.error[i] = 0;
2758     }
2759     if(s->codec_id==AV_CODEC_ID_AMV){
2760         s->last_dc[0] = 128*8/13;
2761         s->last_dc[1] = 128*8/14;
2762         s->last_dc[2] = 128*8/14;
2763     }
2764     s->mb_skip_run = 0;
2765     memset(s->last_mv, 0, sizeof(s->last_mv));
2766
2767     s->last_mv_dir = 0;
2768
2769     switch(s->codec_id){
2770     case AV_CODEC_ID_H263:
2771     case AV_CODEC_ID_H263P:
2772     case AV_CODEC_ID_FLV1:
2773         if (CONFIG_H263_ENCODER)
2774             s->gob_index = ff_h263_get_gob_height(s);
2775         break;
2776     case AV_CODEC_ID_MPEG4:
2777         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2778             ff_mpeg4_init_partitions(s);
2779         break;
2780     }
2781
2782     s->resync_mb_x=0;
2783     s->resync_mb_y=0;
2784     s->first_slice_line = 1;
2785     s->ptr_lastgob = s->pb.buf;
2786     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2787         s->mb_x=0;
2788         s->mb_y= mb_y;
2789
2790         ff_set_qscale(s, s->qscale);
2791         ff_init_block_index(s);
2792
2793         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2794             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2795             int mb_type= s->mb_type[xy];
2796 //            int d;
2797             int dmin= INT_MAX;
2798             int dir;
2799
2800             if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES
2801                 && s->slice_context_count == 1
2802                 && s->pb.buf == s->avctx->internal->byte_buffer) {
2803                 int new_size =  s->avctx->internal->byte_buffer_size
2804                               + s->avctx->internal->byte_buffer_size/4
2805                               + s->mb_width*MAX_MB_BYTES;
2806                 int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2807                 int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2808
2809                 uint8_t *new_buffer = NULL;
2810                 int new_buffer_size = 0;
2811
2812                 av_fast_padded_malloc(&new_buffer, &new_buffer_size, new_size);
2813                 if (new_buffer) {
2814                     memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2815                     av_free(s->avctx->internal->byte_buffer);
2816                     s->avctx->internal->byte_buffer      = new_buffer;
2817                     s->avctx->internal->byte_buffer_size = new_buffer_size;
2818                     rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2819                     s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2820                     s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2821                 }
2822             }
2823             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2824                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2825                 return -1;
2826             }
2827             if(s->data_partitioning){
2828                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2829                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2830                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2831                     return -1;
2832                 }
2833             }
2834
2835             s->mb_x = mb_x;
2836             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2837             ff_update_block_index(s);
2838
2839             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2840                 ff_h261_reorder_mb_index(s);
2841                 xy= s->mb_y*s->mb_stride + s->mb_x;
2842                 mb_type= s->mb_type[xy];
2843             }
2844
2845             /* write gob / video packet header  */
2846             if(s->rtp_mode){
2847                 int current_packet_size, is_gob_start;
2848
2849                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2850
2851                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2852
2853                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2854
2855                 switch(s->codec_id){
2856                 case AV_CODEC_ID_H263:
2857                 case AV_CODEC_ID_H263P:
2858                     if(!s->h263_slice_structured)
2859                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2860                     break;
2861                 case AV_CODEC_ID_MPEG2VIDEO:
2862                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2863                 case AV_CODEC_ID_MPEG1VIDEO:
2864                     if(s->mb_skip_run) is_gob_start=0;
2865                     break;
2866                 case AV_CODEC_ID_MJPEG:
2867                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2868                     break;
2869                 }
2870
2871                 if(is_gob_start){
2872                     if(s->start_mb_y != mb_y || mb_x!=0){
2873                         write_slice_end(s);
2874
2875                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2876                             ff_mpeg4_init_partitions(s);
2877                         }
2878                     }
2879
2880                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2881                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2882
2883                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2884                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2885                         int d = 100 / s->error_rate;
2886                         if(r % d == 0){
2887                             current_packet_size=0;
2888                             s->pb.buf_ptr= s->ptr_lastgob;
2889                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2890                         }
2891                     }
2892
2893                     if (s->avctx->rtp_callback){
2894                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2895                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2896                     }
2897                     update_mb_info(s, 1);
2898
2899                     switch(s->codec_id){
2900                     case AV_CODEC_ID_MPEG4:
2901                         if (CONFIG_MPEG4_ENCODER) {
2902                             ff_mpeg4_encode_video_packet_header(s);
2903                             ff_mpeg4_clean_buffers(s);
2904                         }
2905                     break;
2906                     case AV_CODEC_ID_MPEG1VIDEO:
2907                     case AV_CODEC_ID_MPEG2VIDEO:
2908                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2909                             ff_mpeg1_encode_slice_header(s);
2910                             ff_mpeg1_clean_buffers(s);
2911                         }
2912                     break;
2913                     case AV_CODEC_ID_H263:
2914                     case AV_CODEC_ID_H263P:
2915                         if (CONFIG_H263_ENCODER)
2916                             ff_h263_encode_gob_header(s, mb_y);
2917                     break;
2918                     }
2919
2920                     if(s->flags&CODEC_FLAG_PASS1){
2921                         int bits= put_bits_count(&s->pb);
2922                         s->misc_bits+= bits - s->last_bits;
2923                         s->last_bits= bits;
2924                     }
2925
2926                     s->ptr_lastgob += current_packet_size;
2927                     s->first_slice_line=1;
2928                     s->resync_mb_x=mb_x;
2929                     s->resync_mb_y=mb_y;
2930                 }
2931             }
2932
2933             if(  (s->resync_mb_x   == s->mb_x)
2934                && s->resync_mb_y+1 == s->mb_y){
2935                 s->first_slice_line=0;
2936             }
2937
2938             s->mb_skipped=0;
2939             s->dquant=0; //only for QP_RD
2940
2941             update_mb_info(s, 0);
2942
2943             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2944                 int next_block=0;
2945                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2946
2947                 copy_context_before_encode(&backup_s, s, -1);
2948                 backup_s.pb= s->pb;
2949                 best_s.data_partitioning= s->data_partitioning;
2950                 best_s.partitioned_frame= s->partitioned_frame;
2951                 if(s->data_partitioning){
2952                     backup_s.pb2= s->pb2;
2953                     backup_s.tex_pb= s->tex_pb;
2954                 }
2955
2956                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2957                     s->mv_dir = MV_DIR_FORWARD;
2958                     s->mv_type = MV_TYPE_16X16;
2959                     s->mb_intra= 0;
2960                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2961                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2962                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2963                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2964                 }
2965                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2966                     s->mv_dir = MV_DIR_FORWARD;
2967                     s->mv_type = MV_TYPE_FIELD;
2968                     s->mb_intra= 0;
2969                     for(i=0; i<2; i++){
2970                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2971                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2972                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2973                     }
2974                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2975                                  &dmin, &next_block, 0, 0);
2976                 }
2977                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2978                     s->mv_dir = MV_DIR_FORWARD;
2979                     s->mv_type = MV_TYPE_16X16;
2980                     s->mb_intra= 0;
2981                     s->mv[0][0][0] = 0;
2982                     s->mv[0][0][1] = 0;
2983                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2984                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2985                 }
2986                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2987                     s->mv_dir = MV_DIR_FORWARD;
2988                     s->mv_type = MV_TYPE_8X8;
2989                     s->mb_intra= 0;
2990                     for(i=0; i<4; i++){
2991                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2992                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2993                     }
2994                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2995                                  &dmin, &next_block, 0, 0);
2996                 }
2997                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2998                     s->mv_dir = MV_DIR_FORWARD;
2999                     s->mv_type = MV_TYPE_16X16;
3000                     s->mb_intra= 0;
3001                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3002                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3003                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3004                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3005                 }
3006                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3007                     s->mv_dir = MV_DIR_BACKWARD;
3008                     s->mv_type = MV_TYPE_16X16;
3009                     s->mb_intra= 0;
3010                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3011                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3012                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3013                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3014                 }
3015                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3016                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3017                     s->mv_type = MV_TYPE_16X16;
3018                     s->mb_intra= 0;
3019                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3020                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3021                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3022                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3023                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3024                                  &dmin, &next_block, 0, 0);
3025                 }
3026                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3027                     s->mv_dir = MV_DIR_FORWARD;
3028                     s->mv_type = MV_TYPE_FIELD;
3029                     s->mb_intra= 0;
3030                     for(i=0; i<2; i++){
3031                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3032                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3033                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3034                     }
3035                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3036                                  &dmin, &next_block, 0, 0);
3037                 }
3038                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3039                     s->mv_dir = MV_DIR_BACKWARD;
3040                     s->mv_type = MV_TYPE_FIELD;
3041                     s->mb_intra= 0;
3042                     for(i=0; i<2; i++){
3043                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3044                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3045                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3046                     }
3047                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3048                                  &dmin, &next_block, 0, 0);
3049                 }
3050                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3051                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3052                     s->mv_type = MV_TYPE_FIELD;
3053                     s->mb_intra= 0;
3054                     for(dir=0; dir<2; dir++){
3055                         for(i=0; i<2; i++){
3056                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3057                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3058                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3059                         }
3060                     }
3061                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3062                                  &dmin, &next_block, 0, 0);
3063                 }
3064                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3065                     s->mv_dir = 0;
3066                     s->mv_type = MV_TYPE_16X16;
3067                     s->mb_intra= 1;
3068                     s->mv[0][0][0] = 0;
3069                     s->mv[0][0][1] = 0;
3070                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3071                                  &dmin, &next_block, 0, 0);
3072                     if(s->h263_pred || s->h263_aic){
3073                         if(best_s.mb_intra)
3074                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3075                         else
3076                             ff_clean_intra_table_entries(s); //old mode?
3077                     }
3078                 }
3079
3080                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3081                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3082                         const int last_qp= backup_s.qscale;
3083                         int qpi, qp, dc[6];
3084                         int16_t ac[6][16];
3085                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3086                         static const int dquant_tab[4]={-1,1,-2,2};
3087                         int storecoefs = s->mb_intra && s->dc_val[0];
3088
3089                         av_assert2(backup_s.dquant == 0);
3090
3091                         //FIXME intra
3092                         s->mv_dir= best_s.mv_dir;
3093                         s->mv_type = MV_TYPE_16X16;
3094                         s->mb_intra= best_s.mb_intra;
3095                         s->mv[0][0][0] = best_s.mv[0][0][0];
3096                         s->mv[0][0][1] = best_s.mv[0][0][1];
3097                         s->mv[1][0][0] = best_s.mv[1][0][0];
3098                         s->mv[1][0][1] = best_s.mv[1][0][1];
3099
3100                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3101                         for(; qpi<4; qpi++){
3102                             int dquant= dquant_tab[qpi];
3103                             qp= last_qp + dquant;
3104                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3105                                 continue;
3106                             backup_s.dquant= dquant;
3107                             if(storecoefs){
3108                                 for(i=0; i<6; i++){
3109                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3110                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3111                                 }
3112                             }
3113
3114                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3115                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3116                             if(best_s.qscale != qp){
3117                                 if(storecoefs){
3118                                     for(i=0; i<6; i++){
3119                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3120                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3121                                     }
3122                                 }
3123                             }
3124                         }
3125                     }
3126                 }
3127                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3128                     int mx= s->b_direct_mv_table[xy][0];
3129                     int my= s->b_direct_mv_table[xy][1];
3130
3131                     backup_s.dquant = 0;
3132                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3133                     s->mb_intra= 0;
3134                     ff_mpeg4_set_direct_mv(s, mx, my);
3135                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3136                                  &dmin, &next_block, mx, my);
3137                 }
3138                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3139                     backup_s.dquant = 0;
3140                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3141                     s->mb_intra= 0;
3142                     ff_mpeg4_set_direct_mv(s, 0, 0);
3143                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3144                                  &dmin, &next_block, 0, 0);
3145                 }
3146                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3147                     int coded=0;
3148                     for(i=0; i<6; i++)
3149                         coded |= s->block_last_index[i];
3150                     if(coded){
3151                         int mx,my;
3152                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3153                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3154                             mx=my=0; //FIXME find the one we actually used
3155                             ff_mpeg4_set_direct_mv(s, mx, my);
3156                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3157                             mx= s->mv[1][0][0];
3158                             my= s->mv[1][0][1];
3159                         }else{
3160                             mx= s->mv[0][0][0];
3161                             my= s->mv[0][0][1];
3162                         }
3163
3164                         s->mv_dir= best_s.mv_dir;
3165                         s->mv_type = best_s.mv_type;
3166                         s->mb_intra= 0;
3167 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3168                         s->mv[0][0][1] = best_s.mv[0][0][1];
3169                         s->mv[1][0][0] = best_s.mv[1][0][0];
3170                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3171                         backup_s.dquant= 0;
3172                         s->skipdct=1;
3173                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3174                                         &dmin, &next_block, mx, my);
3175                         s->skipdct=0;
3176                     }
3177                 }
3178
3179                 s->current_picture.qscale_table[xy] = best_s.qscale;
3180
3181                 copy_context_after_encode(s, &best_s, -1);
3182
3183                 pb_bits_count= put_bits_count(&s->pb);
3184                 flush_put_bits(&s->pb);
3185                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3186                 s->pb= backup_s.pb;
3187
3188                 if(s->data_partitioning){
3189                     pb2_bits_count= put_bits_count(&s->pb2);
3190                     flush_put_bits(&s->pb2);
3191                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3192                     s->pb2= backup_s.pb2;
3193
3194                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3195                     flush_put_bits(&s->tex_pb);
3196                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3197                     s->tex_pb= backup_s.tex_pb;
3198                 }
3199                 s->last_bits= put_bits_count(&s->pb);
3200
3201                 if (CONFIG_H263_ENCODER &&
3202                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3203                     ff_h263_update_motion_val(s);
3204
3205                 if(next_block==0){ //FIXME 16 vs linesize16
3206                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3207                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3208                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3209                 }
3210
3211                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3212                     ff_mpv_decode_mb(s, s->block);
3213             } else {
3214                 int motion_x = 0, motion_y = 0;
3215                 s->mv_type=MV_TYPE_16X16;
3216                 // only one MB-Type possible
3217
3218                 switch(mb_type){
3219                 case CANDIDATE_MB_TYPE_INTRA:
3220                     s->mv_dir = 0;
3221                     s->mb_intra= 1;
3222                     motion_x= s->mv[0][0][0] = 0;
3223                     motion_y= s->mv[0][0][1] = 0;
3224                     break;
3225                 case CANDIDATE_MB_TYPE_INTER:
3226                     s->mv_dir = MV_DIR_FORWARD;
3227                     s->mb_intra= 0;
3228                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3229                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3230                     break;
3231                 case CANDIDATE_MB_TYPE_INTER_I:
3232                     s->mv_dir = MV_DIR_FORWARD;
3233                     s->mv_type = MV_TYPE_FIELD;
3234                     s->mb_intra= 0;
3235                     for(i=0; i<2; i++){
3236                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3237                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3238                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3239                     }
3240                     break;
3241                 case CANDIDATE_MB_TYPE_INTER4V:
3242                     s->mv_dir = MV_DIR_FORWARD;
3243                     s->mv_type = MV_TYPE_8X8;
3244                     s->mb_intra= 0;
3245                     for(i=0; i<4; i++){
3246                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3247                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3248                     }
3249                     break;
3250                 case CANDIDATE_MB_TYPE_DIRECT:
3251                     if (CONFIG_MPEG4_ENCODER) {
3252                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3253                         s->mb_intra= 0;
3254                         motion_x=s->b_direct_mv_table[xy][0];
3255                         motion_y=s->b_direct_mv_table[xy][1];
3256                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3257                     }
3258                     break;
3259                 case CANDIDATE_MB_TYPE_DIRECT0:
3260                     if (CONFIG_MPEG4_ENCODER) {
3261                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3262                         s->mb_intra= 0;
3263                         ff_mpeg4_set_direct_mv(s, 0, 0);
3264                     }
3265                     break;
3266                 case CANDIDATE_MB_TYPE_BIDIR:
3267                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3268                     s->mb_intra= 0;
3269                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3270                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3271                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3272                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3273                     break;
3274                 case CANDIDATE_MB_TYPE_BACKWARD:
3275                     s->mv_dir = MV_DIR_BACKWARD;
3276                     s->mb_intra= 0;
3277                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3278                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3279                     break;
3280                 case CANDIDATE_MB_TYPE_FORWARD:
3281                     s->mv_dir = MV_DIR_FORWARD;
3282                     s->mb_intra= 0;
3283                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3284                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3285                     break;
3286                 case CANDIDATE_MB_TYPE_FORWARD_I:
3287                     s->mv_dir = MV_DIR_FORWARD;
3288                     s->mv_type = MV_TYPE_FIELD;
3289                     s->mb_intra= 0;
3290                     for(i=0; i<2; i++){
3291                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3292                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3293                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3294                     }
3295                     break;
3296                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3297                     s->mv_dir = MV_DIR_BACKWARD;
3298                     s->mv_type = MV_TYPE_FIELD;
3299                     s->mb_intra= 0;
3300                     for(i=0; i<2; i++){
3301                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3302                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3303                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3304                     }
3305                     break;
3306                 case CANDIDATE_MB_TYPE_BIDIR_I:
3307                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3308                     s->mv_type = MV_TYPE_FIELD;
3309                     s->mb_intra= 0;
3310                     for(dir=0; dir<2; dir++){
3311                         for(i=0; i<2; i++){
3312                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3313                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3314                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3315                         }
3316                     }
3317                     break;
3318                 default:
3319                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3320                 }
3321
3322                 encode_mb(s, motion_x, motion_y);
3323
3324                 // RAL: Update last macroblock type
3325                 s->last_mv_dir = s->mv_dir;
3326
3327                 if (CONFIG_H263_ENCODER &&
3328                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3329                     ff_h263_update_motion_val(s);
3330
3331                 ff_mpv_decode_mb(s, s->block);
3332             }
3333
3334             /* clean the MV table in IPS frames for direct mode in B frames */
3335             if(s->mb_intra /* && I,P,S_TYPE */){
3336                 s->p_mv_table[xy][0]=0;
3337                 s->p_mv_table[xy][1]=0;
3338             }
3339
3340             if(s->flags&CODEC_FLAG_PSNR){
3341                 int w= 16;
3342                 int h= 16;
3343
3344                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3345                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3346
3347                 s->current_picture.error[0] += sse(
3348                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3349                     s->dest[0], w, h, s->linesize);
3350                 s->current_picture.error[1] += sse(
3351                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3352                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3353                 s->current_picture.error[2] += sse(
3354                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3355                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3356             }
3357             if(s->loop_filter){
3358                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3359                     ff_h263_loop_filter(s);
3360             }
3361             av_dlog(s->avctx, "MB %d %d bits\n",
3362                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3363         }
3364     }
3365
3366     //not beautiful here but we must write it before flushing so it has to be here
3367     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3368         ff_msmpeg4_encode_ext_header(s);
3369
3370     write_slice_end(s);
3371
3372     /* Send the last GOB if RTP */
3373     if (s->avctx->rtp_callback) {
3374         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3375         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3376         /* Call the RTP callback to send the last GOB */
3377         emms_c();
3378         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3379     }
3380
3381     return 0;
3382 }
3383
/*
 * Accumulate one statistic of a thread context into the main context:
 * add src->field to dst->field, then reset src->field to 0.
 * Expects pointers named `dst` and `src` to be in scope at the call site.
 * Wrapped in do { } while (0) so that both statements stay together when the
 * macro is used as the body of an unbraced if/else or loop.
 */
#define MERGE(field)                \
    do {                            \
        dst->field += src->field;   \
        src->field  = 0;            \
    } while (0)
3385 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3386     MERGE(me.scene_change_score);
3387     MERGE(me.mc_mb_var_sum_temp);
3388     MERGE(me.mb_var_sum_temp);
3389 }
3390
3391 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3392     int i;
3393
3394     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3395     MERGE(dct_count[1]);
3396     MERGE(mv_bits);
3397     MERGE(i_tex_bits);
3398     MERGE(p_tex_bits);
3399     MERGE(i_count);
3400     MERGE(f_count);
3401     MERGE(b_count);
3402     MERGE(skip_count);
3403     MERGE(misc_bits);
3404     MERGE(er.error_count);
3405     MERGE(padding_bug_score);
3406     MERGE(current_picture.error[0]);
3407     MERGE(current_picture.error[1]);
3408     MERGE(current_picture.error[2]);
3409
3410     if(dst->avctx->noise_reduction){
3411         for(i=0; i<64; i++){
3412             MERGE(dct_error_sum[0][i]);
3413             MERGE(dct_error_sum[1][i]);
3414         }
3415     }
3416
3417     assert(put_bits_count(&src->pb) % 8 ==0);
3418     assert(put_bits_count(&dst->pb) % 8 ==0);
3419     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3420     flush_put_bits(&dst->pb);
3421 }
3422
3423 static int estimate_qp(MpegEncContext *s, int dry_run){
3424     if (s->next_lambda){
3425         s->current_picture_ptr->f->quality =
3426         s->current_picture.f->quality = s->next_lambda;
3427         if(!dry_run) s->next_lambda= 0;
3428     } else if (!s->fixed_qscale) {
3429         s->current_picture_ptr->f->quality =
3430         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3431         if (s->current_picture.f->quality < 0)
3432             return -1;
3433     }
3434
3435     if(s->adaptive_quant){
3436         switch(s->codec_id){
3437         case AV_CODEC_ID_MPEG4:
3438             if (CONFIG_MPEG4_ENCODER)
3439                 ff_clean_mpeg4_qscales(s);
3440             break;
3441         case AV_CODEC_ID_H263:
3442         case AV_CODEC_ID_H263P:
3443         case AV_CODEC_ID_FLV1:
3444             if (CONFIG_H263_ENCODER)
3445                 ff_clean_h263_qscales(s);
3446             break;
3447         default:
3448             ff_init_qscale_tab(s);
3449         }
3450
3451         s->lambda= s->lambda_table[0];
3452         //FIXME broken
3453     }else
3454         s->lambda = s->current_picture.f->quality;
3455     update_qscale(s);
3456     return 0;
3457 }
3458
3459 /* must be called before writing the header */
3460 static void set_frame_distances(MpegEncContext * s){
3461     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3462     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3463
3464     if(s->pict_type==AV_PICTURE_TYPE_B){
3465         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3466         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3467     }else{
3468         s->pp_time= s->time - s->last_non_b_time;
3469         s->last_non_b_time= s->time;
3470         assert(s->picture_number==0 || s->pp_time > 0);
3471     }
3472 }
3473
3474 static int encode_picture(MpegEncContext *s, int picture_number)
3475 {
3476     int i, ret;
3477     int bits;
3478     int context_count = s->slice_context_count;
3479
3480     s->picture_number = picture_number;
3481
3482     /* Reset the average MB variance */
3483     s->me.mb_var_sum_temp    =
3484     s->me.mc_mb_var_sum_temp = 0;
3485
3486     /* we need to initialize some time vars before we can encode b-frames */
3487     // RAL: Condition added for MPEG1VIDEO
3488     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3489         set_frame_distances(s);
3490     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3491         ff_set_mpeg4_time(s);
3492
3493     s->me.scene_change_score=0;
3494
3495 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3496
3497     if(s->pict_type==AV_PICTURE_TYPE_I){
3498         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3499         else                        s->no_rounding=0;
3500     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3501         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3502             s->no_rounding ^= 1;
3503     }
3504
3505     if(s->flags & CODEC_FLAG_PASS2){
3506         if (estimate_qp(s,1) < 0)
3507             return -1;
3508         ff_get_2pass_fcode(s);
3509     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3510         if(s->pict_type==AV_PICTURE_TYPE_B)
3511             s->lambda= s->last_lambda_for[s->pict_type];
3512         else
3513             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3514         update_qscale(s);
3515     }
3516
3517     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3518         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3519         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3520         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3521         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3522     }
3523
3524     s->mb_intra=0; //for the rate distortion & bit compare functions
3525     for(i=1; i<context_count; i++){
3526         ret = ff_update_duplicate_context(s->thread_context[i], s);
3527         if (ret < 0)
3528             return ret;
3529     }
3530
3531     if(ff_init_me(s)<0)
3532         return -1;
3533
3534     /* Estimate motion for every MB */
3535     if(s->pict_type != AV_PICTURE_TYPE_I){
3536         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3537         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3538         if (s->pict_type != AV_PICTURE_TYPE_B) {
3539             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3540                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3541             }
3542         }
3543
3544         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3545     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3546         /* I-Frame */
3547         for(i=0; i<s->mb_stride*s->mb_height; i++)
3548             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3549
3550         if(!s->fixed_qscale){
3551             /* finding spatial complexity for I-frame rate control */
3552             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3553         }
3554     }
3555     for(i=1; i<context_count; i++){
3556         merge_context_after_me(s, s->thread_context[i]);
3557     }
3558     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3559     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3560     emms_c();
3561
3562     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3563         s->pict_type= AV_PICTURE_TYPE_I;
3564         for(i=0; i<s->mb_stride*s->mb_height; i++)
3565             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3566         if(s->msmpeg4_version >= 3)
3567             s->no_rounding=1;
3568         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3569                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3570     }
3571
3572     if(!s->umvplus){
3573         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3574             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3575
3576             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3577                 int a,b;
3578                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3579                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3580                 s->f_code= FFMAX3(s->f_code, a, b);
3581             }
3582
3583             ff_fix_long_p_mvs(s);
3584             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3585             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3586                 int j;
3587                 for(i=0; i<2; i++){
3588                     for(j=0; j<2; j++)
3589                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3590                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3591                 }
3592             }
3593         }
3594
3595         if(s->pict_type==AV_PICTURE_TYPE_B){
3596             int a, b;
3597
3598             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3599             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3600             s->f_code = FFMAX(a, b);
3601
3602             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3603             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3604             s->b_code = FFMAX(a, b);
3605
3606             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3607             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3608             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3609             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3610             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3611                 int dir, j;
3612                 for(dir=0; dir<2; dir++){
3613                     for(i=0; i<2; i++){
3614                         for(j=0; j<2; j++){
3615                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3616                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3617                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3618                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3619                         }
3620                     }
3621                 }
3622             }
3623         }
3624     }
3625
3626     if (estimate_qp(s, 0) < 0)
3627         return -1;
3628
3629     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3630         s->qscale= 3; //reduce clipping problems
3631
3632     if (s->out_format == FMT_MJPEG) {
3633         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3634         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3635
3636         if (s->avctx->intra_matrix) {
3637             chroma_matrix =
3638             luma_matrix = s->avctx->intra_matrix;
3639         }
3640         if (s->avctx->chroma_intra_matrix)
3641             chroma_matrix = s->avctx->chroma_intra_matrix;
3642
3643         /* for mjpeg, we do include qscale in the matrix */
3644         for(i=1;i<64;i++){
3645             int j = s->idsp.idct_permutation[i];
3646
3647             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3648             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3649         }
3650         s->y_dc_scale_table=
3651         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3652         s->chroma_intra_matrix[0] =
3653         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3654         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3655                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3656         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3657                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3658         s->qscale= 8;
3659     }
3660     if(s->codec_id == AV_CODEC_ID_AMV){
3661         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3662         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3663         for(i=1;i<64;i++){
3664             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3665
3666             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3667             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3668         }
3669         s->y_dc_scale_table= y;
3670         s->c_dc_scale_table= c;
3671         s->intra_matrix[0] = 13;
3672         s->chroma_intra_matrix[0] = 14;
3673         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3674                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3675         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3676                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3677         s->qscale= 8;
3678     }
3679
3680     //FIXME var duplication
3681     s->current_picture_ptr->f->key_frame =
3682     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3683     s->current_picture_ptr->f->pict_type =
3684     s->current_picture.f->pict_type = s->pict_type;
3685
3686     if (s->current_picture.f->key_frame)
3687         s->picture_in_gop_number=0;
3688
3689     s->mb_x = s->mb_y = 0;
3690     s->last_bits= put_bits_count(&s->pb);
3691     switch(s->out_format) {
3692     case FMT_MJPEG:
3693         if (CONFIG_MJPEG_ENCODER)
3694             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3695                                            s->intra_matrix, s->chroma_intra_matrix);
3696         break;
3697     case FMT_H261:
3698         if (CONFIG_H261_ENCODER)
3699             ff_h261_encode_picture_header(s, picture_number);
3700         break;
3701     case FMT_H263:
3702         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3703             ff_wmv2_encode_picture_header(s, picture_number);
3704         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3705             ff_msmpeg4_encode_picture_header(s, picture_number);
3706         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3707             ff_mpeg4_encode_picture_header(s, picture_number);
3708         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3709             ff_rv10_encode_picture_header(s, picture_number);
3710         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3711             ff_rv20_encode_picture_header(s, picture_number);
3712         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3713             ff_flv_encode_picture_header(s, picture_number);
3714         else if (CONFIG_H263_ENCODER)
3715             ff_h263_encode_picture_header(s, picture_number);
3716         break;
3717     case FMT_MPEG1:
3718         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3719             ff_mpeg1_encode_picture_header(s, picture_number);
3720         break;
3721     default:
3722         av_assert0(0);
3723     }
3724     bits= put_bits_count(&s->pb);
3725     s->header_bits= bits - s->last_bits;
3726
3727     for(i=1; i<context_count; i++){
3728         update_duplicate_context_after_me(s->thread_context[i], s);
3729     }
3730     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3731     for(i=1; i<context_count; i++){
3732         merge_context_after_encode(s, s->thread_context[i]);
3733     }
3734     emms_c();
3735     return 0;
3736 }
3737
3738 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3739     const int intra= s->mb_intra;
3740     int i;
3741
3742     s->dct_count[intra]++;
3743
3744     for(i=0; i<64; i++){
3745         int level= block[i];
3746
3747         if(level){
3748             if(level>0){
3749                 s->dct_error_sum[intra][i] += level;
3750                 level -= s->dct_offset[intra][i];
3751                 if(level<0) level=0;
3752             }else{
3753                 s->dct_error_sum[intra][i] -= level;
3754                 level += s->dct_offset[intra][i];
3755                 if(level>0) level=0;
3756             }
3757             block[i]= level;
3758         }
3759     }
3760 }
3761
3762 static int dct_quantize_trellis_c(MpegEncContext *s,
3763                                   int16_t *block, int n,
3764                                   int qscale, int *overflow){
3765     const int *qmat;
3766     const uint8_t *scantable= s->intra_scantable.scantable;
3767     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3768     int max=0;
3769     unsigned int threshold1, threshold2;
3770     int bias=0;
3771     int run_tab[65];
3772     int level_tab[65];
3773     int score_tab[65];
3774     int survivor[65];
3775     int survivor_count;
3776     int last_run=0;
3777     int last_level=0;
3778     int last_score= 0;
3779     int last_i;
3780     int coeff[2][64];
3781     int coeff_count[64];
3782     int qmul, qadd, start_i, last_non_zero, i, dc;
3783     const int esc_length= s->ac_esc_length;
3784     uint8_t * length;
3785     uint8_t * last_length;
3786     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3787
3788     s->fdsp.fdct(block);
3789
3790     if(s->dct_error_sum)
3791         s->denoise_dct(s, block);
3792     qmul= qscale*16;
3793     qadd= ((qscale-1)|1)*8;
3794
3795     if (s->mb_intra) {
3796         int q;
3797         if (!s->h263_aic) {
3798             if (n < 4)
3799                 q = s->y_dc_scale;
3800             else
3801                 q = s->c_dc_scale;
3802             q = q << 3;
3803         } else{
3804             /* For AIC we skip quant/dequant of INTRADC */
3805             q = 1 << 3;
3806             qadd=0;
3807         }
3808
3809         /* note: block[0] is assumed to be positive */
3810         block[0] = (block[0] + (q >> 1)) / q;
3811         start_i = 1;
3812         last_non_zero = 0;
3813         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3814         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3815             bias= 1<<(QMAT_SHIFT-1);
3816         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3817             length     = s->intra_chroma_ac_vlc_length;
3818             last_length= s->intra_chroma_ac_vlc_last_length;
3819         } else {
3820             length     = s->intra_ac_vlc_length;
3821             last_length= s->intra_ac_vlc_last_length;
3822         }
3823     } else {
3824         start_i = 0;
3825         last_non_zero = -1;
3826         qmat = s->q_inter_matrix[qscale];
3827         length     = s->inter_ac_vlc_length;
3828         last_length= s->inter_ac_vlc_last_length;
3829     }
3830     last_i= start_i;
3831
3832     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3833     threshold2= (threshold1<<1);
3834
3835     for(i=63; i>=start_i; i--) {
3836         const int j = scantable[i];
3837         int level = block[j] * qmat[j];
3838
3839         if(((unsigned)(level+threshold1))>threshold2){
3840             last_non_zero = i;
3841             break;
3842         }
3843     }
3844
3845     for(i=start_i; i<=last_non_zero; i++) {
3846         const int j = scantable[i];
3847         int level = block[j] * qmat[j];
3848
3849 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3850 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3851         if(((unsigned)(level+threshold1))>threshold2){
3852             if(level>0){
3853                 level= (bias + level)>>QMAT_SHIFT;
3854                 coeff[0][i]= level;
3855                 coeff[1][i]= level-1;
3856 //                coeff[2][k]= level-2;
3857             }else{
3858                 level= (bias - level)>>QMAT_SHIFT;
3859                 coeff[0][i]= -level;
3860                 coeff[1][i]= -level+1;
3861 //                coeff[2][k]= -level+2;
3862             }
3863             coeff_count[i]= FFMIN(level, 2);
3864             av_assert2(coeff_count[i]);
3865             max |=level;
3866         }else{
3867             coeff[0][i]= (level>>31)|1;
3868             coeff_count[i]= 1;
3869         }
3870     }
3871
3872     *overflow= s->max_qcoeff < max; //overflow might have happened
3873
3874     if(last_non_zero < start_i){
3875         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3876         return last_non_zero;
3877     }
3878
3879     score_tab[start_i]= 0;
3880     survivor[0]= start_i;
3881     survivor_count= 1;
3882
3883     for(i=start_i; i<=last_non_zero; i++){
3884         int level_index, j, zero_distortion;
3885         int dct_coeff= FFABS(block[ scantable[i] ]);
3886         int best_score=256*256*256*120;
3887
3888         if (s->fdsp.fdct == ff_fdct_ifast)
3889             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3890         zero_distortion= dct_coeff*dct_coeff;
3891
3892         for(level_index=0; level_index < coeff_count[i]; level_index++){
3893             int distortion;
3894             int level= coeff[level_index][i];
3895             const int alevel= FFABS(level);
3896             int unquant_coeff;
3897
3898             av_assert2(level);
3899
3900             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3901                 unquant_coeff= alevel*qmul + qadd;
3902             }else{ //MPEG1
3903                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3904                 if(s->mb_intra){
3905                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3906                         unquant_coeff =   (unquant_coeff - 1) | 1;
3907                 }else{
3908                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3909                         unquant_coeff =   (unquant_coeff - 1) | 1;
3910                 }
3911                 unquant_coeff<<= 3;
3912             }
3913
3914             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3915             level+=64;
3916             if((level&(~127)) == 0){
3917                 for(j=survivor_count-1; j>=0; j--){
3918                     int run= i - survivor[j];
3919                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3920                     score += score_tab[i-run];
3921
3922                     if(score < best_score){
3923                         best_score= score;
3924                         run_tab[i+1]= run;
3925                         level_tab[i+1]= level-64;
3926                     }
3927                 }
3928
3929                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3930                     for(j=survivor_count-1; j>=0; j--){
3931                         int run= i - survivor[j];
3932                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3933                         score += score_tab[i-run];
3934                         if(score < last_score){
3935                             last_score= score;
3936                             last_run= run;
3937                             last_level= level-64;
3938                             last_i= i+1;
3939                         }
3940                     }
3941                 }
3942             }else{
3943                 distortion += esc_length*lambda;
3944                 for(j=survivor_count-1; j>=0; j--){
3945                     int run= i - survivor[j];
3946                     int score= distortion + score_tab[i-run];
3947
3948                     if(score < best_score){
3949                         best_score= score;
3950                         run_tab[i+1]= run;
3951                         level_tab[i+1]= level-64;
3952                     }
3953                 }
3954
3955                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3956                   for(j=survivor_count-1; j>=0; j--){
3957                         int run= i - survivor[j];
3958                         int score= distortion + score_tab[i-run];
3959                         if(score < last_score){
3960                             last_score= score;
3961                             last_run= run;
3962                             last_level= level-64;
3963                             last_i= i+1;
3964                         }
3965                     }
3966                 }
3967             }
3968         }
3969
3970         score_tab[i+1]= best_score;
3971
3972         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3973         if(last_non_zero <= 27){
3974             for(; survivor_count; survivor_count--){
3975                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3976                     break;
3977             }
3978         }else{
3979             for(; survivor_count; survivor_count--){
3980                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3981                     break;
3982             }
3983         }
3984
3985         survivor[ survivor_count++ ]= i+1;
3986     }
3987
3988     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3989         last_score= 256*256*256*120;
3990         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3991             int score= score_tab[i];
3992             if(i) score += lambda*2; //FIXME exacter?
3993
3994             if(score < last_score){
3995                 last_score= score;
3996                 last_i= i;
3997                 last_level= level_tab[i];
3998                 last_run= run_tab[i];
3999             }
4000         }
4001     }
4002
4003     s->coded_score[n] = last_score;
4004
4005     dc= FFABS(block[0]);
4006     last_non_zero= last_i - 1;
4007     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4008
4009     if(last_non_zero < start_i)
4010         return last_non_zero;
4011
4012     if(last_non_zero == 0 && start_i == 0){
4013         int best_level= 0;
4014         int best_score= dc * dc;
4015
4016         for(i=0; i<coeff_count[0]; i++){
4017             int level= coeff[i][0];
4018             int alevel= FFABS(level);
4019             int unquant_coeff, score, distortion;
4020
4021             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4022                     unquant_coeff= (alevel*qmul + qadd)>>3;
4023             }else{ //MPEG1
4024                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4025                     unquant_coeff =   (unquant_coeff - 1) | 1;
4026             }
4027             unquant_coeff = (unquant_coeff + 4) >> 3;
4028             unquant_coeff<<= 3 + 3;
4029
4030             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4031             level+=64;
4032             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4033             else                    score= distortion + esc_length*lambda;
4034
4035             if(score < best_score){
4036                 best_score= score;
4037                 best_level= level - 64;
4038             }
4039         }
4040         block[0]= best_level;
4041         s->coded_score[n] = best_score - dc*dc;
4042         if(best_level == 0) return -1;
4043         else                return last_non_zero;
4044     }
4045
4046     i= last_i;
4047     av_assert2(last_level);
4048
4049     block[ perm_scantable[last_non_zero] ]= last_level;
4050     i -= last_run + 1;
4051
4052     for(; i>start_i; i -= run_tab[i] + 1){
4053         block[ perm_scantable[i-1] ]= level_tab[i];
4054     }
4055
4056     return last_non_zero;
4057 }
4058
4059 //#define REFINE_STATS 1
4060 static int16_t basis[64][64];
4061
4062 static void build_basis(uint8_t *perm){
4063     int i, j, x, y;
4064     emms_c();
4065     for(i=0; i<8; i++){
4066         for(j=0; j<8; j++){
4067             for(y=0; y<8; y++){
4068                 for(x=0; x<8; x++){
4069                     double s= 0.25*(1<<BASIS_SHIFT);
4070                     int index= 8*i + j;
4071                     int perm_index= perm[index];
4072                     if(i==0) s*= sqrt(0.5);
4073                     if(j==0) s*= sqrt(0.5);
4074                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4075                 }
4076             }
4077         }
4078     }
4079 }
4080
/**
 * Iteratively refine an already quantized block (quantizer noise shaping).
 *
 * rem[] holds the weighted-error working signal: it starts as the DC
 * reconstruction minus the original samples (in RECON_SHIFT fixed point) and
 * the dequantized contribution of every nonzero coefficient is added through
 * s->mpvencdsp.add_8x8basis(). Each pass of the main loop then evaluates a
 * +1/-1 change of the candidate coefficients (and of the DC level for intra
 * blocks), scoring it as try_8x8basis() plus lambda times the change in VLC
 * length, applies the single best change and repeats until no change lowers
 * the score.
 *
 * @param s      encoder context (VLC length tables, lambda2,
 *               quantizer_noise_shaping, block_last_index, DSP functions)
 * @param block  quantized coefficients in permuted order, modified in place
 * @param weight per-sample weights; overwritten in place with the remapped
 *               weights computed below
 * @param orig   the 64 original samples the reconstruction is compared with
 * @param n      block index inside the macroblock (n < 4 uses the luma DC
 *               scale, n > 3 may use the chroma VLC tables)
 * @param qscale quantizer scale used for dequantization (qmul/qadd)
 * @return the updated scan-order index of the last nonzero coefficient
 */
4081 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4082                         int16_t *block, int16_t *weight, int16_t *orig,
4083                         int n, int qscale){
4084     int16_t rem[64];
4085     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4086     const uint8_t *scantable= s->intra_scantable.scantable;
4087     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4088 //    unsigned int threshold1, threshold2;
4089 //    int bias=0;
4090     int run_tab[65];
4091     int prev_run=0;
4092     int prev_level=0;
4093     int qmul, qadd, start_i, last_non_zero, i, dc;
4094     uint8_t * length;
4095     uint8_t * last_length;
4096     int lambda;
4097     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4098 #ifdef REFINE_STATS
4099 static int count=0;
4100 static int after_last=0;
4101 static int to_zero=0;
4102 static int from_zero=0;
4103 static int raise=0;
4104 static int lower=0;
4105 static int messed_sign=0;
4106 #endif
4107
/* build the shared basis table on first use; an unfilled table is detected through basis[0][0] */
4108     if(basis[0][0] == 0)
4109         build_basis(s->idsp.idct_permutation);
4110
4111     qmul= qscale*2;
4112     qadd= (qscale-1)|1;
4113     if (s->mb_intra) {
4114         if (!s->h263_aic) {
4115             if (n < 4)
4116                 q = s->y_dc_scale;
4117             else
4118                 q = s->c_dc_scale;
4119         } else{
4120             /* For AIC we skip quant/dequant of INTRADC */
4121             q = 1;
4122             qadd=0;
4123         }
4124         q <<= RECON_SHIFT-3;
4125         /* note: block[0] is assumed to be positive */
4126         dc= block[0]*q;
4127 //        block[0] = (block[0] + (q >> 1)) / q;
4128         start_i = 1;
4129 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4130 //            bias= 1<<(QMAT_SHIFT-1);
4131         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4132             length     = s->intra_chroma_ac_vlc_length;
4133             last_length= s->intra_chroma_ac_vlc_last_length;
4134         } else {
4135             length     = s->intra_ac_vlc_length;
4136             last_length= s->intra_ac_vlc_last_length;
4137         }
4138     } else {
4139         dc= 0;
4140         start_i = 0;
4141         length     = s->inter_ac_vlc_length;
4142         last_length= s->inter_ac_vlc_last_length;
4143     }
4144     last_non_zero = s->block_last_index[n];
4145
4146 #ifdef REFINE_STATS
4147 {START_TIMER
4148 #endif
/* rem[] = DC reconstruction (plus a rounding offset) minus the original samples, in RECON_SHIFT fixed point */
4149     dc += (1<<(RECON_SHIFT-1));
4150     for(i=0; i<64; i++){
4151         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4152     }
4153 #ifdef REFINE_STATS
4154 STOP_TIMER("memset rem[]")}
4155 #endif
/* remap the caller's weights in place and accumulate their squares for the lambda scaling below */
4156     sum=0;
4157     for(i=0; i<64; i++){
4158         int one= 36;
4159         int qns=4;
4160         int w;
4161
4162         w= FFABS(weight[i]) + qns*one;
4163         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4164
4165         weight[i] = w;
4166 //        w=weight[i] = (63*qns + (w/2)) / w;
4167
4168         av_assert2(w>0);
4169         av_assert2(w<(1<<6));
4170         sum += w*w;
4171     }
4172     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4173 #ifdef REFINE_STATS
4174 {START_TIMER
4175 #endif
/* record the zero run in front of every nonzero coefficient and add each dequantized coefficient to rem[] */
4176     run=0;
4177     rle_index=0;
4178     for(i=start_i; i<=last_non_zero; i++){
4179         int j= perm_scantable[i];
4180         const int level= block[j];
4181         int coeff;
4182
4183         if(level){
4184             if(level<0) coeff= qmul*level - qadd;
4185             else        coeff= qmul*level + qadd;
4186             run_tab[rle_index++]=run;
4187             run=0;
4188
4189             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4190         }else{
4191             run++;
4192         }
4193     }
4194 #ifdef REFINE_STATS
4195 if(last_non_zero>0){
4196 STOP_TIMER("init rem[]")
4197 }
4198 }
4199
4200 {START_TIMER
4201 #endif
/* greedy search: every pass picks the single +1/-1 change with the lowest score and stops when none beats the unchanged block */
4202     for(;;){
4203         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4204         int best_coeff=0;
4205         int best_change=0;
4206         int run2, best_unquant_change=0, analyze_gradient;
4207 #ifdef REFINE_STATS
4208 {START_TIMER
4209 #endif
4210         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4211
/* d1[] = forward DCT of the weighted residual; its signs are used below to reject zero -> +-1 changes */
4212         if(analyze_gradient){
4213 #ifdef REFINE_STATS
4214 {START_TIMER
4215 #endif
4216             for(i=0; i<64; i++){
4217                 int w= weight[i];
4218
4219                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4220             }
4221 #ifdef REFINE_STATS
4222 STOP_TIMER("rem*w*w")}
4223 {START_TIMER
4224 #endif
4225             s->fdsp.fdct(d1);
4226 #ifdef REFINE_STATS
4227 STOP_TIMER("dct")}
4228 #endif
4229         }
4230
/* intra blocks only (start_i == 1): try moving the DC level by one step; no bit cost is added for DC */
4231         if(start_i){
4232             const int level= block[0];
4233             int change, old_coeff;
4234
4235             av_assert2(s->mb_intra);
4236
4237             old_coeff= q*level;
4238
4239             for(change=-1; change<=1; change+=2){
4240                 int new_level= level + change;
4241                 int score, new_coeff;
4242
4243                 new_coeff= q*new_level;
4244                 if(new_coeff >= 2048 || new_coeff < 0)
4245                     continue;
4246
4247                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4248                                                   new_coeff - old_coeff);
4249                 if(score<best_score){
4250                     best_score= score;
4251                     best_coeff= 0;
4252                     best_change= change;
4253                     best_unquant_change= new_coeff - old_coeff;
4254                 }
4255             }
4256         }
4257
4258         run=0;
4259         rle_index=0;
4260         run2= run_tab[rle_index++];
4261         prev_level=0;
4262         prev_run=0;
4263
/* AC candidates: run = zeros since the previous nonzero coefficient, run2 = zeros remaining before the next one */
4264         for(i=start_i; i<64; i++){
4265             int j= perm_scantable[i];
4266             const int level= block[j];
4267             int change, old_coeff;
4268
4269             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4270                 break;
4271
4272             if(level){
4273                 if(level<0) old_coeff= qmul*level - qadd;
4274                 else        old_coeff= qmul*level + qadd;
4275                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4276             }else{
4277                 old_coeff=0;
4278                 run2--;
4279                 av_assert2(run2>=0 || i >= last_non_zero );
4280             }
4281
4282             for(change=-1; change<=1; change+=2){
4283                 int new_level= level + change;
4284                 int score, new_coeff, unquant_change;
4285
4286                 score=0;
/* below noise shaping level 2 only changes that do not increase the magnitude are tried */
4287                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4288                    continue;
4289
4290                 if(new_level){
4291                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4292                     else            new_coeff= qmul*new_level + qadd;
4293                     if(new_coeff >= 2048 || new_coeff <= -2048)
4294                         continue;
4295                     //FIXME check for overflow
4296
4297                     if(level){
/* nonzero -> different nonzero level: the run is unchanged, only the code length of this level differs */
4298                         if(level < 63 && level > -63){
4299                             if(i < last_non_zero)
4300                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4301                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4302                             else
4303                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4304                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4305                         }
4306                     }else{
/* zero -> +-1: a new coefficient splits the run in front of the next nonzero one, or becomes the new last coefficient */
4307                         av_assert2(FFABS(new_level)==1);
4308
4309                         if(analyze_gradient){
4310                             int g= d1[ scantable[i] ];
4311                             if(g && (g^new_level) >= 0)
4312                                 continue;
4313                         }
4314
4315                         if(i < last_non_zero){
4316                             int next_i= i + run2 + 1;
4317                             int next_level= block[ perm_scantable[next_i] ] + 64;
4318
4319                             if(next_level&(~127))
4320                                 next_level= 0;
4321
4322                             if(next_i < last_non_zero)
4323                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4324                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4325                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4326                             else
4327                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4328                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4329                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4330                         }else{
4331                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4332                             if(prev_level){
4333                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4334                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4335                             }
4336                         }
4337                     }
4338                 }else{
/* +-1 -> zero: the coefficient disappears and its run merges with the following one, or the previous coefficient becomes the last */
4339                     new_coeff=0;
4340                     av_assert2(FFABS(level)==1);
4341
4342                     if(i < last_non_zero){
4343                         int next_i= i + run2 + 1;
4344                         int next_level= block[ perm_scantable[next_i] ] + 64;
4345
4346                         if(next_level&(~127))
4347                             next_level= 0;
4348
4349                         if(next_i < last_non_zero)
4350                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4351                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4352                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4353                         else
4354                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4355                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4356                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4357                     }else{
4358                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4359                         if(prev_level){
4360                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4361                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4362                         }
4363                     }
4364                 }
4365
/* turn the bit-count difference into a cost, then add the score try_8x8basis() reports for the changed reconstruction */
4366                 score *= lambda;
4367
4368                 unquant_change= new_coeff - old_coeff;
4369                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4370
4371                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4372                                                    unquant_change);
4373                 if(score<best_score){
4374                     best_score= score;
4375                     best_coeff= i;
4376                     best_change= change;
4377                     best_unquant_change= unquant_change;
4378                 }
4379             }
4380             if(level){
4381                 prev_level= level + 64;
4382                 if(prev_level&(~127))
4383                     prev_level= 0;
4384                 prev_run= run;
4385                 run=0;
4386             }else{
4387                 run++;
4388             }
4389         }
4390 #ifdef REFINE_STATS
4391 STOP_TIMER("iterative step")}
4392 #endif
4393
/* apply the winning change, update last_non_zero, rebuild run_tab[] and fold the change into rem[]; no winner ends the search */
4394         if(best_change){
4395             int j= perm_scantable[ best_coeff ];
4396
4397             block[j] += best_change;
4398
4399             if(best_coeff > last_non_zero){
4400                 last_non_zero= best_coeff;
4401                 av_assert2(block[j]);
4402 #ifdef REFINE_STATS
4403 after_last++;
4404 #endif
4405             }else{
4406 #ifdef REFINE_STATS
4407 if(block[j]){
4408     if(block[j] - best_change){
4409         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4410             raise++;
4411         }else{
4412             lower++;
4413         }
4414     }else{
4415         from_zero++;
4416     }
4417 }else{
4418     to_zero++;
4419 }
4420 #endif
4421                 for(; last_non_zero>=start_i; last_non_zero--){
4422                     if(block[perm_scantable[last_non_zero]])
4423                         break;
4424                 }
4425             }
4426 #ifdef REFINE_STATS
4427 count++;
4428 if(256*256*256*64 % count == 0){
4429     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4430 }
4431 #endif
4432             run=0;
4433             rle_index=0;
4434             for(i=start_i; i<=last_non_zero; i++){
4435                 int j= perm_scantable[i];
4436                 const int level= block[j];
4437
4438                  if(level){
4439                      run_tab[rle_index++]=run;
4440                      run=0;
4441                  }else{
4442                      run++;
4443                  }
4444             }
4445
/* j here is the outer one declared at the top of this branch (the coefficient that was changed) */
4446             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4447         }else{
4448             break;
4449         }
4450     }
4451 #ifdef REFINE_STATS
4452 if(last_non_zero>0){
4453 STOP_TIMER("iterative search")
4454 }
4455 }
4456 #endif
4457
4458     return last_non_zero;
4459 }
4460
4461 int ff_dct_quantize_c(MpegEncContext *s,
4462                         int16_t *block, int n,
4463                         int qscale, int *overflow)
4464 {
4465     int i, j, level, last_non_zero, q, start_i;
4466     const int *qmat;
4467     const uint8_t *scantable= s->intra_scantable.scantable;
4468     int bias;
4469     int max=0;
4470     unsigned int threshold1, threshold2;
4471
4472     s->fdsp.fdct(block);
4473
4474     if(s->dct_error_sum)
4475         s->denoise_dct(s, block);
4476
4477     if (s->mb_intra) {
4478         if (!s->h263_aic) {
4479             if (n < 4)
4480                 q = s->y_dc_scale;
4481             else
4482                 q = s->c_dc_scale;
4483             q = q << 3;
4484         } else
4485             /* For AIC we skip quant/dequant of INTRADC */
4486             q = 1 << 3;
4487
4488         /* note: block[0] is assumed to be positive */
4489         block[0] = (block[0] + (q >> 1)) / q;
4490         start_i = 1;
4491         last_non_zero = 0;
4492         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4493         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4494     } else {
4495         start_i = 0;
4496         last_non_zero = -1;
4497         qmat = s->q_inter_matrix[qscale];
4498         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4499     }
4500     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4501     threshold2= (threshold1<<1);
4502     for(i=63;i>=start_i;i--) {
4503         j = scantable[i];
4504         level = block[j] * qmat[j];
4505
4506         if(((unsigned)(level+threshold1))>threshold2){
4507             last_non_zero = i;
4508             break;
4509         }else{
4510             block[j]=0;
4511         }
4512     }
4513     for(i=start_i; i<=last_non_zero; i++) {
4514         j = scantable[i];
4515         level = block[j] * qmat[j];
4516
4517 //        if(   bias+level >= (1<<QMAT_SHIFT)
4518 //           || bias-level >= (1<<QMAT_SHIFT)){
4519         if(((unsigned)(level+threshold1))>threshold2){
4520             if(level>0){
4521                 level= (bias + level)>>QMAT_SHIFT;
4522                 block[j]= level;
4523             }else{
4524                 level= (bias - level)>>QMAT_SHIFT;
4525                 block[j]= -level;
4526             }
4527             max |=level;
4528         }else{
4529             block[j]=0;
4530         }
4531     }
4532     *overflow= s->max_qcoeff < max; //overflow might have happened
4533
4534     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4535     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4536         ff_block_permute(block, s->idsp.idct_permutation,
4537                          scantable, last_non_zero);
4538
4539     return last_non_zero;
4540 }
4541
4542 #define OFFSET(x) offsetof(MpegEncContext, x)
4543 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4544 static const AVOption h263_options[] = {
4545     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4546     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4547     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4548     FF_MPV_COMMON_OPTS
4549     { NULL },
4550 };
4551
4552 static const AVClass h263_class = {
4553     .class_name = "H.263 encoder",
4554     .item_name  = av_default_item_name,
4555     .option     = h263_options,
4556     .version    = LIBAVUTIL_VERSION_INT,
4557 };
4558
4559 AVCodec ff_h263_encoder = {
4560     .name           = "h263",
4561     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4562     .type           = AVMEDIA_TYPE_VIDEO,
4563     .id             = AV_CODEC_ID_H263,
4564     .priv_data_size = sizeof(MpegEncContext),
4565     .init           = ff_mpv_encode_init,
4566     .encode2        = ff_mpv_encode_picture,
4567     .close          = ff_mpv_encode_end,
4568     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4569     .priv_class     = &h263_class,
4570 };
4571
4572 static const AVOption h263p_options[] = {
4573     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4574     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4575     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4576     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4577     FF_MPV_COMMON_OPTS
4578     { NULL },
4579 };
4580 static const AVClass h263p_class = {
4581     .class_name = "H.263p encoder",
4582     .item_name  = av_default_item_name,
4583     .option     = h263p_options,
4584     .version    = LIBAVUTIL_VERSION_INT,
4585 };
4586
4587 AVCodec ff_h263p_encoder = {
4588     .name           = "h263p",
4589     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4590     .type           = AVMEDIA_TYPE_VIDEO,
4591     .id             = AV_CODEC_ID_H263P,
4592     .priv_data_size = sizeof(MpegEncContext),
4593     .init           = ff_mpv_encode_init,
4594     .encode2        = ff_mpv_encode_picture,
4595     .close          = ff_mpv_encode_end,
4596     .capabilities   = CODEC_CAP_SLICE_THREADS,
4597     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4598     .priv_class     = &h263p_class,
4599 };
4600
4601 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4602
4603 AVCodec ff_msmpeg4v2_encoder = {
4604     .name           = "msmpeg4v2",
4605     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4606     .type           = AVMEDIA_TYPE_VIDEO,
4607     .id             = AV_CODEC_ID_MSMPEG4V2,
4608     .priv_data_size = sizeof(MpegEncContext),
4609     .init           = ff_mpv_encode_init,
4610     .encode2        = ff_mpv_encode_picture,
4611     .close          = ff_mpv_encode_end,
4612     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4613     .priv_class     = &msmpeg4v2_class,
4614 };
4615
4616 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4617
4618 AVCodec ff_msmpeg4v3_encoder = {
4619     .name           = "msmpeg4",
4620     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4621     .type           = AVMEDIA_TYPE_VIDEO,
4622     .id             = AV_CODEC_ID_MSMPEG4V3,
4623     .priv_data_size = sizeof(MpegEncContext),
4624     .init           = ff_mpv_encode_init,
4625     .encode2        = ff_mpv_encode_picture,
4626     .close          = ff_mpv_encode_end,
4627     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4628     .priv_class     = &msmpeg4v3_class,
4629 };
4630
4631 FF_MPV_GENERIC_CLASS(wmv1)
4632
4633 AVCodec ff_wmv1_encoder = {
4634     .name           = "wmv1",
4635     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4636     .type           = AVMEDIA_TYPE_VIDEO,
4637     .id             = AV_CODEC_ID_WMV1,
4638     .priv_data_size = sizeof(MpegEncContext),
4639     .init           = ff_mpv_encode_init,
4640     .encode2        = ff_mpv_encode_picture,
4641     .close          = ff_mpv_encode_end,
4642     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4643     .priv_class     = &wmv1_class,
4644 };