git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "mpegvideodata.h"
  44 #include "h261.h"
  45 #include "h263.h"
  46 #include "mjpegenc_common.h"
  47 #include "mathops.h"
  48 #include "mpegutils.h"
  49 #include "mjpegenc.h"
  50 #include "msmpeg4.h"
  51 #include "pixblockdsp.h"
  52 #include "qpeldsp.h"
  53 #include "faandct.h"
  54 #include "thread.h"
  55 #include "aandcttab.h"
  56 #include "flv.h"
  57 #include "mpeg4video.h"
  58 #include "internal.h"
  59 #include "bytestream.h"
  60 #include "wmv2.h"
  61 #include <limits.h>
  62 #include "sp5x.h"
  63
/* Number of fractional bits of the fixed-point quantizer bias values
 * (e.g. an intra bias of 3/8 is stored as 3 << (QUANT_BIAS_SHIFT - 3)). */
#define QUANT_BIAS_SHIFT 8

/* Scale (as a shift) of the 16-bit reciprocal table qmat16
 * filled by ff_convert_matrix(). */
#define QMAT_SHIFT_MMX 16
/* Scale (as a shift) of the int reciprocal table qmat
 * filled by ff_convert_matrix(). */
#define QMAT_SHIFT 21

/* Forward declarations of file-local helpers that are used before
 * their definition. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Shared default tables; mpv_encode_defaults() installs them as
 * s->me.mv_penalty and s->fcode_tab respectively. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table made of the FF_MPV_COMMON_OPTS entries only,
 * terminated by a NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  82
  83 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  84                        uint16_t (*qmat16)[2][64],
  85                        const uint16_t *quant_matrix,
  86                        int bias, int qmin, int qmax, int intra)
  87 {
  88     FDCTDSPContext *fdsp = &s->fdsp;
  89     int qscale;
  90     int shift = 0;
  91
  92     for (qscale = qmin; qscale <= qmax; qscale++) {
  93         int i;
  94         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  95 #if CONFIG_FAANDCT
  96             fdsp->fdct == ff_faandct            ||
  97 #endif /* CONFIG_FAANDCT */
  98             fdsp->fdct == ff_jpeg_fdct_islow_10) {
  99             for (i = 0; i < 64; i++) {
 100                 const int j = s->idsp.idct_permutation[i];
 101                 int64_t den = (int64_t) qscale * quant_matrix[j];
 102                 /* 16 <= qscale * quant_matrix[i] <= 7905
 103                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 104                  *             19952 <=              x  <= 249205026
 105                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 106                  *           3444240 >= (1 << 36) / (x) >= 275 */
 107
 108                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 109             }
 110         } else if (fdsp->fdct == ff_fdct_ifast) {
 111             for (i = 0; i < 64; i++) {
 112                 const int j = s->idsp.idct_permutation[i];
 113                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 114                 /* 16 <= qscale * quant_matrix[i] <= 7905
 115                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 116                  *             19952 <=              x  <= 249205026
 117                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 118                  *           3444240 >= (1 << 36) / (x) >= 275 */
 119
 120                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 121             }
 122         } else {
 123             for (i = 0; i < 64; i++) {
 124                 const int j = s->idsp.idct_permutation[i];
 125                 int64_t den = (int64_t) qscale * quant_matrix[j];
 126                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 127                  * Assume x = qscale * quant_matrix[i]
 128                  * So             16 <=              x  <= 7905
 129                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 130                  * so          32768 >= (1 << 19) / (x) >= 67 */
 131                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 132                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 133                 //                    (qscale * quant_matrix[i]);
 134                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 135
 136                 if (qmat16[qscale][0][i] == 0 ||
 137                     qmat16[qscale][0][i] == 128 * 256)
 138                     qmat16[qscale][0][i] = 128 * 256 - 1;
 139                 qmat16[qscale][1][i] =
 140                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 141                                 qmat16[qscale][0][i]);
 142             }
 143         }
 144
 145         for (i = intra; i < 64; i++) {
 146             int64_t max = 8191;
 147             if (fdsp->fdct == ff_fdct_ifast) {
 148                 max = (8191LL * ff_aanscales[i]) >> 14;
 149             }
 150             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 151                 shift++;
 152             }
 153         }
 154     }
 155     if (shift) {
 156         av_log(NULL, AV_LOG_INFO,
 157                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 158                QMAT_SHIFT - shift);
 159     }
 160 }
 161
 162 static inline void update_qscale(MpegEncContext *s)
 163 {
 164     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 165                 (FF_LAMBDA_SHIFT + 7);
 166     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 167
 168     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 169                  FF_LAMBDA_SHIFT;
 170 }
 171
 172 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 173 {
 174     int i;
 175
 176     if (matrix) {
 177         put_bits(pb, 1, 1);
 178         for (i = 0; i < 64; i++) {
 179             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 180         }
 181     } else
 182         put_bits(pb, 1, 0);
 183 }
 184
 185 /**
 186  * init s->current_picture.qscale_table from s->lambda_table
 187  */
 188 void ff_init_qscale_tab(MpegEncContext *s)
 189 {
 190     int8_t * const qscale_table = s->current_picture.qscale_table;
 191     int i;
 192
 193     for (i = 0; i < s->mb_num; i++) {
 194         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 195         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 196         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 197                                                   s->avctx->qmax);
 198     }
 199 }
 200
 201 static void update_duplicate_context_after_me(MpegEncContext *dst,
 202                                               MpegEncContext *src)
 203 {
 204 #define COPY(a) dst->a= src->a
 205     COPY(pict_type);
 206     COPY(current_picture);
 207     COPY(f_code);
 208     COPY(b_code);
 209     COPY(qscale);
 210     COPY(lambda);
 211     COPY(lambda2);
 212     COPY(picture_in_gop_number);
 213     COPY(gop_picture_number);
 214     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 215     COPY(progressive_frame);    // FIXME don't set in encode_header
 216     COPY(partitioned_frame);    // FIXME don't set in encode_header
 217 #undef COPY
 218 }
 219
 220 /**
 221  * Set the given MpegEncContext to defaults for encoding.
 222  * the changed fields will not depend upon the prior state of the MpegEncContext.
 223  */
 224 static void mpv_encode_defaults(MpegEncContext *s)
 225 {
 226     int i;
 227     ff_mpv_common_defaults(s);
 228
 229     for (i = -16; i < 16; i++) {
 230         default_fcode_tab[i + MAX_MV] = 1;
 231     }
 232     s->me.mv_penalty = default_mv_penalty;
 233     s->fcode_tab     = default_fcode_tab;
 234
 235     s->input_picture_number  = 0;
 236     s->picture_in_gop_number = 0;
 237 }
 238
 239 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 240     if (ARCH_X86)
 241         ff_dct_encode_init_x86(s);
 242
 243     if (CONFIG_H263_ENCODER)
 244         ff_h263dsp_init(&s->h263dsp);
 245     if (!s->dct_quantize)
 246         s->dct_quantize = ff_dct_quantize_c;
 247     if (!s->denoise_dct)
 248         s->denoise_dct  = denoise_dct_c;
 249     s->fast_dct_quantize = s->dct_quantize;
 250     if (s->avctx->trellis)
 251         s->dct_quantize  = dct_quantize_trellis_c;
 252
 253     return 0;
 254 }
 255
 256 /* init video encoder */
 257 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 258 {
 259     MpegEncContext *s = avctx->priv_data;
 260     int i, ret, format_supported;
 261
 262     mpv_encode_defaults(s);
 263
 264     switch (avctx->codec_id) {
 265     case AV_CODEC_ID_MPEG2VIDEO:
 266         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 267             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 268             av_log(avctx, AV_LOG_ERROR,
 269                    "only YUV420 and YUV422 are supported\n");
 270             return -1;
 271         }
 272         break;
 273     case AV_CODEC_ID_MJPEG:
 274     case AV_CODEC_ID_AMV:
 275         format_supported = 0;
 276         /* JPEG color space */
 277         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 278             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 279             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 280             (avctx->color_range == AVCOL_RANGE_JPEG &&
 281              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 282               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 283               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 284             format_supported = 1;
 285         /* MPEG color space */
 286         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 287                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 288                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 289                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 290             format_supported = 1;
 291
 292         if (!format_supported) {
 293             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 294             return -1;
 295         }
 296         break;
 297     default:
 298         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 299             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 300             return -1;
 301         }
 302     }
 303
 304     switch (avctx->pix_fmt) {
 305     case AV_PIX_FMT_YUVJ444P:
 306     case AV_PIX_FMT_YUV444P:
 307         s->chroma_format = CHROMA_444;
 308         break;
 309     case AV_PIX_FMT_YUVJ422P:
 310     case AV_PIX_FMT_YUV422P:
 311         s->chroma_format = CHROMA_422;
 312         break;
 313     case AV_PIX_FMT_YUVJ420P:
 314     case AV_PIX_FMT_YUV420P:
 315     default:
 316         s->chroma_format = CHROMA_420;
 317         break;
 318     }
 319
 320     s->bit_rate = avctx->bit_rate;
 321     s->width    = avctx->width;
 322     s->height   = avctx->height;
 323     if (avctx->gop_size > 600 &&
 324         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 325         av_log(avctx, AV_LOG_WARNING,
 326                "keyframe interval too large!, reducing it from %d to %d\n",
 327                avctx->gop_size, 600);
 328         avctx->gop_size = 600;
 329     }
 330     s->gop_size     = avctx->gop_size;
 331     s->avctx        = avctx;
 332     if (avctx->max_b_frames > MAX_B_FRAMES) {
 333         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 334                "is %d.\n", MAX_B_FRAMES);
 335         avctx->max_b_frames = MAX_B_FRAMES;
 336     }
 337     s->max_b_frames = avctx->max_b_frames;
 338     s->codec_id     = avctx->codec->id;
 339     s->strict_std_compliance = avctx->strict_std_compliance;
 340     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 341     s->mpeg_quant         = avctx->mpeg_quant;
 342     s->rtp_mode           = !!avctx->rtp_payload_size;
 343     s->intra_dc_precision = avctx->intra_dc_precision;
 344
 345     // workaround some differences between how applications specify dc precision
 346     if (s->intra_dc_precision < 0) {
 347         s->intra_dc_precision += 8;
 348     } else if (s->intra_dc_precision >= 8)
 349         s->intra_dc_precision -= 8;
 350
 351     if (s->intra_dc_precision < 0) {
 352         av_log(avctx, AV_LOG_ERROR,
 353                 "intra dc precision must be positive, note some applications use"
 354                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 355         return AVERROR(EINVAL);
 356     }
 357
 358     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 359         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 360         return AVERROR(EINVAL);
 361     }
 362     s->user_specified_pts = AV_NOPTS_VALUE;
 363
 364     if (s->gop_size <= 1) {
 365         s->intra_only = 1;
 366         s->gop_size   = 12;
 367     } else {
 368         s->intra_only = 0;
 369     }
 370
 371     s->me_method = avctx->me_method;
 372
 373     /* Fixed QSCALE */
 374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 375
 376 #if FF_API_MPV_OPT
 377     FF_DISABLE_DEPRECATION_WARNINGS
 378     if (avctx->border_masking != 0.0)
 379         s->border_masking = avctx->border_masking;
 380     FF_ENABLE_DEPRECATION_WARNINGS
 381 #endif
 382
 383     s->adaptive_quant = (s->avctx->lumi_masking ||
 384                          s->avctx->dark_masking ||
 385                          s->avctx->temporal_cplx_masking ||
 386                          s->avctx->spatial_cplx_masking  ||
 387                          s->avctx->p_masking      ||
 388                          s->border_masking ||
 389                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 390                         !s->fixed_qscale;
 391
 392     s->loop_filter = !!(s->avctx->flags & CODEC_FLAG_LOOP_FILTER);
 393
 394     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 395         switch(avctx->codec_id) {
 396         case AV_CODEC_ID_MPEG1VIDEO:
 397         case AV_CODEC_ID_MPEG2VIDEO:
 398             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
 399             break;
 400         case AV_CODEC_ID_MPEG4:
 401         case AV_CODEC_ID_MSMPEG4V1:
 402         case AV_CODEC_ID_MSMPEG4V2:
 403         case AV_CODEC_ID_MSMPEG4V3:
 404             if       (avctx->rc_max_rate >= 15000000) {
 405                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
 406             } else if(avctx->rc_max_rate >=  2000000) {
 407                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
 408             } else if(avctx->rc_max_rate >=   384000) {
 409                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
 410             } else
 411                 avctx->rc_buffer_size = 40;
 412             avctx->rc_buffer_size *= 16384;
 413             break;
 414         }
 415         if (avctx->rc_buffer_size) {
 416             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 417         }
 418     }
 419
 420     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 421         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 422         return -1;
 423     }
 424
 425     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 426         av_log(avctx, AV_LOG_INFO,
 427                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 428     }
 429
 430     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 431         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 432         return -1;
 433     }
 434
 435     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 436         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 437         return -1;
 438     }
 439
 440     if (avctx->rc_max_rate &&
 441         avctx->rc_max_rate == avctx->bit_rate &&
 442         avctx->rc_max_rate != avctx->rc_min_rate) {
 443         av_log(avctx, AV_LOG_INFO,
 444                "impossible bitrate constraints, this will fail\n");
 445     }
 446
 447     if (avctx->rc_buffer_size &&
 448         avctx->bit_rate * (int64_t)avctx->time_base.num >
 449             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 450         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 451         return -1;
 452     }
 453
 454     if (!s->fixed_qscale &&
 455         avctx->bit_rate * av_q2d(avctx->time_base) >
 456             avctx->bit_rate_tolerance) {
 457         av_log(avctx, AV_LOG_WARNING,
 458                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 459         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 460     }
 461
 462     if (s->avctx->rc_max_rate &&
 463         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 464         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 465          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 466         90000LL * (avctx->rc_buffer_size - 1) >
 467             s->avctx->rc_max_rate * 0xFFFFLL) {
 468         av_log(avctx, AV_LOG_INFO,
 469                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 470                "specified vbv buffer is too large for the given bitrate!\n");
 471     }
 472
 473     if ((s->avctx->flags & CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 474         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 475         s->codec_id != AV_CODEC_ID_FLV1) {
 476         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 477         return -1;
 478     }
 479
 480     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 481         av_log(avctx, AV_LOG_ERROR,
 482                "OBMC is only supported with simple mb decision\n");
 483         return -1;
 484     }
 485
 486     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 487         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 488         return -1;
 489     }
 490
 491     if (s->max_b_frames                    &&
 492         s->codec_id != AV_CODEC_ID_MPEG4      &&
 493         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 494         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 495         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 496         return -1;
 497     }
 498     if (s->max_b_frames < 0) {
 499         av_log(avctx, AV_LOG_ERROR,
 500                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 501         return -1;
 502     }
 503
 504     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 505          s->codec_id == AV_CODEC_ID_H263  ||
 506          s->codec_id == AV_CODEC_ID_H263P) &&
 507         (avctx->sample_aspect_ratio.num > 255 ||
 508          avctx->sample_aspect_ratio.den > 255)) {
 509         av_log(avctx, AV_LOG_WARNING,
 510                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 511                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 512         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 513                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 514     }
 515
 516     if ((s->codec_id == AV_CODEC_ID_H263  ||
 517          s->codec_id == AV_CODEC_ID_H263P) &&
 518         (avctx->width  > 2048 ||
 519          avctx->height > 1152 )) {
 520         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 521         return -1;
 522     }
 523     if ((s->codec_id == AV_CODEC_ID_H263  ||
 524          s->codec_id == AV_CODEC_ID_H263P) &&
 525         ((avctx->width &3) ||
 526          (avctx->height&3) )) {
 527         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 528         return -1;
 529     }
 530
 531     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 532         (avctx->width  > 4095 ||
 533          avctx->height > 4095 )) {
 534         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 535         return -1;
 536     }
 537
 538     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 539         (avctx->width  > 16383 ||
 540          avctx->height > 16383 )) {
 541         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 542         return -1;
 543     }
 544
 545     if (s->codec_id == AV_CODEC_ID_RV10 &&
 546         (avctx->width &15 ||
 547          avctx->height&15 )) {
 548         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 549         return AVERROR(EINVAL);
 550     }
 551
 552     if (s->codec_id == AV_CODEC_ID_RV20 &&
 553         (avctx->width &3 ||
 554          avctx->height&3 )) {
 555         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 556         return AVERROR(EINVAL);
 557     }
 558
 559     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 560          s->codec_id == AV_CODEC_ID_WMV2) &&
 561          avctx->width & 1) {
 562          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 563          return -1;
 564     }
 565
 566     if ((s->avctx->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 567         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 568         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 569         return -1;
 570     }
 571
 572     // FIXME mpeg2 uses that too
 573     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 574                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 575         av_log(avctx, AV_LOG_ERROR,
 576                "mpeg2 style quantization not supported by codec\n");
 577         return -1;
 578     }
 579
 580     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 581         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 582         return -1;
 583     }
 584
 585     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 586         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 587         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 588         return -1;
 589     }
 590
 591     if (s->avctx->scenechange_threshold < 1000000000 &&
 592         (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)) {
 593         av_log(avctx, AV_LOG_ERROR,
 594                "closed gop with scene change detection are not supported yet, "
 595                "set threshold to 1000000000\n");
 596         return -1;
 597     }
 598
 599     if (s->avctx->flags & CODEC_FLAG_LOW_DELAY) {
 600         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 601             av_log(avctx, AV_LOG_ERROR,
 602                   "low delay forcing is only available for mpeg2\n");
 603             return -1;
 604         }
 605         if (s->max_b_frames != 0) {
 606             av_log(avctx, AV_LOG_ERROR,
 607                    "b frames cannot be used with low delay\n");
 608             return -1;
 609         }
 610     }
 611
 612     if (s->q_scale_type == 1) {
 613         if (avctx->qmax > 12) {
 614             av_log(avctx, AV_LOG_ERROR,
 615                    "non linear quant only supports qmax <= 12 currently\n");
 616             return -1;
 617         }
 618     }
 619
 620     if (s->avctx->thread_count > 1         &&
 621         s->codec_id != AV_CODEC_ID_MPEG4      &&
 622         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 623         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 624         s->codec_id != AV_CODEC_ID_MJPEG      &&
 625         (s->codec_id != AV_CODEC_ID_H263P)) {
 626         av_log(avctx, AV_LOG_ERROR,
 627                "multi threaded encoding not supported by codec\n");
 628         return -1;
 629     }
 630
 631     if (s->avctx->thread_count < 1) {
 632         av_log(avctx, AV_LOG_ERROR,
 633                "automatic thread number detection not supported by codec, "
 634                "patch welcome\n");
 635         return -1;
 636     }
 637
 638     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 639         s->rtp_mode = 1;
 640
 641     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 642         s->h263_slice_structured = 1;
 643
 644     if (!avctx->time_base.den || !avctx->time_base.num) {
 645         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 646         return -1;
 647     }
 648
 649     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 650         av_log(avctx, AV_LOG_INFO,
 651                "notice: b_frame_strategy only affects the first pass\n");
 652         avctx->b_frame_strategy = 0;
 653     }
 654
 655     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 656     if (i > 1) {
 657         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 658         avctx->time_base.den /= i;
 659         avctx->time_base.num /= i;
 660         //return -1;
 661     }
 662
 663     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 664         // (a + x * 3 / 8) / x
 665         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 666         s->inter_quant_bias = 0;
 667     } else {
 668         s->intra_quant_bias = 0;
 669         // (a - x / 4) / x
 670         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 671     }
 672
 673     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 674         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 675         return AVERROR(EINVAL);
 676     }
 677
 678     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 679         s->intra_quant_bias = avctx->intra_quant_bias;
 680     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 681         s->inter_quant_bias = avctx->inter_quant_bias;
 682
 683     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 684
 685     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 686         s->avctx->time_base.den > (1 << 16) - 1) {
 687         av_log(avctx, AV_LOG_ERROR,
 688                "timebase %d/%d not supported by MPEG 4 standard, "
 689                "the maximum admitted value for the timebase denominator "
 690                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 691                (1 << 16) - 1);
 692         return -1;
 693     }
 694     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 695
 696     switch (avctx->codec->id) {
 697     case AV_CODEC_ID_MPEG1VIDEO:
 698         s->out_format = FMT_MPEG1;
 699         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 700         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 701         break;
 702     case AV_CODEC_ID_MPEG2VIDEO:
 703         s->out_format = FMT_MPEG1;
 704         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 705         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 706         s->rtp_mode   = 1;
 707         break;
 708     case AV_CODEC_ID_MJPEG:
 709     case AV_CODEC_ID_AMV:
 710         s->out_format = FMT_MJPEG;
 711         s->intra_only = 1; /* force intra only for jpeg */
 712         if (!CONFIG_MJPEG_ENCODER ||
 713             ff_mjpeg_encode_init(s) < 0)
 714             return -1;
 715         avctx->delay = 0;
 716         s->low_delay = 1;
 717         break;
 718     case AV_CODEC_ID_H261:
 719         if (!CONFIG_H261_ENCODER)
 720             return -1;
 721         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 722             av_log(avctx, AV_LOG_ERROR,
 723                    "The specified picture size of %dx%d is not valid for the "
 724                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 725                     s->width, s->height);
 726             return -1;
 727         }
 728         s->out_format = FMT_H261;
 729         avctx->delay  = 0;
 730         s->low_delay  = 1;
 731         s->rtp_mode   = 0; /* Sliced encoding not supported */
 732         break;
 733     case AV_CODEC_ID_H263:
 734         if (!CONFIG_H263_ENCODER)
 735             return -1;
 736         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 737                              s->width, s->height) == 8) {
 738             av_log(avctx, AV_LOG_ERROR,
 739                    "The specified picture size of %dx%d is not valid for "
 740                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 741                    "352x288, 704x576, and 1408x1152. "
 742                    "Try H.263+.\n", s->width, s->height);
 743             return -1;
 744         }
 745         s->out_format = FMT_H263;
 746         avctx->delay  = 0;
 747         s->low_delay  = 1;
 748         break;
 749     case AV_CODEC_ID_H263P:
 750         s->out_format = FMT_H263;
 751         s->h263_plus  = 1;
 752         /* Fx */
 753         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 754         s->modified_quant  = s->h263_aic;
 755         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 756         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 757
 758         /* /Fx */
 759         /* These are just to be sure */
 760         avctx->delay = 0;
 761         s->low_delay = 1;
 762         break;
 763     case AV_CODEC_ID_FLV1:
 764         s->out_format      = FMT_H263;
 765         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 766         s->unrestricted_mv = 1;
 767         s->rtp_mode  = 0; /* don't allow GOB */
 768         avctx->delay = 0;
 769         s->low_delay = 1;
 770         break;
 771     case AV_CODEC_ID_RV10:
 772         s->out_format = FMT_H263;
 773         avctx->delay  = 0;
 774         s->low_delay  = 1;
 775         break;
 776     case AV_CODEC_ID_RV20:
 777         s->out_format      = FMT_H263;
 778         avctx->delay       = 0;
 779         s->low_delay       = 1;
 780         s->modified_quant  = 1;
 781         s->h263_aic        = 1;
 782         s->h263_plus       = 1;
 783         s->loop_filter     = 1;
 784         s->unrestricted_mv = 0;
 785         break;
 786     case AV_CODEC_ID_MPEG4:
 787         s->out_format      = FMT_H263;
 788         s->h263_pred       = 1;
 789         s->unrestricted_mv = 1;
 790         s->low_delay       = s->max_b_frames ? 0 : 1;
 791         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 792         break;
 793     case AV_CODEC_ID_MSMPEG4V2:
 794         s->out_format      = FMT_H263;
 795         s->h263_pred       = 1;
 796         s->unrestricted_mv = 1;
 797         s->msmpeg4_version = 2;
 798         avctx->delay       = 0;
 799         s->low_delay       = 1;
 800         break;
 801     case AV_CODEC_ID_MSMPEG4V3:
 802         s->out_format        = FMT_H263;
 803         s->h263_pred         = 1;
 804         s->unrestricted_mv   = 1;
 805         s->msmpeg4_version   = 3;
 806         s->flipflop_rounding = 1;
 807         avctx->delay         = 0;
 808         s->low_delay         = 1;
 809         break;
 810     case AV_CODEC_ID_WMV1:
 811         s->out_format        = FMT_H263;
 812         s->h263_pred         = 1;
 813         s->unrestricted_mv   = 1;
 814         s->msmpeg4_version   = 4;
 815         s->flipflop_rounding = 1;
 816         avctx->delay         = 0;
 817         s->low_delay         = 1;
 818         break;
 819     case AV_CODEC_ID_WMV2:
 820         s->out_format        = FMT_H263;
 821         s->h263_pred         = 1;
 822         s->unrestricted_mv   = 1;
 823         s->msmpeg4_version   = 5;
 824         s->flipflop_rounding = 1;
 825         avctx->delay         = 0;
 826         s->low_delay         = 1;
 827         break;
 828     default:
 829         return -1;
 830     }
 831
 832     avctx->has_b_frames = !s->low_delay;
 833
 834     s->encoding = 1;
 835
 836     s->progressive_frame    =
 837     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 838                                                 CODEC_FLAG_INTERLACED_ME) ||
 839                                 s->alternate_scan);
 840
 841     /* init */
 842     ff_mpv_idct_init(s);
 843     if (ff_mpv_common_init(s) < 0)
 844         return -1;
 845
 846     ff_fdctdsp_init(&s->fdsp, avctx);
 847     ff_me_cmp_init(&s->mecc, avctx);
 848     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 849     ff_pixblockdsp_init(&s->pdsp, avctx);
 850     ff_qpeldsp_init(&s->qdsp);
 851
 852     s->avctx->coded_frame = s->current_picture.f;
 853
 854     if (s->msmpeg4_version) {
 855         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 856                           2 * 2 * (MAX_LEVEL + 1) *
 857                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 858     }
 859     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 860
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 866     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 867     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 868                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 869     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 870                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 871
 872     if (s->avctx->noise_reduction) {
 873         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 874                           2 * 64 * sizeof(uint16_t), fail);
 875     }
 876
 877     ff_dct_encode_init(s);
 878
 879     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 880         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 881
 882     s->quant_precision = 5;
 883
 884     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 885     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 886
 887     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 888         ff_h261_encode_init(s);
 889     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 890         ff_h263_encode_init(s);
 891     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 892         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 893             return ret;
 894     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 895         && s->out_format == FMT_MPEG1)
 896         ff_mpeg1_encode_init(s);
 897
 898     /* init q matrix */
 899     for (i = 0; i < 64; i++) {
 900         int j = s->idsp.idct_permutation[i];
 901         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 902             s->mpeg_quant) {
 903             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 904             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 905         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 906             s->intra_matrix[j] =
 907             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 908         } else {
 909             /* mpeg1/2 */
 910             s->chroma_intra_matrix[j] =
 911             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 912             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 913         }
 914         if (s->avctx->intra_matrix)
 915             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 916         if (s->avctx->inter_matrix)
 917             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 918     }
 919
 920     /* precompute matrix */
 921     /* for mjpeg, we do include qscale in the matrix */
 922     if (s->out_format != FMT_MJPEG) {
 923         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 924                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 925                           31, 1);
 926         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 927                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 928                           31, 0);
 929     }
 930
 931     if (ff_rate_control_init(s) < 0)
 932         return -1;
 933
 934 #if FF_API_ERROR_RATE
 935     FF_DISABLE_DEPRECATION_WARNINGS
 936     if (avctx->error_rate)
 937         s->error_rate = avctx->error_rate;
 938     FF_ENABLE_DEPRECATION_WARNINGS;
 939 #endif
 940
 941 #if FF_API_NORMALIZE_AQP
 942     FF_DISABLE_DEPRECATION_WARNINGS
 943     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 944         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 945     FF_ENABLE_DEPRECATION_WARNINGS;
 946 #endif
 947
 948 #if FF_API_MV0
 949     FF_DISABLE_DEPRECATION_WARNINGS
 950     if (avctx->flags & CODEC_FLAG_MV0)
 951         s->mpv_flags |= FF_MPV_FLAG_MV0;
 952     FF_ENABLE_DEPRECATION_WARNINGS
 953 #endif
 954
 955 #if FF_API_MPV_OPT
 956     FF_DISABLE_DEPRECATION_WARNINGS
 957     if (avctx->rc_qsquish != 0.0)
 958         s->rc_qsquish = avctx->rc_qsquish;
 959     if (avctx->rc_qmod_amp != 0.0)
 960         s->rc_qmod_amp = avctx->rc_qmod_amp;
 961     if (avctx->rc_qmod_freq)
 962         s->rc_qmod_freq = avctx->rc_qmod_freq;
 963     if (avctx->rc_buffer_aggressivity != 1.0)
 964         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 965     if (avctx->rc_initial_cplx != 0.0)
 966         s->rc_initial_cplx = avctx->rc_initial_cplx;
 967     if (avctx->lmin)
 968         s->lmin = avctx->lmin;
 969     if (avctx->lmax)
 970         s->lmax = avctx->lmax;
 971
 972     if (avctx->rc_eq) {
 973         av_freep(&s->rc_eq);
 974         s->rc_eq = av_strdup(avctx->rc_eq);
 975         if (!s->rc_eq)
 976             return AVERROR(ENOMEM);
 977     }
 978     FF_ENABLE_DEPRECATION_WARNINGS
 979 #endif
 980
 981     if (avctx->b_frame_strategy == 2) {
 982         for (i = 0; i < s->max_b_frames + 2; i++) {
 983             s->tmp_frames[i] = av_frame_alloc();
 984             if (!s->tmp_frames[i])
 985                 return AVERROR(ENOMEM);
 986
 987             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 988             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 989             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 990
 991             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 992             if (ret < 0)
 993                 return ret;
 994         }
 995     }
 996
 997     return 0;
 998 fail:
 999     ff_mpv_encode_end(avctx);
1000     return AVERROR_UNKNOWN;
1001 }
1002
1003 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1004 {
1005     MpegEncContext *s = avctx->priv_data;
1006     int i;
1007
1008     ff_rate_control_uninit(s);
1009
1010     ff_mpv_common_end(s);
1011     if (CONFIG_MJPEG_ENCODER &&
1012         s->out_format == FMT_MJPEG)
1013         ff_mjpeg_encode_close(s);
1014
1015     av_freep(&avctx->extradata);
1016
1017     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1018         av_frame_free(&s->tmp_frames[i]);
1019
1020     ff_free_picture_tables(&s->new_picture);
1021     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1022
1023     av_freep(&s->avctx->stats_out);
1024     av_freep(&s->ac_stats);
1025
1026     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1027     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1028     s->q_chroma_intra_matrix=   NULL;
1029     s->q_chroma_intra_matrix16= NULL;
1030     av_freep(&s->q_intra_matrix);
1031     av_freep(&s->q_inter_matrix);
1032     av_freep(&s->q_intra_matrix16);
1033     av_freep(&s->q_inter_matrix16);
1034     av_freep(&s->input_picture);
1035     av_freep(&s->reordered_input_picture);
1036     av_freep(&s->dct_offset);
1037
1038     return 0;
1039 }
1040
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1054
1055 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1056                            uint8_t *ref, int stride)
1057 {
1058     int x, y, w, h;
1059     int acc = 0;
1060
1061     w = s->width  & ~15;
1062     h = s->height & ~15;
1063
1064     for (y = 0; y < h; y += 16) {
1065         for (x = 0; x < w; x += 16) {
1066             int offset = x + y * stride;
1067             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1068                                       stride, 16);
1069             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1070             int sae  = get_sae(src + offset, mean, stride);
1071
1072             acc += sae + 500 < sad;
1073         }
1074     }
1075     return acc;
1076 }
1077
1078
1079 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1080 {
1081     Picture *pic = NULL;
1082     int64_t pts;
1083     int i, display_picture_number = 0, ret;
1084     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1085                                                  (s->low_delay ? 0 : 1);
1086     int direct = 1;
1087
1088     if (pic_arg) {
1089         pts = pic_arg->pts;
1090         display_picture_number = s->input_picture_number++;
1091
1092         if (pts != AV_NOPTS_VALUE) {
1093             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1094                 int64_t last = s->user_specified_pts;
1095
1096                 if (pts <= last) {
1097                     av_log(s->avctx, AV_LOG_ERROR,
1098                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1099                            pts, last);
1100                     return AVERROR(EINVAL);
1101                 }
1102
1103                 if (!s->low_delay && display_picture_number == 1)
1104                     s->dts_delta = pts - last;
1105             }
1106             s->user_specified_pts = pts;
1107         } else {
1108             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1109                 s->user_specified_pts =
1110                 pts = s->user_specified_pts + 1;
1111                 av_log(s->avctx, AV_LOG_INFO,
1112                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1113                        pts);
1114             } else {
1115                 pts = display_picture_number;
1116             }
1117         }
1118     }
1119
1120     if (pic_arg) {
1121         if (!pic_arg->buf[0] ||
1122             pic_arg->linesize[0] != s->linesize ||
1123             pic_arg->linesize[1] != s->uvlinesize ||
1124             pic_arg->linesize[2] != s->uvlinesize)
1125             direct = 0;
1126         if ((s->width & 15) || (s->height & 15))
1127             direct = 0;
1128         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1129             direct = 0;
1130         if (s->linesize & (STRIDE_ALIGN-1))
1131             direct = 0;
1132
1133         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1134                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1135
1136         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1137         if (i < 0)
1138             return i;
1139
1140         pic = &s->picture[i];
1141         pic->reference = 3;
1142
1143         if (direct) {
1144             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1145                 return ret;
1146         }
1147         ret = ff_alloc_picture(s, pic, direct);
1148         if (ret < 0)
1149             return ret;
1150
1151         if (!direct) {
1152             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1153                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1154                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1155                 // empty
1156             } else {
1157                 int h_chroma_shift, v_chroma_shift;
1158                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1159                                                  &h_chroma_shift,
1160                                                  &v_chroma_shift);
1161
1162                 for (i = 0; i < 3; i++) {
1163                     int src_stride = pic_arg->linesize[i];
1164                     int dst_stride = i ? s->uvlinesize : s->linesize;
1165                     int h_shift = i ? h_chroma_shift : 0;
1166                     int v_shift = i ? v_chroma_shift : 0;
1167                     int w = s->width  >> h_shift;
1168                     int h = s->height >> v_shift;
1169                     uint8_t *src = pic_arg->data[i];
1170                     uint8_t *dst = pic->f->data[i];
1171                     int vpad = 16;
1172
1173                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1174                         && !s->progressive_sequence
1175                         && FFALIGN(s->height, 32) - s->height > 16)
1176                         vpad = 32;
1177
1178                     if (!s->avctx->rc_buffer_size)
1179                         dst += INPLACE_OFFSET;
1180
1181                     if (src_stride == dst_stride)
1182                         memcpy(dst, src, src_stride * h);
1183                     else {
1184                         int h2 = h;
1185                         uint8_t *dst2 = dst;
1186                         while (h2--) {
1187                             memcpy(dst2, src, w);
1188                             dst2 += dst_stride;
1189                             src += src_stride;
1190                         }
1191                     }
1192                     if ((s->width & 15) || (s->height & (vpad-1))) {
1193                         s->mpvencdsp.draw_edges(dst, dst_stride,
1194                                                 w, h,
1195                                                 16 >> h_shift,
1196                                                 vpad >> v_shift,
1197                                                 EDGE_BOTTOM);
1198                     }
1199                 }
1200             }
1201         }
1202         ret = av_frame_copy_props(pic->f, pic_arg);
1203         if (ret < 0)
1204             return ret;
1205
1206         pic->f->display_picture_number = display_picture_number;
1207         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1208     }
1209
1210     /* shift buffer entries */
1211     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1212         s->input_picture[i - 1] = s->input_picture[i];
1213
1214     s->input_picture[encoding_delay] = (Picture*) pic;
1215
1216     return 0;
1217 }
1218
1219 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1220 {
1221     int x, y, plane;
1222     int score = 0;
1223     int64_t score64 = 0;
1224
1225     for (plane = 0; plane < 3; plane++) {
1226         const int stride = p->f->linesize[plane];
1227         const int bw = plane ? 1 : 2;
1228         for (y = 0; y < s->mb_height * bw; y++) {
1229             for (x = 0; x < s->mb_width * bw; x++) {
1230                 int off = p->shared ? 0 : 16;
1231                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1232                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1233                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1234
1235                 switch (FFABS(s->avctx->frame_skip_exp)) {
1236                 case 0: score    =  FFMAX(score, v);          break;
1237                 case 1: score   += FFABS(v);                  break;
1238                 case 2: score64 += v * (int64_t)v;                       break;
1239                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1240                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1241                 }
1242             }
1243         }
1244     }
1245     emms_c();
1246
1247     if (score)
1248         score64 = score;
1249     if (s->avctx->frame_skip_exp < 0)
1250         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1251                       -1.0/s->avctx->frame_skip_exp);
1252
1253     if (score64 < s->avctx->frame_skip_threshold)
1254         return 1;
1255     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1256         return 1;
1257     return 0;
1258 }
1259
1260 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1261 {
1262     AVPacket pkt = { 0 };
1263     int ret, got_output;
1264
1265     av_init_packet(&pkt);
1266     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1267     if (ret < 0)
1268         return ret;
1269
1270     ret = pkt.size;
1271     av_free_packet(&pkt);
1272     return ret;
1273 }
1274
1275 static int estimate_best_b_count(MpegEncContext *s)
1276 {
1277     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1278     AVCodecContext *c = avcodec_alloc_context3(NULL);
1279     const int scale = s->avctx->brd_scale;
1280     int i, j, out_size, p_lambda, b_lambda, lambda2;
1281     int64_t best_rd  = INT64_MAX;
1282     int best_b_count = -1;
1283
1284     if (!c)
1285         return AVERROR(ENOMEM);
1286     av_assert0(scale >= 0 && scale <= 3);
1287
1288     //emms_c();
1289     //s->next_picture_ptr->quality;
1290     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1291     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1292     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1293     if (!b_lambda) // FIXME we should do this somewhere else
1294         b_lambda = p_lambda;
1295     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1296                FF_LAMBDA_SHIFT;
1297
1298     c->width        = s->width  >> scale;
1299     c->height       = s->height >> scale;
1300     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1301     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1302     c->mb_decision  = s->avctx->mb_decision;
1303     c->me_cmp       = s->avctx->me_cmp;
1304     c->mb_cmp       = s->avctx->mb_cmp;
1305     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1306     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1307     c->time_base    = s->avctx->time_base;
1308     c->max_b_frames = s->max_b_frames;
1309
1310     if (avcodec_open2(c, codec, NULL) < 0)
1311         return -1;
1312
1313     for (i = 0; i < s->max_b_frames + 2; i++) {
1314         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1315                                                 s->next_picture_ptr;
1316         uint8_t *data[4];
1317
1318         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1319             pre_input = *pre_input_ptr;
1320             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1321
1322             if (!pre_input.shared && i) {
1323                 data[0] += INPLACE_OFFSET;
1324                 data[1] += INPLACE_OFFSET;
1325                 data[2] += INPLACE_OFFSET;
1326             }
1327
1328             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1329                                        s->tmp_frames[i]->linesize[0],
1330                                        data[0],
1331                                        pre_input.f->linesize[0],
1332                                        c->width, c->height);
1333             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1334                                        s->tmp_frames[i]->linesize[1],
1335                                        data[1],
1336                                        pre_input.f->linesize[1],
1337                                        c->width >> 1, c->height >> 1);
1338             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1339                                        s->tmp_frames[i]->linesize[2],
1340                                        data[2],
1341                                        pre_input.f->linesize[2],
1342                                        c->width >> 1, c->height >> 1);
1343         }
1344     }
1345
1346     for (j = 0; j < s->max_b_frames + 1; j++) {
1347         int64_t rd = 0;
1348
1349         if (!s->input_picture[j])
1350             break;
1351
1352         c->error[0] = c->error[1] = c->error[2] = 0;
1353
1354         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1355         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1356
1357         out_size = encode_frame(c, s->tmp_frames[0]);
1358
1359         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1360
1361         for (i = 0; i < s->max_b_frames + 1; i++) {
1362             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1363
1364             s->tmp_frames[i + 1]->pict_type = is_p ?
1365                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1366             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1367
1368             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1369
1370             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1371         }
1372
1373         /* get the delayed frames */
1374         while (out_size) {
1375             out_size = encode_frame(c, NULL);
1376             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1377         }
1378
1379         rd += c->error[0] + c->error[1] + c->error[2];
1380
1381         if (rd < best_rd) {
1382             best_rd = rd;
1383             best_b_count = j;
1384         }
1385     }
1386
1387     avcodec_close(c);
1388     av_freep(&c);
1389
1390     return best_b_count;
1391 }
1392
1393 static int select_input_picture(MpegEncContext *s)
1394 {
1395     int i, ret;
1396
1397     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1398         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1399     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1400
1401     /* set next picture type & ordering */
1402     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1403         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1404             if (s->picture_in_gop_number < s->gop_size &&
1405                 s->next_picture_ptr &&
1406                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1407                 // FIXME check that te gop check above is +-1 correct
1408                 av_frame_unref(s->input_picture[0]->f);
1409
1410                 ff_vbv_update(s, 0);
1411
1412                 goto no_output_pic;
1413             }
1414         }
1415
1416         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1417             !s->next_picture_ptr || s->intra_only) {
1418             s->reordered_input_picture[0] = s->input_picture[0];
1419             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1420             s->reordered_input_picture[0]->f->coded_picture_number =
1421                 s->coded_picture_number++;
1422         } else {
1423             int b_frames;
1424
1425             if (s->avctx->flags & CODEC_FLAG_PASS2) {
1426                 for (i = 0; i < s->max_b_frames + 1; i++) {
1427                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1428
1429                     if (pict_num >= s->rc_context.num_entries)
1430                         break;
1431                     if (!s->input_picture[i]) {
1432                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1433                         break;
1434                     }
1435
1436                     s->input_picture[i]->f->pict_type =
1437                         s->rc_context.entry[pict_num].new_pict_type;
1438                 }
1439             }
1440
1441             if (s->avctx->b_frame_strategy == 0) {
1442                 b_frames = s->max_b_frames;
1443                 while (b_frames && !s->input_picture[b_frames])
1444                     b_frames--;
1445             } else if (s->avctx->b_frame_strategy == 1) {
1446                 for (i = 1; i < s->max_b_frames + 1; i++) {
1447                     if (s->input_picture[i] &&
1448                         s->input_picture[i]->b_frame_score == 0) {
1449                         s->input_picture[i]->b_frame_score =
1450                             get_intra_count(s,
1451                                             s->input_picture[i    ]->f->data[0],
1452                                             s->input_picture[i - 1]->f->data[0],
1453                                             s->linesize) + 1;
1454                     }
1455                 }
1456                 for (i = 0; i < s->max_b_frames + 1; i++) {
1457                     if (!s->input_picture[i] ||
1458                         s->input_picture[i]->b_frame_score - 1 >
1459                             s->mb_num / s->avctx->b_sensitivity)
1460                         break;
1461                 }
1462
1463                 b_frames = FFMAX(0, i - 1);
1464
1465                 /* reset scores */
1466                 for (i = 0; i < b_frames + 1; i++) {
1467                     s->input_picture[i]->b_frame_score = 0;
1468                 }
1469             } else if (s->avctx->b_frame_strategy == 2) {
1470                 b_frames = estimate_best_b_count(s);
1471             } else {
1472                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1473                 b_frames = 0;
1474             }
1475
1476             emms_c();
1477
1478             for (i = b_frames - 1; i >= 0; i--) {
1479                 int type = s->input_picture[i]->f->pict_type;
1480                 if (type && type != AV_PICTURE_TYPE_B)
1481                     b_frames = i;
1482             }
1483             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1484                 b_frames == s->max_b_frames) {
1485                 av_log(s->avctx, AV_LOG_ERROR,
1486                        "warning, too many b frames in a row\n");
1487             }
1488
1489             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1490                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1491                     s->gop_size > s->picture_in_gop_number) {
1492                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1493                 } else {
1494                     if (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)
1495                         b_frames = 0;
1496                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1497                 }
1498             }
1499
1500             if ((s->avctx->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1501                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1502                 b_frames--;
1503
1504             s->reordered_input_picture[0] = s->input_picture[b_frames];
1505             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1506                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1507             s->reordered_input_picture[0]->f->coded_picture_number =
1508                 s->coded_picture_number++;
1509             for (i = 0; i < b_frames; i++) {
1510                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1511                 s->reordered_input_picture[i + 1]->f->pict_type =
1512                     AV_PICTURE_TYPE_B;
1513                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1514                     s->coded_picture_number++;
1515             }
1516         }
1517     }
1518 no_output_pic:
1519     if (s->reordered_input_picture[0]) {
1520         s->reordered_input_picture[0]->reference =
1521            s->reordered_input_picture[0]->f->pict_type !=
1522                AV_PICTURE_TYPE_B ? 3 : 0;
1523
1524         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1525         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1526             return ret;
1527
1528         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1529             // input is a shared pix, so we can't modifiy it -> alloc a new
1530             // one & ensure that the shared one is reuseable
1531
1532             Picture *pic;
1533             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1534             if (i < 0)
1535                 return i;
1536             pic = &s->picture[i];
1537
1538             pic->reference = s->reordered_input_picture[0]->reference;
1539             if (ff_alloc_picture(s, pic, 0) < 0) {
1540                 return -1;
1541             }
1542
1543             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1544             if (ret < 0)
1545                 return ret;
1546
1547             /* mark us unused / free shared pic */
1548             av_frame_unref(s->reordered_input_picture[0]->f);
1549             s->reordered_input_picture[0]->shared = 0;
1550
1551             s->current_picture_ptr = pic;
1552         } else {
1553             // input is not a shared pix -> reuse buffer for current_pix
1554             s->current_picture_ptr = s->reordered_input_picture[0];
1555             for (i = 0; i < 4; i++) {
1556                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1557             }
1558         }
1559         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1560         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1561                                        s->current_picture_ptr)) < 0)
1562             return ret;
1563
1564         s->picture_number = s->new_picture.f->display_picture_number;
1565     } else {
1566         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1567     }
1568     return 0;
1569 }
1570
1571 static void frame_end(MpegEncContext *s)
1572 {
1573     if (s->unrestricted_mv &&
1574         s->current_picture.reference &&
1575         !s->intra_only) {
1576         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1577         int hshift = desc->log2_chroma_w;
1578         int vshift = desc->log2_chroma_h;
1579         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1580                                 s->current_picture.f->linesize[0],
1581                                 s->h_edge_pos, s->v_edge_pos,
1582                                 EDGE_WIDTH, EDGE_WIDTH,
1583                                 EDGE_TOP | EDGE_BOTTOM);
1584         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1585                                 s->current_picture.f->linesize[1],
1586                                 s->h_edge_pos >> hshift,
1587                                 s->v_edge_pos >> vshift,
1588                                 EDGE_WIDTH >> hshift,
1589                                 EDGE_WIDTH >> vshift,
1590                                 EDGE_TOP | EDGE_BOTTOM);
1591         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1592                                 s->current_picture.f->linesize[2],
1593                                 s->h_edge_pos >> hshift,
1594                                 s->v_edge_pos >> vshift,
1595                                 EDGE_WIDTH >> hshift,
1596                                 EDGE_WIDTH >> vshift,
1597                                 EDGE_TOP | EDGE_BOTTOM);
1598     }
1599
1600     emms_c();
1601
1602     s->last_pict_type                 = s->pict_type;
1603     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1604     if (s->pict_type!= AV_PICTURE_TYPE_B)
1605         s->last_non_b_pict_type = s->pict_type;
1606
1607     s->avctx->coded_frame = s->current_picture_ptr->f;
1608
1609 }
1610
1611 static void update_noise_reduction(MpegEncContext *s)
1612 {
1613     int intra, i;
1614
1615     for (intra = 0; intra < 2; intra++) {
1616         if (s->dct_count[intra] > (1 << 16)) {
1617             for (i = 0; i < 64; i++) {
1618                 s->dct_error_sum[intra][i] >>= 1;
1619             }
1620             s->dct_count[intra] >>= 1;
1621         }
1622
1623         for (i = 0; i < 64; i++) {
1624             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1625                                        s->dct_count[intra] +
1626                                        s->dct_error_sum[intra][i] / 2) /
1627                                       (s->dct_error_sum[intra][i] + 1);
1628         }
1629     }
1630 }
1631
1632 static int frame_start(MpegEncContext *s)
1633 {
1634     int ret;
1635
1636     /* mark & release old frames */
1637     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1638         s->last_picture_ptr != s->next_picture_ptr &&
1639         s->last_picture_ptr->f->buf[0]) {
1640         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1641     }
1642
1643     s->current_picture_ptr->f->pict_type = s->pict_type;
1644     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1645
1646     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1647     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1648                                    s->current_picture_ptr)) < 0)
1649         return ret;
1650
1651     if (s->pict_type != AV_PICTURE_TYPE_B) {
1652         s->last_picture_ptr = s->next_picture_ptr;
1653         if (!s->droppable)
1654             s->next_picture_ptr = s->current_picture_ptr;
1655     }
1656
1657     if (s->last_picture_ptr) {
1658         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1659         if (s->last_picture_ptr->f->buf[0] &&
1660             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1661                                        s->last_picture_ptr)) < 0)
1662             return ret;
1663     }
1664     if (s->next_picture_ptr) {
1665         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1666         if (s->next_picture_ptr->f->buf[0] &&
1667             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1668                                        s->next_picture_ptr)) < 0)
1669             return ret;
1670     }
1671
1672     if (s->picture_structure!= PICT_FRAME) {
1673         int i;
1674         for (i = 0; i < 4; i++) {
1675             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1676                 s->current_picture.f->data[i] +=
1677                     s->current_picture.f->linesize[i];
1678             }
1679             s->current_picture.f->linesize[i] *= 2;
1680             s->last_picture.f->linesize[i]    *= 2;
1681             s->next_picture.f->linesize[i]    *= 2;
1682         }
1683     }
1684
1685     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1686         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1687         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1688     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1689         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1690         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1691     } else {
1692         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1693         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1694     }
1695
1696     if (s->dct_error_sum) {
1697         av_assert2(s->avctx->noise_reduction && s->encoding);
1698         update_noise_reduction(s);
1699     }
1700
1701     return 0;
1702 }
1703
1704 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1705                           const AVFrame *pic_arg, int *got_packet)
1706 {
1707     MpegEncContext *s = avctx->priv_data;
1708     int i, stuffing_count, ret;
1709     int context_count = s->slice_context_count;
1710
1711     s->picture_in_gop_number++;
1712
1713     if (load_input_picture(s, pic_arg) < 0)
1714         return -1;
1715
1716     if (select_input_picture(s) < 0) {
1717         return -1;
1718     }
1719
1720     /* output? */
1721     if (s->new_picture.f->data[0]) {
1722         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1723         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1724                                               :
1725                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1726         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1727             return ret;
1728         if (s->mb_info) {
1729             s->mb_info_ptr = av_packet_new_side_data(pkt,
1730                                  AV_PKT_DATA_H263_MB_INFO,
1731                                  s->mb_width*s->mb_height*12);
1732             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1733         }
1734
1735         for (i = 0; i < context_count; i++) {
1736             int start_y = s->thread_context[i]->start_mb_y;
1737             int   end_y = s->thread_context[i]->  end_mb_y;
1738             int h       = s->mb_height;
1739             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1740             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1741
1742             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1743         }
1744
1745         s->pict_type = s->new_picture.f->pict_type;
1746         //emms_c();
1747         ret = frame_start(s);
1748         if (ret < 0)
1749             return ret;
1750 vbv_retry:
1751         ret = encode_picture(s, s->picture_number);
1752         if (growing_buffer) {
1753             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1754             pkt->data = s->pb.buf;
1755             pkt->size = avctx->internal->byte_buffer_size;
1756         }
1757         if (ret < 0)
1758             return -1;
1759
1760         avctx->header_bits = s->header_bits;
1761         avctx->mv_bits     = s->mv_bits;
1762         avctx->misc_bits   = s->misc_bits;
1763         avctx->i_tex_bits  = s->i_tex_bits;
1764         avctx->p_tex_bits  = s->p_tex_bits;
1765         avctx->i_count     = s->i_count;
1766         // FIXME f/b_count in avctx
1767         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1768         avctx->skip_count  = s->skip_count;
1769
1770         frame_end(s);
1771
1772         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1773             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1774
1775         if (avctx->rc_buffer_size) {
1776             RateControlContext *rcc = &s->rc_context;
1777             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1778
1779             if (put_bits_count(&s->pb) > max_size &&
1780                 s->lambda < s->lmax) {
1781                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1782                                        (s->qscale + 1) / s->qscale);
1783                 if (s->adaptive_quant) {
1784                     int i;
1785                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1786                         s->lambda_table[i] =
1787                             FFMAX(s->lambda_table[i] + 1,
1788                                   s->lambda_table[i] * (s->qscale + 1) /
1789                                   s->qscale);
1790                 }
1791                 s->mb_skipped = 0;        // done in frame_start()
1792                 // done in encode_picture() so we must undo it
1793                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1794                     if (s->flipflop_rounding          ||
1795                         s->codec_id == AV_CODEC_ID_H263P ||
1796                         s->codec_id == AV_CODEC_ID_MPEG4)
1797                         s->no_rounding ^= 1;
1798                 }
1799                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1800                     s->time_base       = s->last_time_base;
1801                     s->last_non_b_time = s->time - s->pp_time;
1802                 }
1803                 for (i = 0; i < context_count; i++) {
1804                     PutBitContext *pb = &s->thread_context[i]->pb;
1805                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1806                 }
1807                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1808                 goto vbv_retry;
1809             }
1810
1811             av_assert0(s->avctx->rc_max_rate);
1812         }
1813
1814         if (s->avctx->flags & CODEC_FLAG_PASS1)
1815             ff_write_pass1_stats(s);
1816
1817         for (i = 0; i < 4; i++) {
1818             s->current_picture_ptr->f->error[i] =
1819             s->current_picture.f->error[i] =
1820                 s->current_picture.error[i];
1821             avctx->error[i] += s->current_picture_ptr->f->error[i];
1822         }
1823
1824         if (s->avctx->flags & CODEC_FLAG_PASS1)
1825             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1826                    avctx->i_tex_bits + avctx->p_tex_bits ==
1827                        put_bits_count(&s->pb));
1828         flush_put_bits(&s->pb);
1829         s->frame_bits  = put_bits_count(&s->pb);
1830
1831         stuffing_count = ff_vbv_update(s, s->frame_bits);
1832         s->stuffing_bits = 8*stuffing_count;
1833         if (stuffing_count) {
1834             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1835                     stuffing_count + 50) {
1836                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1837                 return -1;
1838             }
1839
1840             switch (s->codec_id) {
1841             case AV_CODEC_ID_MPEG1VIDEO:
1842             case AV_CODEC_ID_MPEG2VIDEO:
1843                 while (stuffing_count--) {
1844                     put_bits(&s->pb, 8, 0);
1845                 }
1846             break;
1847             case AV_CODEC_ID_MPEG4:
1848                 put_bits(&s->pb, 16, 0);
1849                 put_bits(&s->pb, 16, 0x1C3);
1850                 stuffing_count -= 4;
1851                 while (stuffing_count--) {
1852                     put_bits(&s->pb, 8, 0xFF);
1853                 }
1854             break;
1855             default:
1856                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1857             }
1858             flush_put_bits(&s->pb);
1859             s->frame_bits  = put_bits_count(&s->pb);
1860         }
1861
1862         /* update mpeg1/2 vbv_delay for CBR */
1863         if (s->avctx->rc_max_rate                          &&
1864             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1865             s->out_format == FMT_MPEG1                     &&
1866             90000LL * (avctx->rc_buffer_size - 1) <=
1867                 s->avctx->rc_max_rate * 0xFFFFLL) {
1868             int vbv_delay, min_delay;
1869             double inbits  = s->avctx->rc_max_rate *
1870                              av_q2d(s->avctx->time_base);
1871             int    minbits = s->frame_bits - 8 *
1872                              (s->vbv_delay_ptr - s->pb.buf - 1);
1873             double bits    = s->rc_context.buffer_index + minbits - inbits;
1874
1875             if (bits < 0)
1876                 av_log(s->avctx, AV_LOG_ERROR,
1877                        "Internal error, negative bits\n");
1878
1879             assert(s->repeat_first_field == 0);
1880
1881             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1882             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1883                         s->avctx->rc_max_rate;
1884
1885             vbv_delay = FFMAX(vbv_delay, min_delay);
1886
1887             av_assert0(vbv_delay < 0xFFFF);
1888
1889             s->vbv_delay_ptr[0] &= 0xF8;
1890             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1891             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1892             s->vbv_delay_ptr[2] &= 0x07;
1893             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1894             avctx->vbv_delay     = vbv_delay * 300;
1895         }
1896         s->total_bits     += s->frame_bits;
1897         avctx->frame_bits  = s->frame_bits;
1898
1899         pkt->pts = s->current_picture.f->pts;
1900         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1901             if (!s->current_picture.f->coded_picture_number)
1902                 pkt->dts = pkt->pts - s->dts_delta;
1903             else
1904                 pkt->dts = s->reordered_pts;
1905             s->reordered_pts = pkt->pts;
1906         } else
1907             pkt->dts = pkt->pts;
1908         if (s->current_picture.f->key_frame)
1909             pkt->flags |= AV_PKT_FLAG_KEY;
1910         if (s->mb_info)
1911             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1912     } else {
1913         s->frame_bits = 0;
1914     }
1915
1916     /* release non-reference frames */
1917     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1918         if (!s->picture[i].reference)
1919             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1920     }
1921
1922     av_assert1((s->frame_bits & 7) == 0);
1923
1924     pkt->size = s->frame_bits / 8;
1925     *got_packet = !!pkt->size;
1926     return 0;
1927 }
1928
1929 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1930                                                 int n, int threshold)
1931 {
1932     static const char tab[64] = {
1933         3, 2, 2, 1, 1, 1, 1, 1,
1934         1, 1, 1, 1, 1, 1, 1, 1,
1935         1, 1, 1, 1, 1, 1, 1, 1,
1936         0, 0, 0, 0, 0, 0, 0, 0,
1937         0, 0, 0, 0, 0, 0, 0, 0,
1938         0, 0, 0, 0, 0, 0, 0, 0,
1939         0, 0, 0, 0, 0, 0, 0, 0,
1940         0, 0, 0, 0, 0, 0, 0, 0
1941     };
1942     int score = 0;
1943     int run = 0;
1944     int i;
1945     int16_t *block = s->block[n];
1946     const int last_index = s->block_last_index[n];
1947     int skip_dc;
1948
1949     if (threshold < 0) {
1950         skip_dc = 0;
1951         threshold = -threshold;
1952     } else
1953         skip_dc = 1;
1954
1955     /* Are all we could set to zero already zero? */
1956     if (last_index <= skip_dc - 1)
1957         return;
1958
1959     for (i = 0; i <= last_index; i++) {
1960         const int j = s->intra_scantable.permutated[i];
1961         const int level = FFABS(block[j]);
1962         if (level == 1) {
1963             if (skip_dc && i == 0)
1964                 continue;
1965             score += tab[run];
1966             run = 0;
1967         } else if (level > 1) {
1968             return;
1969         } else {
1970             run++;
1971         }
1972     }
1973     if (score >= threshold)
1974         return;
1975     for (i = skip_dc; i <= last_index; i++) {
1976         const int j = s->intra_scantable.permutated[i];
1977         block[j] = 0;
1978     }
1979     if (block[0])
1980         s->block_last_index[n] = 0;
1981     else
1982         s->block_last_index[n] = -1;
1983 }
1984
1985 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1986                                int last_index)
1987 {
1988     int i;
1989     const int maxlevel = s->max_qcoeff;
1990     const int minlevel = s->min_qcoeff;
1991     int overflow = 0;
1992
1993     if (s->mb_intra) {
1994         i = 1; // skip clipping of intra dc
1995     } else
1996         i = 0;
1997
1998     for (; i <= last_index; i++) {
1999         const int j = s->intra_scantable.permutated[i];
2000         int level = block[j];
2001
2002         if (level > maxlevel) {
2003             level = maxlevel;
2004             overflow++;
2005         } else if (level < minlevel) {
2006             level = minlevel;
2007             overflow++;
2008         }
2009
2010         block[j] = level;
2011     }
2012
2013     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2014         av_log(s->avctx, AV_LOG_INFO,
2015                "warning, clipping %d dct coefficients to %d..%d\n",
2016                overflow, minlevel, maxlevel);
2017 }
2018
/**
 * Compute a weight for each pixel of an 8x8 block.
 *
 * For every position the sum and the sum of squares are gathered over the
 * neighbourhood of up to 3x3 pixels clipped to the block, and the weight
 * is 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum = 0;
            int sqr = 0;
            int count = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2042
2043 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2044                                                 int motion_x, int motion_y,
2045                                                 int mb_block_height,
2046                                                 int mb_block_width,
2047                                                 int mb_block_count)
2048 {
2049     int16_t weight[12][64];
2050     int16_t orig[12][64];
2051     const int mb_x = s->mb_x;
2052     const int mb_y = s->mb_y;
2053     int i;
2054     int skip_dct[12];
2055     int dct_offset = s->linesize * 8; // default for progressive frames
2056     int uv_dct_offset = s->uvlinesize * 8;
2057     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2058     ptrdiff_t wrap_y, wrap_c;
2059
2060     for (i = 0; i < mb_block_count; i++)
2061         skip_dct[i] = s->skipdct;
2062
2063     if (s->adaptive_quant) {
2064         const int last_qp = s->qscale;
2065         const int mb_xy = mb_x + mb_y * s->mb_stride;
2066
2067         s->lambda = s->lambda_table[mb_xy];
2068         update_qscale(s);
2069
2070         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2071             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2072             s->dquant = s->qscale - last_qp;
2073
2074             if (s->out_format == FMT_H263) {
2075                 s->dquant = av_clip(s->dquant, -2, 2);
2076
2077                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2078                     if (!s->mb_intra) {
2079                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2080                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2081                                 s->dquant = 0;
2082                         }
2083                         if (s->mv_type == MV_TYPE_8X8)
2084                             s->dquant = 0;
2085                     }
2086                 }
2087             }
2088         }
2089         ff_set_qscale(s, last_qp + s->dquant);
2090     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2091         ff_set_qscale(s, s->qscale + s->dquant);
2092
2093     wrap_y = s->linesize;
2094     wrap_c = s->uvlinesize;
2095     ptr_y  = s->new_picture.f->data[0] +
2096              (mb_y * 16 * wrap_y)              + mb_x * 16;
2097     ptr_cb = s->new_picture.f->data[1] +
2098              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2099     ptr_cr = s->new_picture.f->data[2] +
2100              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2101
2102     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2103         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2104         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2105         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2106         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2107                                  wrap_y, wrap_y,
2108                                  16, 16, mb_x * 16, mb_y * 16,
2109                                  s->width, s->height);
2110         ptr_y = ebuf;
2111         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2112                                  wrap_c, wrap_c,
2113                                  mb_block_width, mb_block_height,
2114                                  mb_x * mb_block_width, mb_y * mb_block_height,
2115                                  cw, ch);
2116         ptr_cb = ebuf + 16 * wrap_y;
2117         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2118                                  wrap_c, wrap_c,
2119                                  mb_block_width, mb_block_height,
2120                                  mb_x * mb_block_width, mb_y * mb_block_height,
2121                                  cw, ch);
2122         ptr_cr = ebuf + 16 * wrap_y + 16;
2123     }
2124
2125     if (s->mb_intra) {
2126         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2127             int progressive_score, interlaced_score;
2128
2129             s->interlaced_dct = 0;
2130             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2131                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2132                                                      NULL, wrap_y, 8) - 400;
2133
2134             if (progressive_score > 0) {
2135                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2136                                                         NULL, wrap_y * 2, 8) +
2137                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2138                                                         NULL, wrap_y * 2, 8);
2139                 if (progressive_score > interlaced_score) {
2140                     s->interlaced_dct = 1;
2141
2142                     dct_offset = wrap_y;
2143                     uv_dct_offset = wrap_c;
2144                     wrap_y <<= 1;
2145                     if (s->chroma_format == CHROMA_422 ||
2146                         s->chroma_format == CHROMA_444)
2147                         wrap_c <<= 1;
2148                 }
2149             }
2150         }
2151
2152         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2153         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2154         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2155         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2156
2157         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2158             skip_dct[4] = 1;
2159             skip_dct[5] = 1;
2160         } else {
2161             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2162             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2163             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2164                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2165                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2166             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2167                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2168                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2169                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2170                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2171                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2172                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2173             }
2174         }
2175     } else {
2176         op_pixels_func (*op_pix)[4];
2177         qpel_mc_func (*op_qpix)[16];
2178         uint8_t *dest_y, *dest_cb, *dest_cr;
2179
2180         dest_y  = s->dest[0];
2181         dest_cb = s->dest[1];
2182         dest_cr = s->dest[2];
2183
2184         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2185             op_pix  = s->hdsp.put_pixels_tab;
2186             op_qpix = s->qdsp.put_qpel_pixels_tab;
2187         } else {
2188             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2189             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2190         }
2191
2192         if (s->mv_dir & MV_DIR_FORWARD) {
2193             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2194                           s->last_picture.f->data,
2195                           op_pix, op_qpix);
2196             op_pix  = s->hdsp.avg_pixels_tab;
2197             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2198         }
2199         if (s->mv_dir & MV_DIR_BACKWARD) {
2200             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2201                           s->next_picture.f->data,
2202                           op_pix, op_qpix);
2203         }
2204
2205         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2206             int progressive_score, interlaced_score;
2207
2208             s->interlaced_dct = 0;
2209             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2210                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2211                                                      ptr_y + wrap_y * 8,
2212                                                      wrap_y, 8) - 400;
2213
2214             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2215                 progressive_score -= 400;
2216
2217             if (progressive_score > 0) {
2218                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2219                                                         wrap_y * 2, 8) +
2220                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2221                                                         ptr_y + wrap_y,
2222                                                         wrap_y * 2, 8);
2223
2224                 if (progressive_score > interlaced_score) {
2225                     s->interlaced_dct = 1;
2226
2227                     dct_offset = wrap_y;
2228                     uv_dct_offset = wrap_c;
2229                     wrap_y <<= 1;
2230                     if (s->chroma_format == CHROMA_422)
2231                         wrap_c <<= 1;
2232                 }
2233             }
2234         }
2235
2236         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2237         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2238         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2239                             dest_y + dct_offset, wrap_y);
2240         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2241                             dest_y + dct_offset + 8, wrap_y);
2242
2243         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2244             skip_dct[4] = 1;
2245             skip_dct[5] = 1;
2246         } else {
2247             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2248             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2249             if (!s->chroma_y_shift) { /* 422 */
2250                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2251                                     dest_cb + uv_dct_offset, wrap_c);
2252                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2253                                     dest_cr + uv_dct_offset, wrap_c);
2254             }
2255         }
2256         /* pre quantization */
2257         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2258                 2 * s->qscale * s->qscale) {
2259             // FIXME optimize
2260             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2261                 skip_dct[0] = 1;
2262             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2263                 skip_dct[1] = 1;
2264             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2265                                wrap_y, 8) < 20 * s->qscale)
2266                 skip_dct[2] = 1;
2267             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2268                                wrap_y, 8) < 20 * s->qscale)
2269                 skip_dct[3] = 1;
2270             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2271                 skip_dct[4] = 1;
2272             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2273                 skip_dct[5] = 1;
2274             if (!s->chroma_y_shift) { /* 422 */
2275                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2276                                    dest_cb + uv_dct_offset,
2277                                    wrap_c, 8) < 20 * s->qscale)
2278                     skip_dct[6] = 1;
2279                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2280                                    dest_cr + uv_dct_offset,
2281                                    wrap_c, 8) < 20 * s->qscale)
2282                     skip_dct[7] = 1;
2283             }
2284         }
2285     }
2286
2287     if (s->quantizer_noise_shaping) {
2288         if (!skip_dct[0])
2289             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2290         if (!skip_dct[1])
2291             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2292         if (!skip_dct[2])
2293             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2294         if (!skip_dct[3])
2295             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2296         if (!skip_dct[4])
2297             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2298         if (!skip_dct[5])
2299             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2300         if (!s->chroma_y_shift) { /* 422 */
2301             if (!skip_dct[6])
2302                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2303                                   wrap_c);
2304             if (!skip_dct[7])
2305                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2306                                   wrap_c);
2307         }
2308         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2309     }
2310
2311     /* DCT & quantize */
2312     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2313     {
2314         for (i = 0; i < mb_block_count; i++) {
2315             if (!skip_dct[i]) {
2316                 int overflow;
2317                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2318                 // FIXME we could decide to change to quantizer instead of
2319                 // clipping
2320                 // JS: I don't think that would be a good idea it could lower
2321                 //     quality instead of improve it. Just INTRADC clipping
2322                 //     deserves changes in quantizer
2323                 if (overflow)
2324                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2325             } else
2326                 s->block_last_index[i] = -1;
2327         }
2328         if (s->quantizer_noise_shaping) {
2329             for (i = 0; i < mb_block_count; i++) {
2330                 if (!skip_dct[i]) {
2331                     s->block_last_index[i] =
2332                         dct_quantize_refine(s, s->block[i], weight[i],
2333                                             orig[i], i, s->qscale);
2334                 }
2335             }
2336         }
2337
2338         if (s->luma_elim_threshold && !s->mb_intra)
2339             for (i = 0; i < 4; i++)
2340                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2341         if (s->chroma_elim_threshold && !s->mb_intra)
2342             for (i = 4; i < mb_block_count; i++)
2343                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2344
2345         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2346             for (i = 0; i < mb_block_count; i++) {
2347                 if (s->block_last_index[i] == -1)
2348                     s->coded_score[i] = INT_MAX / 256;
2349             }
2350         }
2351     }
2352
2353     if ((s->avctx->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2354         s->block_last_index[4] =
2355         s->block_last_index[5] = 0;
2356         s->block[4][0] =
2357         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2358         if (!s->chroma_y_shift) { /* 422 / 444 */
2359             for (i=6; i<12; i++) {
2360                 s->block_last_index[i] = 0;
2361                 s->block[i][0] = s->block[4][0];
2362             }
2363         }
2364     }
2365
2366     // non c quantize code returns incorrect block_last_index FIXME
2367     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2368         for (i = 0; i < mb_block_count; i++) {
2369             int j;
2370             if (s->block_last_index[i] > 0) {
2371                 for (j = 63; j > 0; j--) {
2372                     if (s->block[i][s->intra_scantable.permutated[j]])
2373                         break;
2374                 }
2375                 s->block_last_index[i] = j;
2376             }
2377         }
2378     }
2379
2380     /* huffman encode */
2381     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2382     case AV_CODEC_ID_MPEG1VIDEO:
2383     case AV_CODEC_ID_MPEG2VIDEO:
2384         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2385             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2386         break;
2387     case AV_CODEC_ID_MPEG4:
2388         if (CONFIG_MPEG4_ENCODER)
2389             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2390         break;
2391     case AV_CODEC_ID_MSMPEG4V2:
2392     case AV_CODEC_ID_MSMPEG4V3:
2393     case AV_CODEC_ID_WMV1:
2394         if (CONFIG_MSMPEG4_ENCODER)
2395             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2396         break;
2397     case AV_CODEC_ID_WMV2:
2398         if (CONFIG_WMV2_ENCODER)
2399             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2400         break;
2401     case AV_CODEC_ID_H261:
2402         if (CONFIG_H261_ENCODER)
2403             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2404         break;
2405     case AV_CODEC_ID_H263:
2406     case AV_CODEC_ID_H263P:
2407     case AV_CODEC_ID_FLV1:
2408     case AV_CODEC_ID_RV10:
2409     case AV_CODEC_ID_RV20:
2410         if (CONFIG_H263_ENCODER)
2411             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2412         break;
2413     case AV_CODEC_ID_MJPEG:
2414     case AV_CODEC_ID_AMV:
2415         if (CONFIG_MJPEG_ENCODER)
2416             ff_mjpeg_encode_mb(s, s->block);
2417         break;
2418     default:
2419         av_assert1(0);
2420     }
2421 }
2422
2423 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2424 {
2425     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2426     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2427     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2428 }
2429
2430 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2431     int i;
2432
2433     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2434
2435     /* mpeg1 */
2436     d->mb_skip_run= s->mb_skip_run;
2437     for(i=0; i<3; i++)
2438         d->last_dc[i] = s->last_dc[i];
2439
2440     /* statistics */
2441     d->mv_bits= s->mv_bits;
2442     d->i_tex_bits= s->i_tex_bits;
2443     d->p_tex_bits= s->p_tex_bits;
2444     d->i_count= s->i_count;
2445     d->f_count= s->f_count;
2446     d->b_count= s->b_count;
2447     d->skip_count= s->skip_count;
2448     d->misc_bits= s->misc_bits;
2449     d->last_bits= 0;
2450
2451     d->mb_skipped= 0;
2452     d->qscale= s->qscale;
2453     d->dquant= s->dquant;
2454
2455     d->esc3_level_length= s->esc3_level_length;
2456 }
2457
2458 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2459     int i;
2460
2461     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2462     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2463
2464     /* mpeg1 */
2465     d->mb_skip_run= s->mb_skip_run;
2466     for(i=0; i<3; i++)
2467         d->last_dc[i] = s->last_dc[i];
2468
2469     /* statistics */
2470     d->mv_bits= s->mv_bits;
2471     d->i_tex_bits= s->i_tex_bits;
2472     d->p_tex_bits= s->p_tex_bits;
2473     d->i_count= s->i_count;
2474     d->f_count= s->f_count;
2475     d->b_count= s->b_count;
2476     d->skip_count= s->skip_count;
2477     d->misc_bits= s->misc_bits;
2478
2479     d->mb_intra= s->mb_intra;
2480     d->mb_skipped= s->mb_skipped;
2481     d->mv_type= s->mv_type;
2482     d->mv_dir= s->mv_dir;
2483     d->pb= s->pb;
2484     if(s->data_partitioning){
2485         d->pb2= s->pb2;
2486         d->tex_pb= s->tex_pb;
2487     }
2488     d->block= s->block;
2489     for(i=0; i<8; i++)
2490         d->block_last_index[i]= s->block_last_index[i];
2491     d->interlaced_dct= s->interlaced_dct;
2492     d->qscale= s->qscale;
2493
2494     d->esc3_level_length= s->esc3_level_length;
2495 }
2496
2497 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2498                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2499                            int *dmin, int *next_block, int motion_x, int motion_y)
2500 {
2501     int score;
2502     uint8_t *dest_backup[3];
2503
2504     copy_context_before_encode(s, backup, type);
2505
2506     s->block= s->blocks[*next_block];
2507     s->pb= pb[*next_block];
2508     if(s->data_partitioning){
2509         s->pb2   = pb2   [*next_block];
2510         s->tex_pb= tex_pb[*next_block];
2511     }
2512
2513     if(*next_block){
2514         memcpy(dest_backup, s->dest, sizeof(s->dest));
2515         s->dest[0] = s->rd_scratchpad;
2516         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2517         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2518         av_assert0(s->linesize >= 32); //FIXME
2519     }
2520
2521     encode_mb(s, motion_x, motion_y);
2522
2523     score= put_bits_count(&s->pb);
2524     if(s->data_partitioning){
2525         score+= put_bits_count(&s->pb2);
2526         score+= put_bits_count(&s->tex_pb);
2527     }
2528
2529     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2530         ff_mpv_decode_mb(s, s->block);
2531
2532         score *= s->lambda2;
2533         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2534     }
2535
2536     if(*next_block){
2537         memcpy(s->dest, dest_backup, sizeof(s->dest));
2538     }
2539
2540     if(score<*dmin){
2541         *dmin= score;
2542         *next_block^=1;
2543
2544         copy_context_after_encode(best, s, type);
2545     }
2546 }
2547
2548 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2549     uint32_t *sq = ff_square_tab + 256;
2550     int acc=0;
2551     int x,y;
2552
2553     if(w==16 && h==16)
2554         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2555     else if(w==8 && h==8)
2556         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2557
2558     for(y=0; y<h; y++){
2559         for(x=0; x<w; x++){
2560             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2561         }
2562     }
2563
2564     av_assert2(acc>=0);
2565
2566     return acc;
2567 }
2568
2569 static int sse_mb(MpegEncContext *s){
2570     int w= 16;
2571     int h= 16;
2572
2573     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2574     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2575
2576     if(w==16 && h==16)
2577       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2578         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2579                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2580                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2581       }else{
2582         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2583                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2584                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2585       }
2586     else
2587         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2588                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2589                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2590 }
2591
2592 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2593     MpegEncContext *s= *(void**)arg;
2594
2595
2596     s->me.pre_pass=1;
2597     s->me.dia_size= s->avctx->pre_dia_size;
2598     s->first_slice_line=1;
2599     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2600         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2601             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2602         }
2603         s->first_slice_line=0;
2604     }
2605
2606     s->me.pre_pass=0;
2607
2608     return 0;
2609 }
2610
2611 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2612     MpegEncContext *s= *(void**)arg;
2613
2614     ff_check_alignment();
2615
2616     s->me.dia_size= s->avctx->dia_size;
2617     s->first_slice_line=1;
2618     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2619         s->mb_x=0; //for block init below
2620         ff_init_block_index(s);
2621         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2622             s->block_index[0]+=2;
2623             s->block_index[1]+=2;
2624             s->block_index[2]+=2;
2625             s->block_index[3]+=2;
2626
2627             /* compute motion vector & mb_type and store in context */
2628             if(s->pict_type==AV_PICTURE_TYPE_B)
2629                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2630             else
2631                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2632         }
2633         s->first_slice_line=0;
2634     }
2635     return 0;
2636 }
2637
2638 static int mb_var_thread(AVCodecContext *c, void *arg){
2639     MpegEncContext *s= *(void**)arg;
2640     int mb_x, mb_y;
2641
2642     ff_check_alignment();
2643
2644     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2645         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2646             int xx = mb_x * 16;
2647             int yy = mb_y * 16;
2648             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2649             int varc;
2650             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2651
2652             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2653                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2654
2655             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2656             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2657             s->me.mb_var_sum_temp    += varc;
2658         }
2659     }
2660     return 0;
2661 }
2662
2663 static void write_slice_end(MpegEncContext *s){
2664     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2665         if(s->partitioned_frame){
2666             ff_mpeg4_merge_partitions(s);
2667         }
2668
2669         ff_mpeg4_stuffing(&s->pb);
2670     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2671         ff_mjpeg_encode_stuffing(s);
2672     }
2673
2674     avpriv_align_put_bits(&s->pb);
2675     flush_put_bits(&s->pb);
2676
2677     if ((s->avctx->flags & CODEC_FLAG_PASS1) && !s->partitioned_frame)
2678         s->misc_bits+= get_bits_diff(s);
2679 }
2680
2681 static void write_mb_info(MpegEncContext *s)
2682 {
2683     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2684     int offset = put_bits_count(&s->pb);
2685     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2686     int gobn = s->mb_y / s->gob_index;
2687     int pred_x, pred_y;
2688     if (CONFIG_H263_ENCODER)
2689         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2690     bytestream_put_le32(&ptr, offset);
2691     bytestream_put_byte(&ptr, s->qscale);
2692     bytestream_put_byte(&ptr, gobn);
2693     bytestream_put_le16(&ptr, mba);
2694     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2695     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2696     /* 4MV not implemented */
2697     bytestream_put_byte(&ptr, 0); /* hmv2 */
2698     bytestream_put_byte(&ptr, 0); /* vmv2 */
2699 }
2700
2701 static void update_mb_info(MpegEncContext *s, int startcode)
2702 {
2703     if (!s->mb_info)
2704         return;
2705     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2706         s->mb_info_size += 12;
2707         s->prev_mb_info = s->last_mb_info;
2708     }
2709     if (startcode) {
2710         s->prev_mb_info = put_bits_count(&s->pb)/8;
2711         /* This might have incremented mb_info_size above, and we return without
2712          * actually writing any info into that slot yet. But in that case,
2713          * this will be called again at the start of the after writing the
2714          * start code, actually writing the mb info. */
2715         return;
2716     }
2717
2718     s->last_mb_info = put_bits_count(&s->pb)/8;
2719     if (!s->mb_info_size)
2720         s->mb_info_size += 12;
2721     write_mb_info(s);
2722 }
2723
2724 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2725 {
2726     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2727         && s->slice_context_count == 1
2728         && s->pb.buf == s->avctx->internal->byte_buffer) {
2729         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2730         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2731
2732         uint8_t *new_buffer = NULL;
2733         int new_buffer_size = 0;
2734
2735         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2736                               s->avctx->internal->byte_buffer_size + size_increase);
2737         if (!new_buffer)
2738             return AVERROR(ENOMEM);
2739
2740         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2741         av_free(s->avctx->internal->byte_buffer);
2742         s->avctx->internal->byte_buffer      = new_buffer;
2743         s->avctx->internal->byte_buffer_size = new_buffer_size;
2744         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2745         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2746         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2747     }
2748     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2749         return AVERROR(EINVAL);
2750     return 0;
2751 }
2752
2753 static int encode_thread(AVCodecContext *c, void *arg){
2754     MpegEncContext *s= *(void**)arg;
2755     int mb_x, mb_y, pdif = 0;
2756     int chr_h= 16>>s->chroma_y_shift;
2757     int i, j;
2758     MpegEncContext best_s = { 0 }, backup_s;
2759     uint8_t bit_buf[2][MAX_MB_BYTES];
2760     uint8_t bit_buf2[2][MAX_MB_BYTES];
2761     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2762     PutBitContext pb[2], pb2[2], tex_pb[2];
2763
2764     ff_check_alignment();
2765
2766     for(i=0; i<2; i++){
2767         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2768         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2769         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2770     }
2771
2772     s->last_bits= put_bits_count(&s->pb);
2773     s->mv_bits=0;
2774     s->misc_bits=0;
2775     s->i_tex_bits=0;
2776     s->p_tex_bits=0;
2777     s->i_count=0;
2778     s->f_count=0;
2779     s->b_count=0;
2780     s->skip_count=0;
2781
2782     for(i=0; i<3; i++){
2783         /* init last dc values */
2784         /* note: quant matrix value (8) is implied here */
2785         s->last_dc[i] = 128 << s->intra_dc_precision;
2786
2787         s->current_picture.error[i] = 0;
2788     }
2789     if(s->codec_id==AV_CODEC_ID_AMV){
2790         s->last_dc[0] = 128*8/13;
2791         s->last_dc[1] = 128*8/14;
2792         s->last_dc[2] = 128*8/14;
2793     }
2794     s->mb_skip_run = 0;
2795     memset(s->last_mv, 0, sizeof(s->last_mv));
2796
2797     s->last_mv_dir = 0;
2798
2799     switch(s->codec_id){
2800     case AV_CODEC_ID_H263:
2801     case AV_CODEC_ID_H263P:
2802     case AV_CODEC_ID_FLV1:
2803         if (CONFIG_H263_ENCODER)
2804             s->gob_index = H263_GOB_HEIGHT(s->height);
2805         break;
2806     case AV_CODEC_ID_MPEG4:
2807         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2808             ff_mpeg4_init_partitions(s);
2809         break;
2810     }
2811
2812     s->resync_mb_x=0;
2813     s->resync_mb_y=0;
2814     s->first_slice_line = 1;
2815     s->ptr_lastgob = s->pb.buf;
2816     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2817         s->mb_x=0;
2818         s->mb_y= mb_y;
2819
2820         ff_set_qscale(s, s->qscale);
2821         ff_init_block_index(s);
2822
2823         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2824             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2825             int mb_type= s->mb_type[xy];
2826 //            int d;
2827             int dmin= INT_MAX;
2828             int dir;
2829             int size_increase =  s->avctx->internal->byte_buffer_size/4
2830                                + s->mb_width*MAX_MB_BYTES;
2831
2832             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2833             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2834                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2835                 return -1;
2836             }
2837             if(s->data_partitioning){
2838                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2839                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2840                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2841                     return -1;
2842                 }
2843             }
2844
2845             s->mb_x = mb_x;
2846             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2847             ff_update_block_index(s);
2848
2849             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2850                 ff_h261_reorder_mb_index(s);
2851                 xy= s->mb_y*s->mb_stride + s->mb_x;
2852                 mb_type= s->mb_type[xy];
2853             }
2854
2855             /* write gob / video packet header  */
2856             if(s->rtp_mode){
2857                 int current_packet_size, is_gob_start;
2858
2859                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2860
2861                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2862
2863                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2864
2865                 switch(s->codec_id){
2866                 case AV_CODEC_ID_H263:
2867                 case AV_CODEC_ID_H263P:
2868                     if(!s->h263_slice_structured)
2869                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2870                     break;
2871                 case AV_CODEC_ID_MPEG2VIDEO:
2872                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2873                 case AV_CODEC_ID_MPEG1VIDEO:
2874                     if(s->mb_skip_run) is_gob_start=0;
2875                     break;
2876                 case AV_CODEC_ID_MJPEG:
2877                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2878                     break;
2879                 }
2880
2881                 if(is_gob_start){
2882                     if(s->start_mb_y != mb_y || mb_x!=0){
2883                         write_slice_end(s);
2884
2885                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2886                             ff_mpeg4_init_partitions(s);
2887                         }
2888                     }
2889
2890                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2891                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2892
2893                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2894                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2895                         int d = 100 / s->error_rate;
2896                         if(r % d == 0){
2897                             current_packet_size=0;
2898                             s->pb.buf_ptr= s->ptr_lastgob;
2899                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2900                         }
2901                     }
2902
2903                     if (s->avctx->rtp_callback){
2904                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2905                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2906                     }
2907                     update_mb_info(s, 1);
2908
2909                     switch(s->codec_id){
2910                     case AV_CODEC_ID_MPEG4:
2911                         if (CONFIG_MPEG4_ENCODER) {
2912                             ff_mpeg4_encode_video_packet_header(s);
2913                             ff_mpeg4_clean_buffers(s);
2914                         }
2915                     break;
2916                     case AV_CODEC_ID_MPEG1VIDEO:
2917                     case AV_CODEC_ID_MPEG2VIDEO:
2918                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2919                             ff_mpeg1_encode_slice_header(s);
2920                             ff_mpeg1_clean_buffers(s);
2921                         }
2922                     break;
2923                     case AV_CODEC_ID_H263:
2924                     case AV_CODEC_ID_H263P:
2925                         if (CONFIG_H263_ENCODER)
2926                             ff_h263_encode_gob_header(s, mb_y);
2927                     break;
2928                     }
2929
2930                     if (s->avctx->flags & CODEC_FLAG_PASS1) {
2931                         int bits= put_bits_count(&s->pb);
2932                         s->misc_bits+= bits - s->last_bits;
2933                         s->last_bits= bits;
2934                     }
2935
2936                     s->ptr_lastgob += current_packet_size;
2937                     s->first_slice_line=1;
2938                     s->resync_mb_x=mb_x;
2939                     s->resync_mb_y=mb_y;
2940                 }
2941             }
2942
2943             if(  (s->resync_mb_x   == s->mb_x)
2944                && s->resync_mb_y+1 == s->mb_y){
2945                 s->first_slice_line=0;
2946             }
2947
2948             s->mb_skipped=0;
2949             s->dquant=0; //only for QP_RD
2950
2951             update_mb_info(s, 0);
2952
2953             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2954                 int next_block=0;
2955                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2956
2957                 copy_context_before_encode(&backup_s, s, -1);
2958                 backup_s.pb= s->pb;
2959                 best_s.data_partitioning= s->data_partitioning;
2960                 best_s.partitioned_frame= s->partitioned_frame;
2961                 if(s->data_partitioning){
2962                     backup_s.pb2= s->pb2;
2963                     backup_s.tex_pb= s->tex_pb;
2964                 }
2965
2966                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2967                     s->mv_dir = MV_DIR_FORWARD;
2968                     s->mv_type = MV_TYPE_16X16;
2969                     s->mb_intra= 0;
2970                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2971                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2972                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2973                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2974                 }
2975                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2976                     s->mv_dir = MV_DIR_FORWARD;
2977                     s->mv_type = MV_TYPE_FIELD;
2978                     s->mb_intra= 0;
2979                     for(i=0; i<2; i++){
2980                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2981                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2982                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2983                     }
2984                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2985                                  &dmin, &next_block, 0, 0);
2986                 }
2987                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2988                     s->mv_dir = MV_DIR_FORWARD;
2989                     s->mv_type = MV_TYPE_16X16;
2990                     s->mb_intra= 0;
2991                     s->mv[0][0][0] = 0;
2992                     s->mv[0][0][1] = 0;
2993                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2994                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2995                 }
2996                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2997                     s->mv_dir = MV_DIR_FORWARD;
2998                     s->mv_type = MV_TYPE_8X8;
2999                     s->mb_intra= 0;
3000                     for(i=0; i<4; i++){
3001                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3002                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3003                     }
3004                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3005                                  &dmin, &next_block, 0, 0);
3006                 }
3007                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3008                     s->mv_dir = MV_DIR_FORWARD;
3009                     s->mv_type = MV_TYPE_16X16;
3010                     s->mb_intra= 0;
3011                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3012                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3013                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3014                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3015                 }
3016                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3017                     s->mv_dir = MV_DIR_BACKWARD;
3018                     s->mv_type = MV_TYPE_16X16;
3019                     s->mb_intra= 0;
3020                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3021                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3022                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3023                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3024                 }
3025                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3026                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3027                     s->mv_type = MV_TYPE_16X16;
3028                     s->mb_intra= 0;
3029                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3030                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3031                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3032                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3033                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3034                                  &dmin, &next_block, 0, 0);
3035                 }
3036                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3037                     s->mv_dir = MV_DIR_FORWARD;
3038                     s->mv_type = MV_TYPE_FIELD;
3039                     s->mb_intra= 0;
3040                     for(i=0; i<2; i++){
3041                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3042                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3043                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3044                     }
3045                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3046                                  &dmin, &next_block, 0, 0);
3047                 }
3048                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3049                     s->mv_dir = MV_DIR_BACKWARD;
3050                     s->mv_type = MV_TYPE_FIELD;
3051                     s->mb_intra= 0;
3052                     for(i=0; i<2; i++){
3053                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3054                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3055                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3056                     }
3057                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3058                                  &dmin, &next_block, 0, 0);
3059                 }
3060                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3061                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3062                     s->mv_type = MV_TYPE_FIELD;
3063                     s->mb_intra= 0;
3064                     for(dir=0; dir<2; dir++){
3065                         for(i=0; i<2; i++){
3066                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3067                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3068                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3069                         }
3070                     }
3071                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3072                                  &dmin, &next_block, 0, 0);
3073                 }
3074                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3075                     s->mv_dir = 0;
3076                     s->mv_type = MV_TYPE_16X16;
3077                     s->mb_intra= 1;
3078                     s->mv[0][0][0] = 0;
3079                     s->mv[0][0][1] = 0;
3080                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3081                                  &dmin, &next_block, 0, 0);
3082                     if(s->h263_pred || s->h263_aic){
3083                         if(best_s.mb_intra)
3084                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3085                         else
3086                             ff_clean_intra_table_entries(s); //old mode?
3087                     }
3088                 }
3089
3090                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3091                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3092                         const int last_qp= backup_s.qscale;
3093                         int qpi, qp, dc[6];
3094                         int16_t ac[6][16];
3095                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3096                         static const int dquant_tab[4]={-1,1,-2,2};
3097                         int storecoefs = s->mb_intra && s->dc_val[0];
3098
3099                         av_assert2(backup_s.dquant == 0);
3100
3101                         //FIXME intra
3102                         s->mv_dir= best_s.mv_dir;
3103                         s->mv_type = MV_TYPE_16X16;
3104                         s->mb_intra= best_s.mb_intra;
3105                         s->mv[0][0][0] = best_s.mv[0][0][0];
3106                         s->mv[0][0][1] = best_s.mv[0][0][1];
3107                         s->mv[1][0][0] = best_s.mv[1][0][0];
3108                         s->mv[1][0][1] = best_s.mv[1][0][1];
3109
3110                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3111                         for(; qpi<4; qpi++){
3112                             int dquant= dquant_tab[qpi];
3113                             qp= last_qp + dquant;
3114                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3115                                 continue;
3116                             backup_s.dquant= dquant;
3117                             if(storecoefs){
3118                                 for(i=0; i<6; i++){
3119                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3120                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3121                                 }
3122                             }
3123
3124                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3125                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3126                             if(best_s.qscale != qp){
3127                                 if(storecoefs){
3128                                     for(i=0; i<6; i++){
3129                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3130                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3131                                     }
3132                                 }
3133                             }
3134                         }
3135                     }
3136                 }
3137                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3138                     int mx= s->b_direct_mv_table[xy][0];
3139                     int my= s->b_direct_mv_table[xy][1];
3140
3141                     backup_s.dquant = 0;
3142                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3143                     s->mb_intra= 0;
3144                     ff_mpeg4_set_direct_mv(s, mx, my);
3145                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3146                                  &dmin, &next_block, mx, my);
3147                 }
3148                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3149                     backup_s.dquant = 0;
3150                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3151                     s->mb_intra= 0;
3152                     ff_mpeg4_set_direct_mv(s, 0, 0);
3153                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3154                                  &dmin, &next_block, 0, 0);
3155                 }
3156                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3157                     int coded=0;
3158                     for(i=0; i<6; i++)
3159                         coded |= s->block_last_index[i];
3160                     if(coded){
3161                         int mx,my;
3162                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3163                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3164                             mx=my=0; //FIXME find the one we actually used
3165                             ff_mpeg4_set_direct_mv(s, mx, my);
3166                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3167                             mx= s->mv[1][0][0];
3168                             my= s->mv[1][0][1];
3169                         }else{
3170                             mx= s->mv[0][0][0];
3171                             my= s->mv[0][0][1];
3172                         }
3173
3174                         s->mv_dir= best_s.mv_dir;
3175                         s->mv_type = best_s.mv_type;
3176                         s->mb_intra= 0;
3177 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3178                         s->mv[0][0][1] = best_s.mv[0][0][1];
3179                         s->mv[1][0][0] = best_s.mv[1][0][0];
3180                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3181                         backup_s.dquant= 0;
3182                         s->skipdct=1;
3183                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3184                                         &dmin, &next_block, mx, my);
3185                         s->skipdct=0;
3186                     }
3187                 }
3188
3189                 s->current_picture.qscale_table[xy] = best_s.qscale;
3190
3191                 copy_context_after_encode(s, &best_s, -1);
3192
3193                 pb_bits_count= put_bits_count(&s->pb);
3194                 flush_put_bits(&s->pb);
3195                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3196                 s->pb= backup_s.pb;
3197
3198                 if(s->data_partitioning){
3199                     pb2_bits_count= put_bits_count(&s->pb2);
3200                     flush_put_bits(&s->pb2);
3201                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3202                     s->pb2= backup_s.pb2;
3203
3204                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3205                     flush_put_bits(&s->tex_pb);
3206                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3207                     s->tex_pb= backup_s.tex_pb;
3208                 }
3209                 s->last_bits= put_bits_count(&s->pb);
3210
3211                 if (CONFIG_H263_ENCODER &&
3212                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3213                     ff_h263_update_motion_val(s);
3214
3215                 if(next_block==0){ //FIXME 16 vs linesize16
3216                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3217                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3218                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3219                 }
3220
3221                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3222                     ff_mpv_decode_mb(s, s->block);
3223             } else {
3224                 int motion_x = 0, motion_y = 0;
3225                 s->mv_type=MV_TYPE_16X16;
3226                 // only one MB-Type possible
3227
3228                 switch(mb_type){
3229                 case CANDIDATE_MB_TYPE_INTRA:
3230                     s->mv_dir = 0;
3231                     s->mb_intra= 1;
3232                     motion_x= s->mv[0][0][0] = 0;
3233                     motion_y= s->mv[0][0][1] = 0;
3234                     break;
3235                 case CANDIDATE_MB_TYPE_INTER:
3236                     s->mv_dir = MV_DIR_FORWARD;
3237                     s->mb_intra= 0;
3238                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3239                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3240                     break;
3241                 case CANDIDATE_MB_TYPE_INTER_I:
3242                     s->mv_dir = MV_DIR_FORWARD;
3243                     s->mv_type = MV_TYPE_FIELD;
3244                     s->mb_intra= 0;
3245                     for(i=0; i<2; i++){
3246                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3247                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3248                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3249                     }
3250                     break;
3251                 case CANDIDATE_MB_TYPE_INTER4V:
3252                     s->mv_dir = MV_DIR_FORWARD;
3253                     s->mv_type = MV_TYPE_8X8;
3254                     s->mb_intra= 0;
3255                     for(i=0; i<4; i++){
3256                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3257                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3258                     }
3259                     break;
3260                 case CANDIDATE_MB_TYPE_DIRECT:
3261                     if (CONFIG_MPEG4_ENCODER) {
3262                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3263                         s->mb_intra= 0;
3264                         motion_x=s->b_direct_mv_table[xy][0];
3265                         motion_y=s->b_direct_mv_table[xy][1];
3266                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3267                     }
3268                     break;
3269                 case CANDIDATE_MB_TYPE_DIRECT0:
3270                     if (CONFIG_MPEG4_ENCODER) {
3271                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3272                         s->mb_intra= 0;
3273                         ff_mpeg4_set_direct_mv(s, 0, 0);
3274                     }
3275                     break;
3276                 case CANDIDATE_MB_TYPE_BIDIR:
3277                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3278                     s->mb_intra= 0;
3279                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3280                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3281                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3282                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3283                     break;
3284                 case CANDIDATE_MB_TYPE_BACKWARD:
3285                     s->mv_dir = MV_DIR_BACKWARD;
3286                     s->mb_intra= 0;
3287                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3288                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3289                     break;
3290                 case CANDIDATE_MB_TYPE_FORWARD:
3291                     s->mv_dir = MV_DIR_FORWARD;
3292                     s->mb_intra= 0;
3293                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3294                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3295                     break;
3296                 case CANDIDATE_MB_TYPE_FORWARD_I:
3297                     s->mv_dir = MV_DIR_FORWARD;
3298                     s->mv_type = MV_TYPE_FIELD;
3299                     s->mb_intra= 0;
3300                     for(i=0; i<2; i++){
3301                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3302                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3303                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3304                     }
3305                     break;
3306                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3307                     s->mv_dir = MV_DIR_BACKWARD;
3308                     s->mv_type = MV_TYPE_FIELD;
3309                     s->mb_intra= 0;
3310                     for(i=0; i<2; i++){
3311                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3312                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3313                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3314                     }
3315                     break;
3316                 case CANDIDATE_MB_TYPE_BIDIR_I:
3317                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3318                     s->mv_type = MV_TYPE_FIELD;
3319                     s->mb_intra= 0;
3320                     for(dir=0; dir<2; dir++){
3321                         for(i=0; i<2; i++){
3322                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3323                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3324                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3325                         }
3326                     }
3327                     break;
3328                 default:
3329                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3330                 }
3331
3332                 encode_mb(s, motion_x, motion_y);
3333
3334                 // RAL: Update last macroblock type
3335                 s->last_mv_dir = s->mv_dir;
3336
3337                 if (CONFIG_H263_ENCODER &&
3338                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3339                     ff_h263_update_motion_val(s);
3340
3341                 ff_mpv_decode_mb(s, s->block);
3342             }
3343
3344             /* clean the MV table in IPS frames for direct mode in B frames */
3345             if(s->mb_intra /* && I,P,S_TYPE */){
3346                 s->p_mv_table[xy][0]=0;
3347                 s->p_mv_table[xy][1]=0;
3348             }
3349
3350             if (s->avctx->flags & CODEC_FLAG_PSNR) {
3351                 int w= 16;
3352                 int h= 16;
3353
3354                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3355                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3356
3357                 s->current_picture.error[0] += sse(
3358                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3359                     s->dest[0], w, h, s->linesize);
3360                 s->current_picture.error[1] += sse(
3361                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3362                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3363                 s->current_picture.error[2] += sse(
3364                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3365                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3366             }
3367             if(s->loop_filter){
3368                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3369                     ff_h263_loop_filter(s);
3370             }
3371             ff_dlog(s->avctx, "MB %d %d bits\n",
3372                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3373         }
3374     }
3375
3376     //not beautiful here but we must write it before flushing so it has to be here
3377     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3378         ff_msmpeg4_encode_ext_header(s);
3379
3380     write_slice_end(s);
3381
3382     /* Send the last GOB if RTP */
3383     if (s->avctx->rtp_callback) {
3384         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3385         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3386         /* Call the RTP callback to send the last GOB */
3387         emms_c();
3388         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3389     }
3390
3391     return 0;
3392 }
3393
/**
 * Add src->field to dst->field, then reset src->field to zero.
 *
 * Pointers named `dst` and `src` must be in scope where the macro is used.
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and remains correct as the body of an unbraced if/else/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3395 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3396     MERGE(me.scene_change_score);
3397     MERGE(me.mc_mb_var_sum_temp);
3398     MERGE(me.mb_var_sum_temp);
3399 }
3400
3401 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3402     int i;
3403
3404     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3405     MERGE(dct_count[1]);
3406     MERGE(mv_bits);
3407     MERGE(i_tex_bits);
3408     MERGE(p_tex_bits);
3409     MERGE(i_count);
3410     MERGE(f_count);
3411     MERGE(b_count);
3412     MERGE(skip_count);
3413     MERGE(misc_bits);
3414     MERGE(er.error_count);
3415     MERGE(padding_bug_score);
3416     MERGE(current_picture.error[0]);
3417     MERGE(current_picture.error[1]);
3418     MERGE(current_picture.error[2]);
3419
3420     if(dst->avctx->noise_reduction){
3421         for(i=0; i<64; i++){
3422             MERGE(dct_error_sum[0][i]);
3423             MERGE(dct_error_sum[1][i]);
3424         }
3425     }
3426
3427     assert(put_bits_count(&src->pb) % 8 ==0);
3428     assert(put_bits_count(&dst->pb) % 8 ==0);
3429     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3430     flush_put_bits(&dst->pb);
3431 }
3432
3433 static int estimate_qp(MpegEncContext *s, int dry_run){
3434     if (s->next_lambda){
3435         s->current_picture_ptr->f->quality =
3436         s->current_picture.f->quality = s->next_lambda;
3437         if(!dry_run) s->next_lambda= 0;
3438     } else if (!s->fixed_qscale) {
3439         s->current_picture_ptr->f->quality =
3440         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3441         if (s->current_picture.f->quality < 0)
3442             return -1;
3443     }
3444
3445     if(s->adaptive_quant){
3446         switch(s->codec_id){
3447         case AV_CODEC_ID_MPEG4:
3448             if (CONFIG_MPEG4_ENCODER)
3449                 ff_clean_mpeg4_qscales(s);
3450             break;
3451         case AV_CODEC_ID_H263:
3452         case AV_CODEC_ID_H263P:
3453         case AV_CODEC_ID_FLV1:
3454             if (CONFIG_H263_ENCODER)
3455                 ff_clean_h263_qscales(s);
3456             break;
3457         default:
3458             ff_init_qscale_tab(s);
3459         }
3460
3461         s->lambda= s->lambda_table[0];
3462         //FIXME broken
3463     }else
3464         s->lambda = s->current_picture.f->quality;
3465     update_qscale(s);
3466     return 0;
3467 }
3468
3469 /* must be called before writing the header */
3470 static void set_frame_distances(MpegEncContext * s){
3471     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3472     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3473
3474     if(s->pict_type==AV_PICTURE_TYPE_B){
3475         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3476         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3477     }else{
3478         s->pp_time= s->time - s->last_non_b_time;
3479         s->last_non_b_time= s->time;
3480         assert(s->picture_number==0 || s->pp_time > 0);
3481     }
3482 }
3483
3484 static int encode_picture(MpegEncContext *s, int picture_number)
3485 {
3486     int i, ret;
3487     int bits;
3488     int context_count = s->slice_context_count;
3489
3490     s->picture_number = picture_number;
3491
3492     /* Reset the average MB variance */
3493     s->me.mb_var_sum_temp    =
3494     s->me.mc_mb_var_sum_temp = 0;
3495
3496     /* we need to initialize some time vars before we can encode b-frames */
3497     // RAL: Condition added for MPEG1VIDEO
3498     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3499         set_frame_distances(s);
3500     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3501         ff_set_mpeg4_time(s);
3502
3503     s->me.scene_change_score=0;
3504
3505 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3506
3507     if(s->pict_type==AV_PICTURE_TYPE_I){
3508         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3509         else                        s->no_rounding=0;
3510     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3511         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3512             s->no_rounding ^= 1;
3513     }
3514
3515     if (s->avctx->flags & CODEC_FLAG_PASS2) {
3516         if (estimate_qp(s,1) < 0)
3517             return -1;
3518         ff_get_2pass_fcode(s);
3519     } else if (!(s->avctx->flags & CODEC_FLAG_QSCALE)) {
3520         if(s->pict_type==AV_PICTURE_TYPE_B)
3521             s->lambda= s->last_lambda_for[s->pict_type];
3522         else
3523             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3524         update_qscale(s);
3525     }
3526
3527     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3528         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3529         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3530         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3531         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3532     }
3533
3534     s->mb_intra=0; //for the rate distortion & bit compare functions
3535     for(i=1; i<context_count; i++){
3536         ret = ff_update_duplicate_context(s->thread_context[i], s);
3537         if (ret < 0)
3538             return ret;
3539     }
3540
3541     if(ff_init_me(s)<0)
3542         return -1;
3543
3544     /* Estimate motion for every MB */
3545     if(s->pict_type != AV_PICTURE_TYPE_I){
3546         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3547         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3548         if (s->pict_type != AV_PICTURE_TYPE_B) {
3549             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3550                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3551             }
3552         }
3553
3554         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3555     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3556         /* I-Frame */
3557         for(i=0; i<s->mb_stride*s->mb_height; i++)
3558             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3559
3560         if(!s->fixed_qscale){
3561             /* finding spatial complexity for I-frame rate control */
3562             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3563         }
3564     }
3565     for(i=1; i<context_count; i++){
3566         merge_context_after_me(s, s->thread_context[i]);
3567     }
3568     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3569     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3570     emms_c();
3571
3572     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3573         s->pict_type= AV_PICTURE_TYPE_I;
3574         for(i=0; i<s->mb_stride*s->mb_height; i++)
3575             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3576         if(s->msmpeg4_version >= 3)
3577             s->no_rounding=1;
3578         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3579                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3580     }
3581
3582     if(!s->umvplus){
3583         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3584             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3585
3586             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3587                 int a,b;
3588                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3589                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3590                 s->f_code= FFMAX3(s->f_code, a, b);
3591             }
3592
3593             ff_fix_long_p_mvs(s);
3594             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3595             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3596                 int j;
3597                 for(i=0; i<2; i++){
3598                     for(j=0; j<2; j++)
3599                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3600                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3601                 }
3602             }
3603         }
3604
3605         if(s->pict_type==AV_PICTURE_TYPE_B){
3606             int a, b;
3607
3608             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3609             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3610             s->f_code = FFMAX(a, b);
3611
3612             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3613             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3614             s->b_code = FFMAX(a, b);
3615
3616             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3617             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3618             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3619             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3620             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3621                 int dir, j;
3622                 for(dir=0; dir<2; dir++){
3623                     for(i=0; i<2; i++){
3624                         for(j=0; j<2; j++){
3625                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3626                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3627                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3628                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3629                         }
3630                     }
3631                 }
3632             }
3633         }
3634     }
3635
3636     if (estimate_qp(s, 0) < 0)
3637         return -1;
3638
3639     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3640         s->pict_type == AV_PICTURE_TYPE_I &&
3641         !(s->avctx->flags & CODEC_FLAG_QSCALE))
3642         s->qscale= 3; //reduce clipping problems
3643
3644     if (s->out_format == FMT_MJPEG) {
3645         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3646         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3647
3648         if (s->avctx->intra_matrix) {
3649             chroma_matrix =
3650             luma_matrix = s->avctx->intra_matrix;
3651         }
3652         if (s->avctx->chroma_intra_matrix)
3653             chroma_matrix = s->avctx->chroma_intra_matrix;
3654
3655         /* for mjpeg, we do include qscale in the matrix */
3656         for(i=1;i<64;i++){
3657             int j = s->idsp.idct_permutation[i];
3658
3659             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3660             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3661         }
3662         s->y_dc_scale_table=
3663         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3664         s->chroma_intra_matrix[0] =
3665         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3666         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3667                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3668         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3669                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3670         s->qscale= 8;
3671     }
3672     if(s->codec_id == AV_CODEC_ID_AMV){
3673         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3674         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3675         for(i=1;i<64;i++){
3676             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3677
3678             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3679             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3680         }
3681         s->y_dc_scale_table= y;
3682         s->c_dc_scale_table= c;
3683         s->intra_matrix[0] = 13;
3684         s->chroma_intra_matrix[0] = 14;
3685         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3686                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3687         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3688                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3689         s->qscale= 8;
3690     }
3691
3692     //FIXME var duplication
3693     s->current_picture_ptr->f->key_frame =
3694     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3695     s->current_picture_ptr->f->pict_type =
3696     s->current_picture.f->pict_type = s->pict_type;
3697
3698     if (s->current_picture.f->key_frame)
3699         s->picture_in_gop_number=0;
3700
3701     s->mb_x = s->mb_y = 0;
3702     s->last_bits= put_bits_count(&s->pb);
3703     switch(s->out_format) {
3704     case FMT_MJPEG:
3705         if (CONFIG_MJPEG_ENCODER)
3706             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3707                                            s->intra_matrix, s->chroma_intra_matrix);
3708         break;
3709     case FMT_H261:
3710         if (CONFIG_H261_ENCODER)
3711             ff_h261_encode_picture_header(s, picture_number);
3712         break;
3713     case FMT_H263:
3714         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3715             ff_wmv2_encode_picture_header(s, picture_number);
3716         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3717             ff_msmpeg4_encode_picture_header(s, picture_number);
3718         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3719             ff_mpeg4_encode_picture_header(s, picture_number);
3720         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3721             ret = ff_rv10_encode_picture_header(s, picture_number);
3722             if (ret < 0)
3723                 return ret;
3724         }
3725         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3726             ff_rv20_encode_picture_header(s, picture_number);
3727         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3728             ff_flv_encode_picture_header(s, picture_number);
3729         else if (CONFIG_H263_ENCODER)
3730             ff_h263_encode_picture_header(s, picture_number);
3731         break;
3732     case FMT_MPEG1:
3733         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3734             ff_mpeg1_encode_picture_header(s, picture_number);
3735         break;
3736     default:
3737         av_assert0(0);
3738     }
3739     bits= put_bits_count(&s->pb);
3740     s->header_bits= bits - s->last_bits;
3741
3742     for(i=1; i<context_count; i++){
3743         update_duplicate_context_after_me(s->thread_context[i], s);
3744     }
3745     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3746     for(i=1; i<context_count; i++){
3747         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3748             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3749         merge_context_after_encode(s, s->thread_context[i]);
3750     }
3751     emms_c();
3752     return 0;
3753 }
3754
3755 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3756     const int intra= s->mb_intra;
3757     int i;
3758
3759     s->dct_count[intra]++;
3760
3761     for(i=0; i<64; i++){
3762         int level= block[i];
3763
3764         if(level){
3765             if(level>0){
3766                 s->dct_error_sum[intra][i] += level;
3767                 level -= s->dct_offset[intra][i];
3768                 if(level<0) level=0;
3769             }else{
3770                 s->dct_error_sum[intra][i] -= level;
3771                 level += s->dct_offset[intra][i];
3772                 if(level>0) level=0;
3773             }
3774             block[i]= level;
3775         }
3776     }
3777 }
3778
/**
 * Forward-transform a block and quantize it with a rate-distortion
 * ("trellis") search.
 *
 * After the forward DCT (and optional denoising) every coefficient up to the
 * last one exceeding the quantization threshold gets up to two candidate
 * levels. A dynamic-programming pass over the scan positions then selects,
 * per position, the (run, level) pair minimizing
 * distortion + lambda * code length, using a list of "survivor" start
 * positions. Finally the chosen path is written back into block[].
 *
 * @param s        encoder context (DSP functions, matrices, VLC length tables)
 * @param block    64 values; passed to s->fdsp.fdct() on entry, holds the
 *                 selected quantized levels on return
 * @param n        block number; n < 4 selects the luma DC scale and intra
 *                 matrix, other values the chroma ones
 * @param qscale   quantizer scale, also used to index the q_*_matrix tables
 * @param overflow set to non-zero when the OR of all candidate magnitudes
 *                 exceeds s->max_qcoeff
 * @return scan index of the last non-zero coefficient; -1 is returned when
 *         an inter block ends up with no coefficient at all
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                                  int16_t *block, int n,
                                  int qscale, int *overflow){
    const int *qmat;
    const uint16_t *matrix;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    int run_tab[65];        // best run leading to position i (index i+1)
    int level_tab[65];      // best level for position i (index i+1)
    int score_tab[65];      // best accumulated score when coding stops before index
    int survivor[65];       // candidate start positions still worth extending
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    int coeff[2][64];       // candidate levels per scan position
    int coeff_count[64];    // number of valid candidates per scan position
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->fdsp.fdct(block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    // used further down as alevel*qmul + qadd for FMT_H263 / FMT_H261
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        // the DC coefficient is done; the search below only covers AC
        start_i = 1;
        last_non_zero = 0;
        qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
        matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
        if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
            bias= 1<<(QMAT_SHIFT-1);

        // chroma blocks use their own VLC length tables when those are set
        if (n > 3 && s->intra_chroma_ac_vlc_length) {
            length     = s->intra_chroma_ac_vlc_length;
            last_length= s->intra_chroma_ac_vlc_last_length;
        } else {
            length     = s->intra_ac_vlc_length;
            last_length= s->intra_ac_vlc_last_length;
        }
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        matrix = s->inter_matrix;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    // a single unsigned compare against threshold2 tests both signs at once
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    // find the last scan position whose coefficient exceeds the threshold
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    // build the candidate levels for every position up to last_non_zero
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            // candidates: the quantized level and the one with magnitude-1
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            // with magnitude 1 the second candidate would be 0, so skip it
            coeff_count[i]= FFMIN(level, 2);
            av_assert2(coeff_count[i]);
            max |=level;
        }else{
            // below threshold: the only candidate is +1 or -1, by sign
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    // nothing above the threshold: clear the searched range and leave
    if(last_non_zero < start_i){
        memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    // dynamic programming over scan positions
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j, zero_distortion;
        int dct_coeff= FFABS(block[ scantable[i] ]);
        int best_score=256*256*256*120;

        // undo the scaling left in place by the ifast forward DCT
        if (s->fdsp.fdct == ff_fdct_ifast)
            dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
        zero_distortion= dct_coeff*dct_coeff;

        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distortion;
            int level= coeff[level_index][i];
            const int alevel= FFABS(level);
            int unquant_coeff;

            av_assert2(level);

            // reconstruct the coefficient the way the selected format does
            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                unquant_coeff= alevel*qmul + qadd;
            } else if(s->out_format == FMT_MJPEG) {
                j = s->idsp.idct_permutation[scantable[i]];
                unquant_coeff = alevel * matrix[j] * 8;
            }else{ //MPEG1
                j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            // distortion relative to coding this coefficient as zero
            distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
            level+=64;
            // level+64 in 0..127: the VLC length tables can be indexed
            if((level&(~127)) == 0){
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                // H.263/H.261: also track the best choice when this is the
                // final coefficient, using the separate last_length table
                if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                // outside the table range: charge the escape code length
                distortion += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distortion + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distortion + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        // drop survivors whose score is worse than the new best (with a
        // lambda tolerance when last_non_zero > 27)
        //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    // formats other than H.263/H.261: pick the end position afterwards,
    // charging 2*lambda for every end position except index 0
    if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    // remember |block[0]| before the searched range is cleared
    dc= FFABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));

    if(last_non_zero < start_i)
        return last_non_zero;

    // inter block where only the first coefficient survived: re-evaluate its
    // candidates, including the option of coding nothing at all
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= FFABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    // write the chosen path back, walking from the last coefficient to the
    // first through run_tab / level_tab
    i= last_i;
    av_assert2(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
4082
4083 //#define REFINE_STATS 1
4084 static int16_t basis[64][64];
4085
4086 static void build_basis(uint8_t *perm){
4087     int i, j, x, y;
4088     emms_c();
4089     for(i=0; i<8; i++){
4090         for(j=0; j<8; j++){
4091             for(y=0; y<8; y++){
4092                 for(x=0; x<8; x++){
4093                     double s= 0.25*(1<<BASIS_SHIFT);
4094                     int index= 8*i + j;
4095                     int perm_index= perm[index];
4096                     if(i==0) s*= sqrt(0.5);
4097                     if(j==0) s*= sqrt(0.5);
4098                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4099                 }
4100             }
4101         }
4102     }
4103 }
4104
/**
 * Iteratively refine an already quantized block.
 *
 * Starting from the levels stored in block[], every iteration evaluates
 * changing one coefficient by +1 or -1. A candidate is scored as
 * lambda * (difference in VLC code length) plus the value returned by
 * s->mpvencdsp.try_8x8basis() for the matching change of the dequantized
 * coefficient. The best candidate is applied and the search repeats until
 * no candidate beats the score of leaving the block unchanged.
 *
 * @param s       encoder context
 * @param block   quantized levels addressed through
 *                intra_scantable.permutated; modified in place
 * @param weight  64 weights; overwritten with values remapped to 16..63
 * @param orig    64 reference samples (presumably the source samples the
 *                reconstruction is compared against -- confirm with caller)
 * @param n       block number; n < 4 selects the luma DC scale, other
 *                values the chroma one; also indexes s->block_last_index
 * @param qscale  quantizer scale from which qmul/qadd are derived
 * @return scan index of the last non-zero coefficient after refinement
 */
static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
                        int16_t *block, int16_t *weight, int16_t *orig,
                        int n, int qscale){
    int16_t rem[64];
    LOCAL_ALIGNED_16(int16_t, d1, [64]);
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
//    unsigned int threshold1, threshold2;
//    int bias=0;
    int run_tab[65];
    int prev_run=0;
    int prev_level=0;
    int qmul, qadd, start_i, last_non_zero, i, dc;
    uint8_t * length;
    uint8_t * last_length;
    int lambda;
    int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
#ifdef REFINE_STATS
static int count=0;
static int after_last=0;
static int to_zero=0;
static int from_zero=0;
static int raise=0;
static int lower=0;
static int messed_sign=0;
#endif

    // the basis table is built lazily on first use
    if(basis[0][0] == 0)
        build_basis(s->idsp.idct_permutation);

    // dequantization used below: qmul*level +/- qadd
    qmul= qscale*2;
    qadd= (qscale-1)|1;
    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1;
            qadd=0;
        }
        q <<= RECON_SHIFT-3;
        /* note: block[0] is assumed to be positive */
        dc= block[0]*q;
//        block[0] = (block[0] + (q >> 1)) / q;
        start_i = 1;
//        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
//            bias= 1<<(QMAT_SHIFT-1);
        // chroma blocks use their own VLC length tables when those are set
        if (n > 3 && s->intra_chroma_ac_vlc_length) {
            length     = s->intra_chroma_ac_vlc_length;
            last_length= s->intra_chroma_ac_vlc_last_length;
        } else {
            length     = s->intra_ac_vlc_length;
            last_length= s->intra_ac_vlc_last_length;
        }
    } else {
        dc= 0;
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_non_zero = s->block_last_index[n];

#ifdef REFINE_STATS
{START_TIMER
#endif
    // rem[] starts as (DC contribution + rounding term) - orig, in
    // RECON_SHIFT fixed point; the AC contributions are added further down
    dc += (1<<(RECON_SHIFT-1));
    for(i=0; i<64; i++){
        rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
    }
#ifdef REFINE_STATS
STOP_TIMER("memset rem[]")}
#endif
    // remap the weights into 16..63 and accumulate their squares
    sum=0;
    for(i=0; i<64; i++){
        int one= 36;
        int qns=4;
        int w;

        w= FFABS(weight[i]) + qns*one;
        w= 15 + (48*qns*one + w/2)/w; // 16 .. 63

        weight[i] = w;
//        w=weight[i] = (63*qns + (w/2)) / w;

        av_assert2(w>0);
        av_assert2(w<(1<<6));
        sum += w*w;
    }
    // rate weight, scaled by the sum of squared weights
    lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
#ifdef REFINE_STATS
{START_TIMER
#endif
    // record the zero-runs preceding each non-zero level and pass every
    // dequantized coefficient to add_8x8basis() (presumably accumulating
    // coeff * basis[j] into rem[] -- confirm in mpvencdsp)
    run=0;
    rle_index=0;
    for(i=start_i; i<=last_non_zero; i++){
        int j= perm_scantable[i];
        const int level= block[j];
        int coeff;

        if(level){
            if(level<0) coeff= qmul*level - qadd;
            else        coeff= qmul*level + qadd;
            run_tab[rle_index++]=run;
            run=0;

            s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
        }else{
            run++;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("init rem[]")
}
}

{START_TIMER
#endif
    // one iteration = find and apply the single best +/-1 change
    for(;;){
        // baseline: score of leaving the block as it is
        int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
        int best_coeff=0;
        int best_change=0;
        int run2, best_unquant_change=0, analyze_gradient;
#ifdef REFINE_STATS
{START_TIMER
#endif
        analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;

        // d1 = forward DCT of the weighted remainder; its signs are used
        // below to reject new +/-1 coefficients of the matching sign
        if(analyze_gradient){
#ifdef REFINE_STATS
{START_TIMER
#endif
            for(i=0; i<64; i++){
                int w= weight[i];

                d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
            }
#ifdef REFINE_STATS
STOP_TIMER("rem*w*w")}
{START_TIMER
#endif
            s->fdsp.fdct(d1);
#ifdef REFINE_STATS
STOP_TIMER("dct")}
#endif
        }

        // intra blocks: try changing the DC level (no rate term is added)
        if(start_i){
            const int level= block[0];
            int change, old_coeff;

            av_assert2(s->mb_intra);

            old_coeff= q*level;

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff;

                new_coeff= q*new_level;
                if(new_coeff >= 2048 || new_coeff < 0)
                    continue;

                score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
                                                  new_coeff - old_coeff);
                if(score<best_score){
                    best_score= score;
                    best_coeff= 0;
                    best_change= change;
                    best_unquant_change= new_coeff - old_coeff;
                }
            }
        }

        run=0;
        rle_index=0;
        run2= run_tab[rle_index++];
        prev_level=0;
        prev_run=0;

        // try +/-1 on every AC position in scan order
        for(i=start_i; i<64; i++){
            int j= perm_scantable[i];
            const int level= block[j];
            int change, old_coeff;

            // below noise-shaping level 3 only one position past the
            // current last coefficient is examined
            if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
                break;

            if(level){
                if(level<0) old_coeff= qmul*level - qadd;
                else        old_coeff= qmul*level + qadd;
                run2= run_tab[rle_index++]; //FIXME ! maybe after last
            }else{
                old_coeff=0;
                run2--;
                av_assert2(run2>=0 || i >= last_non_zero );
            }

            for(change=-1; change<=1; change+=2){
                int new_level= level + change;
                int score, new_coeff, unquant_change;

                score=0;
                // below noise-shaping level 2 magnitudes may only shrink
                if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
                   continue;

                if(new_level){
                    if(new_level<0) new_coeff= qmul*new_level - qadd;
                    else            new_coeff= qmul*new_level + qadd;
                    if(new_coeff >= 2048 || new_coeff <= -2048)
                        continue;
                    //FIXME check for overflow

                    if(level){
                        // non-zero -> non-zero: same run, different level
                        if(level < 63 && level > -63){
                            if(i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - length[UNI_AC_ENC_INDEX(run, level+64)];
                            else
                                score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
                                         - last_length[UNI_AC_ENC_INDEX(run, level+64)];
                        }
                    }else{
                        // zero -> +/-1: a new coefficient splits a run
                        av_assert2(FFABS(new_level)==1);

                        if(analyze_gradient){
                            int g= d1[ scantable[i] ];
                            if(g && (g^new_level) >= 0)
                                continue;
                        }

                        if(i < last_non_zero){
                            int next_i= i + run2 + 1;
                            int next_level= block[ perm_scantable[next_i] ] + 64;

                            if(next_level&(~127))
                                next_level= 0;

                            if(next_i < last_non_zero)
                                score +=   length[UNI_AC_ENC_INDEX(run, 65)]
                                         + length[UNI_AC_ENC_INDEX(run2, next_level)]
                                         - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                            else
                                score +=  length[UNI_AC_ENC_INDEX(run, 65)]
                                        + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                        - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
                        }else{
                            // new coefficient becomes the last one; the
                            // previous last is re-costed with length[]
                            score += last_length[UNI_AC_ENC_INDEX(run, 65)];
                            if(prev_level){
                                score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                        - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                            }
                        }
                    }
                }else{
                    // +/-1 -> zero: the coefficient vanishes, runs merge
                    new_coeff=0;
                    av_assert2(FFABS(level)==1);

                    if(i < last_non_zero){
                        int next_i= i + run2 + 1;
                        int next_level= block[ perm_scantable[next_i] ] + 64;

                        if(next_level&(~127))
                            next_level= 0;

                        if(next_i < last_non_zero)
                            score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                        else
                            score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
                                     - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
                                     - length[UNI_AC_ENC_INDEX(run, 65)];
                    }else{
                        score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
                        if(prev_level){
                            score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
                                    - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
                        }
                    }
                }

                // convert the bit difference into a rate cost
                score *= lambda;

                unquant_change= new_coeff - old_coeff;
                av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);

                score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
                                                   unquant_change);
                if(score<best_score){
                    best_score= score;
                    best_coeff= i;
                    best_change= change;
                    best_unquant_change= unquant_change;
                }
            }
            // remember the previous non-zero level (biased by 64, or 0 when
            // outside 0..127) and its run for the "last" cost terms
            if(level){
                prev_level= level + 64;
                if(prev_level&(~127))
                    prev_level= 0;
                prev_run= run;
                run=0;
            }else{
                run++;
            }
        }
#ifdef REFINE_STATS
STOP_TIMER("iterative step")}
#endif

        // apply the winning change, or stop when nothing improved
        if(best_change){
            int j= perm_scantable[ best_coeff ];

            block[j] += best_change;

            if(best_coeff > last_non_zero){
                last_non_zero= best_coeff;
                av_assert2(block[j]);
#ifdef REFINE_STATS
after_last++;
#endif
            }else{
#ifdef REFINE_STATS
if(block[j]){
    if(block[j] - best_change){
        if(FFABS(block[j]) > FFABS(block[j] - best_change)){
            raise++;
        }else{
            lower++;
        }
    }else{
        from_zero++;
    }
}else{
    to_zero++;
}
#endif
                // the change may have zeroed the last coefficient
                for(; last_non_zero>=start_i; last_non_zero--){
                    if(block[perm_scantable[last_non_zero]])
                        break;
                }
            }
#ifdef REFINE_STATS
count++;
if(256*256*256*64 % count == 0){
    av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
}
#endif
            // rebuild the run table for the modified block
            run=0;
            rle_index=0;
            for(i=start_i; i<=last_non_zero; i++){
                int j= perm_scantable[i];
                const int level= block[j];

                 if(level){
                     run_tab[rle_index++]=run;
                     run=0;
                 }else{
                     run++;
                 }
            }

            // j is the position of the changed coefficient again here (the
            // loop above used its own, inner j)
            s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
        }else{
            break;
        }
    }
#ifdef REFINE_STATS
if(last_non_zero>0){
STOP_TIMER("iterative search")
}
}
#endif

    return last_non_zero;
}
4484
4485 int ff_dct_quantize_c(MpegEncContext *s,
4486                         int16_t *block, int n,
4487                         int qscale, int *overflow)
4488 {
4489     int i, j, level, last_non_zero, q, start_i;
4490     const int *qmat;
4491     const uint8_t *scantable= s->intra_scantable.scantable;
4492     int bias;
4493     int max=0;
4494     unsigned int threshold1, threshold2;
4495
4496     s->fdsp.fdct(block);
4497
4498     if(s->dct_error_sum)
4499         s->denoise_dct(s, block);
4500
4501     if (s->mb_intra) {
4502         if (!s->h263_aic) {
4503             if (n < 4)
4504                 q = s->y_dc_scale;
4505             else
4506                 q = s->c_dc_scale;
4507             q = q << 3;
4508         } else
4509             /* For AIC we skip quant/dequant of INTRADC */
4510             q = 1 << 3;
4511
4512         /* note: block[0] is assumed to be positive */
4513         block[0] = (block[0] + (q >> 1)) / q;
4514         start_i = 1;
4515         last_non_zero = 0;
4516         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4517         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4518     } else {
4519         start_i = 0;
4520         last_non_zero = -1;
4521         qmat = s->q_inter_matrix[qscale];
4522         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4523     }
4524     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4525     threshold2= (threshold1<<1);
4526     for(i=63;i>=start_i;i--) {
4527         j = scantable[i];
4528         level = block[j] * qmat[j];
4529
4530         if(((unsigned)(level+threshold1))>threshold2){
4531             last_non_zero = i;
4532             break;
4533         }else{
4534             block[j]=0;
4535         }
4536     }
4537     for(i=start_i; i<=last_non_zero; i++) {
4538         j = scantable[i];
4539         level = block[j] * qmat[j];
4540
4541 //        if(   bias+level >= (1<<QMAT_SHIFT)
4542 //           || bias-level >= (1<<QMAT_SHIFT)){
4543         if(((unsigned)(level+threshold1))>threshold2){
4544             if(level>0){
4545                 level= (bias + level)>>QMAT_SHIFT;
4546                 block[j]= level;
4547             }else{
4548                 level= (bias - level)>>QMAT_SHIFT;
4549                 block[j]= -level;
4550             }
4551             max |=level;
4552         }else{
4553             block[j]=0;
4554         }
4555     }
4556     *overflow= s->max_qcoeff < max; //overflow might have happened
4557
4558     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4559     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4560         ff_block_permute(block, s->idsp.idct_permutation,
4561                          scantable, last_non_zero);
4562
4563     return last_non_zero;
4564 }
4565
/** Byte offset of member @p x inside MpegEncContext (used by the option tables). */
#define OFFSET(x) offsetof(MpegEncContext, x)
/**
 * Flag set shared by the option entries in this file: video parameter
 * combined with encoding parameter.
 * The replacement list is parenthesized so that the macro behaves as a
 * single operand in any expression (e.g. `VE & flag`, `~VE`).
 */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4568 static const AVOption h263_options[] = {
4569     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4570     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4571     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4572     FF_MPV_COMMON_OPTS
4573     { NULL },
4574 };
4575
4576 static const AVClass h263_class = {
4577     .class_name = "H.263 encoder",
4578     .item_name  = av_default_item_name,
4579     .option     = h263_options,
4580     .version    = LIBAVUTIL_VERSION_INT,
4581 };
4582
4583 AVCodec ff_h263_encoder = {
4584     .name           = "h263",
4585     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4586     .type           = AVMEDIA_TYPE_VIDEO,
4587     .id             = AV_CODEC_ID_H263,
4588     .priv_data_size = sizeof(MpegEncContext),
4589     .init           = ff_mpv_encode_init,
4590     .encode2        = ff_mpv_encode_picture,
4591     .close          = ff_mpv_encode_end,
4592     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4593     .priv_class     = &h263_class,
4594 };
4595
4596 static const AVOption h263p_options[] = {
4597     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4598     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4599     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4600     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4601     FF_MPV_COMMON_OPTS
4602     { NULL },
4603 };
4604 static const AVClass h263p_class = {
4605     .class_name = "H.263p encoder",
4606     .item_name  = av_default_item_name,
4607     .option     = h263p_options,
4608     .version    = LIBAVUTIL_VERSION_INT,
4609 };
4610
4611 AVCodec ff_h263p_encoder = {
4612     .name           = "h263p",
4613     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4614     .type           = AVMEDIA_TYPE_VIDEO,
4615     .id             = AV_CODEC_ID_H263P,
4616     .priv_data_size = sizeof(MpegEncContext),
4617     .init           = ff_mpv_encode_init,
4618     .encode2        = ff_mpv_encode_picture,
4619     .close          = ff_mpv_encode_end,
4620     .capabilities   = CODEC_CAP_SLICE_THREADS,
4621     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4622     .priv_class     = &h263p_class,
4623 };
4624
4625 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4626
4627 AVCodec ff_msmpeg4v2_encoder = {
4628     .name           = "msmpeg4v2",
4629     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4630     .type           = AVMEDIA_TYPE_VIDEO,
4631     .id             = AV_CODEC_ID_MSMPEG4V2,
4632     .priv_data_size = sizeof(MpegEncContext),
4633     .init           = ff_mpv_encode_init,
4634     .encode2        = ff_mpv_encode_picture,
4635     .close          = ff_mpv_encode_end,
4636     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4637     .priv_class     = &msmpeg4v2_class,
4638 };
4639
4640 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4641
4642 AVCodec ff_msmpeg4v3_encoder = {
4643     .name           = "msmpeg4",
4644     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4645     .type           = AVMEDIA_TYPE_VIDEO,
4646     .id             = AV_CODEC_ID_MSMPEG4V3,
4647     .priv_data_size = sizeof(MpegEncContext),
4648     .init           = ff_mpv_encode_init,
4649     .encode2        = ff_mpv_encode_picture,
4650     .close          = ff_mpv_encode_end,
4651     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4652     .priv_class     = &msmpeg4v3_class,
4653 };
4654
4655 FF_MPV_GENERIC_CLASS(wmv1)
4656
4657 AVCodec ff_wmv1_encoder = {
4658     .name           = "wmv1",
4659     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4660     .type           = AVMEDIA_TYPE_VIDEO,
4661     .id             = AV_CODEC_ID_WMV1,
4662     .priv_data_size = sizeof(MpegEncContext),
4663     .init           = ff_mpv_encode_init,
4664     .encode2        = ff_mpv_encode_picture,
4665     .close          = ff_mpv_encode_end,
4666     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4667     .priv_class     = &wmv1_class,
4668 };