git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "idctdsp.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mjpegenc_common.h"
  46 #include "mathops.h"
  47 #include "mpegutils.h"
  48 #include "mjpegenc.h"
  49 #include "msmpeg4.h"
  50 #include "pixblockdsp.h"
  51 #include "qpeldsp.h"
  52 #include "faandct.h"
  53 #include "thread.h"
  54 #include "aandcttab.h"
  55 #include "flv.h"
  56 #include "mpeg4video.h"
  57 #include "internal.h"
  58 #include "bytestream.h"
  59 #include "wmv2.h"
  60 #include <limits.h>
  61 #include "sp5x.h"
  62
/* Number of fractional bits in the quantiser bias values: the default
 * biases are expressed as fractions of (1 << QUANT_BIAS_SHIFT), and
 * ff_convert_matrix() rescales them with (16 - QUANT_BIAS_SHIFT). */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point precision of the 16-bit reciprocal tables (qmat16). */
#define QMAT_SHIFT_MMX 16
/* Fixed-point precision of the int reciprocal tables (qmat). */
#define QMAT_SHIFT 21

/* Prototypes of file-local helpers that are referenced before their
 * definitions in this translation unit. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Shared default tables; mpv_encode_defaults() installs them as
 * s->me.mv_penalty and s->fcode_tab respectively. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table consisting of the entries expanded from FF_MPV_COMMON_OPTS,
 * terminated by a NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  81
  82 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
  83                        uint16_t (*qmat16)[2][64],
  84                        const uint16_t *quant_matrix,
  85                        int bias, int qmin, int qmax, int intra)
  86 {
  87     FDCTDSPContext *fdsp = &s->fdsp;
  88     int qscale;
  89     int shift = 0;
  90
  91     for (qscale = qmin; qscale <= qmax; qscale++) {
  92         int i;
  93         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
  94 #if CONFIG_FAANDCT
  95             fdsp->fdct == ff_faandct            ||
  96 #endif /* CONFIG_FAANDCT */
  97             fdsp->fdct == ff_jpeg_fdct_islow_10) {
  98             for (i = 0; i < 64; i++) {
  99                 const int j = s->idsp.idct_permutation[i];
 100                 int64_t den = (int64_t) qscale * quant_matrix[j];
 101                 /* 16 <= qscale * quant_matrix[i] <= 7905
 102                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 103                  *             19952 <=              x  <= 249205026
 104                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 105                  *           3444240 >= (1 << 36) / (x) >= 275 */
 106
 107                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 108             }
 109         } else if (fdsp->fdct == ff_fdct_ifast) {
 110             for (i = 0; i < 64; i++) {
 111                 const int j = s->idsp.idct_permutation[i];
 112                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
 113                 /* 16 <= qscale * quant_matrix[i] <= 7905
 114                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 115                  *             19952 <=              x  <= 249205026
 116                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 117                  *           3444240 >= (1 << 36) / (x) >= 275 */
 118
 119                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
 120             }
 121         } else {
 122             for (i = 0; i < 64; i++) {
 123                 const int j = s->idsp.idct_permutation[i];
 124                 int64_t den = (int64_t) qscale * quant_matrix[j];
 125                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 126                  * Assume x = qscale * quant_matrix[i]
 127                  * So             16 <=              x  <= 7905
 128                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 129                  * so          32768 >= (1 << 19) / (x) >= 67 */
 130                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
 131                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 132                 //                    (qscale * quant_matrix[i]);
 133                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
 134
 135                 if (qmat16[qscale][0][i] == 0 ||
 136                     qmat16[qscale][0][i] == 128 * 256)
 137                     qmat16[qscale][0][i] = 128 * 256 - 1;
 138                 qmat16[qscale][1][i] =
 139                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 140                                 qmat16[qscale][0][i]);
 141             }
 142         }
 143
 144         for (i = intra; i < 64; i++) {
 145             int64_t max = 8191;
 146             if (fdsp->fdct == ff_fdct_ifast) {
 147                 max = (8191LL * ff_aanscales[i]) >> 14;
 148             }
 149             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 150                 shift++;
 151             }
 152         }
 153     }
 154     if (shift) {
 155         av_log(NULL, AV_LOG_INFO,
 156                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 157                QMAT_SHIFT - shift);
 158     }
 159 }
 160
 161 static inline void update_qscale(MpegEncContext *s)
 162 {
 163     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 164                 (FF_LAMBDA_SHIFT + 7);
 165     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 166
 167     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 168                  FF_LAMBDA_SHIFT;
 169 }
 170
 171 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 172 {
 173     int i;
 174
 175     if (matrix) {
 176         put_bits(pb, 1, 1);
 177         for (i = 0; i < 64; i++) {
 178             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 179         }
 180     } else
 181         put_bits(pb, 1, 0);
 182 }
 183
 184 /**
 185  * init s->current_picture.qscale_table from s->lambda_table
 186  */
 187 void ff_init_qscale_tab(MpegEncContext *s)
 188 {
 189     int8_t * const qscale_table = s->current_picture.qscale_table;
 190     int i;
 191
 192     for (i = 0; i < s->mb_num; i++) {
 193         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 194         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 195         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 196                                                   s->avctx->qmax);
 197     }
 198 }
 199
 200 static void update_duplicate_context_after_me(MpegEncContext *dst,
 201                                               MpegEncContext *src)
 202 {
 203 #define COPY(a) dst->a= src->a
 204     COPY(pict_type);
 205     COPY(current_picture);
 206     COPY(f_code);
 207     COPY(b_code);
 208     COPY(qscale);
 209     COPY(lambda);
 210     COPY(lambda2);
 211     COPY(picture_in_gop_number);
 212     COPY(gop_picture_number);
 213     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 214     COPY(progressive_frame);    // FIXME don't set in encode_header
 215     COPY(partitioned_frame);    // FIXME don't set in encode_header
 216 #undef COPY
 217 }
 218
 219 /**
 220  * Set the given MpegEncContext to defaults for encoding.
 221  * the changed fields will not depend upon the prior state of the MpegEncContext.
 222  */
 223 static void mpv_encode_defaults(MpegEncContext *s)
 224 {
 225     int i;
 226     ff_mpv_common_defaults(s);
 227
 228     for (i = -16; i < 16; i++) {
 229         default_fcode_tab[i + MAX_MV] = 1;
 230     }
 231     s->me.mv_penalty = default_mv_penalty;
 232     s->fcode_tab     = default_fcode_tab;
 233
 234     s->input_picture_number  = 0;
 235     s->picture_in_gop_number = 0;
 236 }
 237
 238 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 239     if (ARCH_X86)
 240         ff_dct_encode_init_x86(s);
 241
 242     if (CONFIG_H263_ENCODER)
 243         ff_h263dsp_init(&s->h263dsp);
 244     if (!s->dct_quantize)
 245         s->dct_quantize = ff_dct_quantize_c;
 246     if (!s->denoise_dct)
 247         s->denoise_dct  = denoise_dct_c;
 248     s->fast_dct_quantize = s->dct_quantize;
 249     if (s->avctx->trellis)
 250         s->dct_quantize  = dct_quantize_trellis_c;
 251
 252     return 0;
 253 }
 254
 255 /* init video encoder */
 256 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
 257 {
 258     MpegEncContext *s = avctx->priv_data;
 259     int i, ret, format_supported;
 260
 261     mpv_encode_defaults(s);
 262
 263     switch (avctx->codec_id) {
 264     case AV_CODEC_ID_MPEG2VIDEO:
 265         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 266             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 267             av_log(avctx, AV_LOG_ERROR,
 268                    "only YUV420 and YUV422 are supported\n");
 269             return -1;
 270         }
 271         break;
 272     case AV_CODEC_ID_MJPEG:
 273     case AV_CODEC_ID_AMV:
 274         format_supported = 0;
 275         /* JPEG color space */
 276         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
 277             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
 278             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
 279             (avctx->color_range == AVCOL_RANGE_JPEG &&
 280              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 281               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 282               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
 283             format_supported = 1;
 284         /* MPEG color space */
 285         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
 286                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
 287                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
 288                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
 289             format_supported = 1;
 290
 291         if (!format_supported) {
 292             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 293             return -1;
 294         }
 295         break;
 296     default:
 297         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 298             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 299             return -1;
 300         }
 301     }
 302
 303     switch (avctx->pix_fmt) {
 304     case AV_PIX_FMT_YUVJ444P:
 305     case AV_PIX_FMT_YUV444P:
 306         s->chroma_format = CHROMA_444;
 307         break;
 308     case AV_PIX_FMT_YUVJ422P:
 309     case AV_PIX_FMT_YUV422P:
 310         s->chroma_format = CHROMA_422;
 311         break;
 312     case AV_PIX_FMT_YUVJ420P:
 313     case AV_PIX_FMT_YUV420P:
 314     default:
 315         s->chroma_format = CHROMA_420;
 316         break;
 317     }
 318
 319     s->bit_rate = avctx->bit_rate;
 320     s->width    = avctx->width;
 321     s->height   = avctx->height;
 322     if (avctx->gop_size > 600 &&
 323         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 324         av_log(avctx, AV_LOG_WARNING,
 325                "keyframe interval too large!, reducing it from %d to %d\n",
 326                avctx->gop_size, 600);
 327         avctx->gop_size = 600;
 328     }
 329     s->gop_size     = avctx->gop_size;
 330     s->avctx        = avctx;
 331     if (avctx->max_b_frames > MAX_B_FRAMES) {
 332         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 333                "is %d.\n", MAX_B_FRAMES);
 334         avctx->max_b_frames = MAX_B_FRAMES;
 335     }
 336     s->max_b_frames = avctx->max_b_frames;
 337     s->codec_id     = avctx->codec->id;
 338     s->strict_std_compliance = avctx->strict_std_compliance;
 339     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 340     s->mpeg_quant         = avctx->mpeg_quant;
 341     s->rtp_mode           = !!avctx->rtp_payload_size;
 342     s->intra_dc_precision = avctx->intra_dc_precision;
 343
 344     // workaround some differences between how applications specify dc precision
 345     if (s->intra_dc_precision < 0) {
 346         s->intra_dc_precision += 8;
 347     } else if (s->intra_dc_precision >= 8)
 348         s->intra_dc_precision -= 8;
 349
 350     if (s->intra_dc_precision < 0) {
 351         av_log(avctx, AV_LOG_ERROR,
 352                 "intra dc precision must be positive, note some applications use"
 353                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
 354         return AVERROR(EINVAL);
 355     }
 356
 357     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
 358         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
 359         return AVERROR(EINVAL);
 360     }
 361     s->user_specified_pts = AV_NOPTS_VALUE;
 362
 363     if (s->gop_size <= 1) {
 364         s->intra_only = 1;
 365         s->gop_size   = 12;
 366     } else {
 367         s->intra_only = 0;
 368     }
 369
 370     s->me_method = avctx->me_method;
 371
 372     /* Fixed QSCALE */
 373     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 374
 375 #if FF_API_MPV_OPT
 376     FF_DISABLE_DEPRECATION_WARNINGS
 377     if (avctx->border_masking != 0.0)
 378         s->border_masking = avctx->border_masking;
 379     FF_ENABLE_DEPRECATION_WARNINGS
 380 #endif
 381
 382     s->adaptive_quant = (s->avctx->lumi_masking ||
 383                          s->avctx->dark_masking ||
 384                          s->avctx->temporal_cplx_masking ||
 385                          s->avctx->spatial_cplx_masking  ||
 386                          s->avctx->p_masking      ||
 387                          s->border_masking ||
 388                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 389                         !s->fixed_qscale;
 390
 391     s->loop_filter = !!(s->avctx->flags & CODEC_FLAG_LOOP_FILTER);
 392
 393     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 394         switch(avctx->codec_id) {
 395         case AV_CODEC_ID_MPEG1VIDEO:
 396         case AV_CODEC_ID_MPEG2VIDEO:
 397             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
 398             break;
 399         case AV_CODEC_ID_MPEG4:
 400         case AV_CODEC_ID_MSMPEG4V1:
 401         case AV_CODEC_ID_MSMPEG4V2:
 402         case AV_CODEC_ID_MSMPEG4V3:
 403             if       (avctx->rc_max_rate >= 15000000) {
 404                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
 405             } else if(avctx->rc_max_rate >=  2000000) {
 406                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
 407             } else if(avctx->rc_max_rate >=   384000) {
 408                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
 409             } else
 410                 avctx->rc_buffer_size = 40;
 411             avctx->rc_buffer_size *= 16384;
 412             break;
 413         }
 414         if (avctx->rc_buffer_size) {
 415             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 416         }
 417     }
 418
 419     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 420         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 421         return -1;
 422     }
 423
 424     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 425         av_log(avctx, AV_LOG_INFO,
 426                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 427     }
 428
 429     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 430         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 431         return -1;
 432     }
 433
 434     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 435         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 436         return -1;
 437     }
 438
 439     if (avctx->rc_max_rate &&
 440         avctx->rc_max_rate == avctx->bit_rate &&
 441         avctx->rc_max_rate != avctx->rc_min_rate) {
 442         av_log(avctx, AV_LOG_INFO,
 443                "impossible bitrate constraints, this will fail\n");
 444     }
 445
 446     if (avctx->rc_buffer_size &&
 447         avctx->bit_rate * (int64_t)avctx->time_base.num >
 448             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 449         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 450         return -1;
 451     }
 452
 453     if (!s->fixed_qscale &&
 454         avctx->bit_rate * av_q2d(avctx->time_base) >
 455             avctx->bit_rate_tolerance) {
 456         av_log(avctx, AV_LOG_WARNING,
 457                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 458         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 459     }
 460
 461     if (s->avctx->rc_max_rate &&
 462         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 463         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 464          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 465         90000LL * (avctx->rc_buffer_size - 1) >
 466             s->avctx->rc_max_rate * 0xFFFFLL) {
 467         av_log(avctx, AV_LOG_INFO,
 468                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 469                "specified vbv buffer is too large for the given bitrate!\n");
 470     }
 471
 472     if ((s->avctx->flags & CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
 473         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 474         s->codec_id != AV_CODEC_ID_FLV1) {
 475         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 476         return -1;
 477     }
 478
 479     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 480         av_log(avctx, AV_LOG_ERROR,
 481                "OBMC is only supported with simple mb decision\n");
 482         return -1;
 483     }
 484
 485     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 486         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 487         return -1;
 488     }
 489
 490     if (s->max_b_frames                    &&
 491         s->codec_id != AV_CODEC_ID_MPEG4      &&
 492         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 493         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 494         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 495         return -1;
 496     }
 497     if (s->max_b_frames < 0) {
 498         av_log(avctx, AV_LOG_ERROR,
 499                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 500         return -1;
 501     }
 502
 503     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 504          s->codec_id == AV_CODEC_ID_H263  ||
 505          s->codec_id == AV_CODEC_ID_H263P) &&
 506         (avctx->sample_aspect_ratio.num > 255 ||
 507          avctx->sample_aspect_ratio.den > 255)) {
 508         av_log(avctx, AV_LOG_WARNING,
 509                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 510                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 511         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 512                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 513     }
 514
 515     if ((s->codec_id == AV_CODEC_ID_H263  ||
 516          s->codec_id == AV_CODEC_ID_H263P) &&
 517         (avctx->width  > 2048 ||
 518          avctx->height > 1152 )) {
 519         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 520         return -1;
 521     }
 522     if ((s->codec_id == AV_CODEC_ID_H263  ||
 523          s->codec_id == AV_CODEC_ID_H263P) &&
 524         ((avctx->width &3) ||
 525          (avctx->height&3) )) {
 526         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 527         return -1;
 528     }
 529
 530     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 531         (avctx->width  > 4095 ||
 532          avctx->height > 4095 )) {
 533         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 534         return -1;
 535     }
 536
 537     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 538         (avctx->width  > 16383 ||
 539          avctx->height > 16383 )) {
 540         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 541         return -1;
 542     }
 543
 544     if (s->codec_id == AV_CODEC_ID_RV10 &&
 545         (avctx->width &15 ||
 546          avctx->height&15 )) {
 547         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 548         return AVERROR(EINVAL);
 549     }
 550
 551     if (s->codec_id == AV_CODEC_ID_RV20 &&
 552         (avctx->width &3 ||
 553          avctx->height&3 )) {
 554         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 555         return AVERROR(EINVAL);
 556     }
 557
 558     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 559          s->codec_id == AV_CODEC_ID_WMV2) &&
 560          avctx->width & 1) {
 561          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 562          return -1;
 563     }
 564
 565     if ((s->avctx->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 566         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 567         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 568         return -1;
 569     }
 570
 571     // FIXME mpeg2 uses that too
 572     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 573                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 574         av_log(avctx, AV_LOG_ERROR,
 575                "mpeg2 style quantization not supported by codec\n");
 576         return -1;
 577     }
 578
 579     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 580         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 581         return -1;
 582     }
 583
 584     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 585         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 586         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 587         return -1;
 588     }
 589
 590     if (s->avctx->scenechange_threshold < 1000000000 &&
 591         (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)) {
 592         av_log(avctx, AV_LOG_ERROR,
 593                "closed gop with scene change detection are not supported yet, "
 594                "set threshold to 1000000000\n");
 595         return -1;
 596     }
 597
 598     if (s->avctx->flags & CODEC_FLAG_LOW_DELAY) {
 599         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 600             av_log(avctx, AV_LOG_ERROR,
 601                   "low delay forcing is only available for mpeg2\n");
 602             return -1;
 603         }
 604         if (s->max_b_frames != 0) {
 605             av_log(avctx, AV_LOG_ERROR,
 606                    "b frames cannot be used with low delay\n");
 607             return -1;
 608         }
 609     }
 610
 611     if (s->q_scale_type == 1) {
 612         if (avctx->qmax > 12) {
 613             av_log(avctx, AV_LOG_ERROR,
 614                    "non linear quant only supports qmax <= 12 currently\n");
 615             return -1;
 616         }
 617     }
 618
 619     if (s->avctx->thread_count > 1         &&
 620         s->codec_id != AV_CODEC_ID_MPEG4      &&
 621         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 622         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 623         s->codec_id != AV_CODEC_ID_MJPEG      &&
 624         (s->codec_id != AV_CODEC_ID_H263P)) {
 625         av_log(avctx, AV_LOG_ERROR,
 626                "multi threaded encoding not supported by codec\n");
 627         return -1;
 628     }
 629
 630     if (s->avctx->thread_count < 1) {
 631         av_log(avctx, AV_LOG_ERROR,
 632                "automatic thread number detection not supported by codec, "
 633                "patch welcome\n");
 634         return -1;
 635     }
 636
 637     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 638         s->rtp_mode = 1;
 639
 640     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 641         s->h263_slice_structured = 1;
 642
 643     if (!avctx->time_base.den || !avctx->time_base.num) {
 644         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 645         return -1;
 646     }
 647
 648     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 649         av_log(avctx, AV_LOG_INFO,
 650                "notice: b_frame_strategy only affects the first pass\n");
 651         avctx->b_frame_strategy = 0;
 652     }
 653
 654     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 655     if (i > 1) {
 656         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 657         avctx->time_base.den /= i;
 658         avctx->time_base.num /= i;
 659         //return -1;
 660     }
 661
 662     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 663         // (a + x * 3 / 8) / x
 664         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 665         s->inter_quant_bias = 0;
 666     } else {
 667         s->intra_quant_bias = 0;
 668         // (a - x / 4) / x
 669         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 670     }
 671
 672     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 673         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 674         return AVERROR(EINVAL);
 675     }
 676
 677     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 678         s->intra_quant_bias = avctx->intra_quant_bias;
 679     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 680         s->inter_quant_bias = avctx->inter_quant_bias;
 681
 682     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 683
 684     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 685         s->avctx->time_base.den > (1 << 16) - 1) {
 686         av_log(avctx, AV_LOG_ERROR,
 687                "timebase %d/%d not supported by MPEG 4 standard, "
 688                "the maximum admitted value for the timebase denominator "
 689                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 690                (1 << 16) - 1);
 691         return -1;
 692     }
 693     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 694
 695     switch (avctx->codec->id) {
 696     case AV_CODEC_ID_MPEG1VIDEO:
 697         s->out_format = FMT_MPEG1;
 698         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 699         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 700         break;
 701     case AV_CODEC_ID_MPEG2VIDEO:
 702         s->out_format = FMT_MPEG1;
 703         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
 704         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 705         s->rtp_mode   = 1;
 706         break;
 707     case AV_CODEC_ID_MJPEG:
 708     case AV_CODEC_ID_AMV:
 709         s->out_format = FMT_MJPEG;
 710         s->intra_only = 1; /* force intra only for jpeg */
 711         if (!CONFIG_MJPEG_ENCODER ||
 712             ff_mjpeg_encode_init(s) < 0)
 713             return -1;
 714         avctx->delay = 0;
 715         s->low_delay = 1;
 716         break;
 717     case AV_CODEC_ID_H261:
 718         if (!CONFIG_H261_ENCODER)
 719             return -1;
 720         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 721             av_log(avctx, AV_LOG_ERROR,
 722                    "The specified picture size of %dx%d is not valid for the "
 723                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 724                     s->width, s->height);
 725             return -1;
 726         }
 727         s->out_format = FMT_H261;
 728         avctx->delay  = 0;
 729         s->low_delay  = 1;
 730         s->rtp_mode   = 0; /* Sliced encoding not supported */
 731         break;
 732     case AV_CODEC_ID_H263:
 733         if (!CONFIG_H263_ENCODER)
 734             return -1;
 735         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 736                              s->width, s->height) == 8) {
 737             av_log(avctx, AV_LOG_ERROR,
 738                    "The specified picture size of %dx%d is not valid for "
 739                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 740                    "352x288, 704x576, and 1408x1152. "
 741                    "Try H.263+.\n", s->width, s->height);
 742             return -1;
 743         }
 744         s->out_format = FMT_H263;
 745         avctx->delay  = 0;
 746         s->low_delay  = 1;
 747         break;
 748     case AV_CODEC_ID_H263P:
 749         s->out_format = FMT_H263;
 750         s->h263_plus  = 1;
 751         /* Fx */
 752         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 753         s->modified_quant  = s->h263_aic;
 754         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 755         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 756
 757         /* /Fx */
 758         /* These are just to be sure */
 759         avctx->delay = 0;
 760         s->low_delay = 1;
 761         break;
 762     case AV_CODEC_ID_FLV1:
 763         s->out_format      = FMT_H263;
 764         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 765         s->unrestricted_mv = 1;
 766         s->rtp_mode  = 0; /* don't allow GOB */
 767         avctx->delay = 0;
 768         s->low_delay = 1;
 769         break;
 770     case AV_CODEC_ID_RV10:
 771         s->out_format = FMT_H263;
 772         avctx->delay  = 0;
 773         s->low_delay  = 1;
 774         break;
 775     case AV_CODEC_ID_RV20:
 776         s->out_format      = FMT_H263;
 777         avctx->delay       = 0;
 778         s->low_delay       = 1;
 779         s->modified_quant  = 1;
 780         s->h263_aic        = 1;
 781         s->h263_plus       = 1;
 782         s->loop_filter     = 1;
 783         s->unrestricted_mv = 0;
 784         break;
 785     case AV_CODEC_ID_MPEG4:
 786         s->out_format      = FMT_H263;
 787         s->h263_pred       = 1;
 788         s->unrestricted_mv = 1;
 789         s->low_delay       = s->max_b_frames ? 0 : 1;
 790         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 791         break;
 792     case AV_CODEC_ID_MSMPEG4V2:
 793         s->out_format      = FMT_H263;
 794         s->h263_pred       = 1;
 795         s->unrestricted_mv = 1;
 796         s->msmpeg4_version = 2;
 797         avctx->delay       = 0;
 798         s->low_delay       = 1;
 799         break;
 800     case AV_CODEC_ID_MSMPEG4V3:
 801         s->out_format        = FMT_H263;
 802         s->h263_pred         = 1;
 803         s->unrestricted_mv   = 1;
 804         s->msmpeg4_version   = 3;
 805         s->flipflop_rounding = 1;
 806         avctx->delay         = 0;
 807         s->low_delay         = 1;
 808         break;
 809     case AV_CODEC_ID_WMV1:
 810         s->out_format        = FMT_H263;
 811         s->h263_pred         = 1;
 812         s->unrestricted_mv   = 1;
 813         s->msmpeg4_version   = 4;
 814         s->flipflop_rounding = 1;
 815         avctx->delay         = 0;
 816         s->low_delay         = 1;
 817         break;
 818     case AV_CODEC_ID_WMV2:
 819         s->out_format        = FMT_H263;
 820         s->h263_pred         = 1;
 821         s->unrestricted_mv   = 1;
 822         s->msmpeg4_version   = 5;
 823         s->flipflop_rounding = 1;
 824         avctx->delay         = 0;
 825         s->low_delay         = 1;
 826         break;
 827     default:
 828         return -1;
 829     }
 830
 831     avctx->has_b_frames = !s->low_delay;
 832
 833     s->encoding = 1;
 834
 835     s->progressive_frame    =
 836     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 837                                                 CODEC_FLAG_INTERLACED_ME) ||
 838                                 s->alternate_scan);
 839
 840     /* init */
 841     ff_mpv_idct_init(s);
 842     if (ff_mpv_common_init(s) < 0)
 843         return -1;
 844
 845     ff_fdctdsp_init(&s->fdsp, avctx);
 846     ff_me_cmp_init(&s->mecc, avctx);
 847     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
 848     ff_pixblockdsp_init(&s->pdsp, avctx);
 849     ff_qpeldsp_init(&s->qdsp);
 850
 851     s->avctx->coded_frame = s->current_picture.f;
 852
 853     if (s->msmpeg4_version) {
 854         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 855                           2 * 2 * (MAX_LEVEL + 1) *
 856                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 857     }
 858     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 859
 860     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 865     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 866     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 867                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 868     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 869                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 870
 871     if (s->avctx->noise_reduction) {
 872         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 873                           2 * 64 * sizeof(uint16_t), fail);
 874     }
 875
 876     ff_dct_encode_init(s);
 877
 878     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 879         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 880
 881     s->quant_precision = 5;
 882
 883     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
 884     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
 885
 886     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 887         ff_h261_encode_init(s);
 888     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 889         ff_h263_encode_init(s);
 890     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 891         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
 892             return ret;
 893     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 894         && s->out_format == FMT_MPEG1)
 895         ff_mpeg1_encode_init(s);
 896
 897     /* init q matrix */
 898     for (i = 0; i < 64; i++) {
 899         int j = s->idsp.idct_permutation[i];
 900         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 901             s->mpeg_quant) {
 902             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 903             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 904         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 905             s->intra_matrix[j] =
 906             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 907         } else {
 908             /* mpeg1/2 */
 909             s->chroma_intra_matrix[j] =
 910             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 911             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 912         }
 913         if (s->avctx->intra_matrix)
 914             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 915         if (s->avctx->inter_matrix)
 916             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 917     }
 918
 919     /* precompute matrix */
 920     /* for mjpeg, we do include qscale in the matrix */
 921     if (s->out_format != FMT_MJPEG) {
 922         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
 923                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 924                           31, 1);
 925         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
 926                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 927                           31, 0);
 928     }
 929
 930     if (ff_rate_control_init(s) < 0)
 931         return -1;
 932
 933 #if FF_API_ERROR_RATE
 934     FF_DISABLE_DEPRECATION_WARNINGS
 935     if (avctx->error_rate)
 936         s->error_rate = avctx->error_rate;
 937     FF_ENABLE_DEPRECATION_WARNINGS;
 938 #endif
 939
 940 #if FF_API_NORMALIZE_AQP
 941     FF_DISABLE_DEPRECATION_WARNINGS
 942     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 943         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 944     FF_ENABLE_DEPRECATION_WARNINGS;
 945 #endif
 946
 947 #if FF_API_MV0
 948     FF_DISABLE_DEPRECATION_WARNINGS
 949     if (avctx->flags & CODEC_FLAG_MV0)
 950         s->mpv_flags |= FF_MPV_FLAG_MV0;
 951     FF_ENABLE_DEPRECATION_WARNINGS
 952 #endif
 953
 954 #if FF_API_MPV_OPT
 955     FF_DISABLE_DEPRECATION_WARNINGS
 956     if (avctx->rc_qsquish != 0.0)
 957         s->rc_qsquish = avctx->rc_qsquish;
 958     if (avctx->rc_qmod_amp != 0.0)
 959         s->rc_qmod_amp = avctx->rc_qmod_amp;
 960     if (avctx->rc_qmod_freq)
 961         s->rc_qmod_freq = avctx->rc_qmod_freq;
 962     if (avctx->rc_buffer_aggressivity != 1.0)
 963         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
 964     if (avctx->rc_initial_cplx != 0.0)
 965         s->rc_initial_cplx = avctx->rc_initial_cplx;
 966     if (avctx->lmin)
 967         s->lmin = avctx->lmin;
 968     if (avctx->lmax)
 969         s->lmax = avctx->lmax;
 970
 971     if (avctx->rc_eq) {
 972         av_freep(&s->rc_eq);
 973         s->rc_eq = av_strdup(avctx->rc_eq);
 974         if (!s->rc_eq)
 975             return AVERROR(ENOMEM);
 976     }
 977     FF_ENABLE_DEPRECATION_WARNINGS
 978 #endif
 979
 980     if (avctx->b_frame_strategy == 2) {
 981         for (i = 0; i < s->max_b_frames + 2; i++) {
 982             s->tmp_frames[i] = av_frame_alloc();
 983             if (!s->tmp_frames[i])
 984                 return AVERROR(ENOMEM);
 985
 986             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 987             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 988             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 989
 990             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 991             if (ret < 0)
 992                 return ret;
 993         }
 994     }
 995
 996     return 0;
 997 fail:
 998     ff_mpv_encode_end(avctx);
 999     return AVERROR_UNKNOWN;
1000 }
1001
1002 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1003 {
1004     MpegEncContext *s = avctx->priv_data;
1005     int i;
1006
1007     ff_rate_control_uninit(s);
1008
1009     ff_mpv_common_end(s);
1010     if (CONFIG_MJPEG_ENCODER &&
1011         s->out_format == FMT_MJPEG)
1012         ff_mjpeg_encode_close(s);
1013
1014     av_freep(&avctx->extradata);
1015
1016     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1017         av_frame_free(&s->tmp_frames[i]);
1018
1019     ff_free_picture_tables(&s->new_picture);
1020     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1021
1022     av_freep(&s->avctx->stats_out);
1023     av_freep(&s->ac_stats);
1024
1025     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1026     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1027     s->q_chroma_intra_matrix=   NULL;
1028     s->q_chroma_intra_matrix16= NULL;
1029     av_freep(&s->q_intra_matrix);
1030     av_freep(&s->q_inter_matrix);
1031     av_freep(&s->q_intra_matrix16);
1032     av_freep(&s->q_inter_matrix16);
1033     av_freep(&s->input_picture);
1034     av_freep(&s->reordered_input_picture);
1035     av_freep(&s->dct_offset);
1036
1037     return 0;
1038 }
1039
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between vertically adjacent samples
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff < 0 ? -diff : diff;
        }
    }

    return total;
}
1053
1054 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1055                            uint8_t *ref, int stride)
1056 {
1057     int x, y, w, h;
1058     int acc = 0;
1059
1060     w = s->width  & ~15;
1061     h = s->height & ~15;
1062
1063     for (y = 0; y < h; y += 16) {
1064         for (x = 0; x < w; x += 16) {
1065             int offset = x + y * stride;
1066             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1067                                       stride, 16);
1068             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1069             int sae  = get_sae(src + offset, mean, stride);
1070
1071             acc += sae + 500 < sad;
1072         }
1073     }
1074     return acc;
1075 }
1076
1077
1078 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1079 {
1080     Picture *pic = NULL;
1081     int64_t pts;
1082     int i, display_picture_number = 0, ret;
1083     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1084                                                  (s->low_delay ? 0 : 1);
1085     int direct = 1;
1086
1087     if (pic_arg) {
1088         pts = pic_arg->pts;
1089         display_picture_number = s->input_picture_number++;
1090
1091         if (pts != AV_NOPTS_VALUE) {
1092             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1093                 int64_t last = s->user_specified_pts;
1094
1095                 if (pts <= last) {
1096                     av_log(s->avctx, AV_LOG_ERROR,
1097                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1098                            pts, last);
1099                     return AVERROR(EINVAL);
1100                 }
1101
1102                 if (!s->low_delay && display_picture_number == 1)
1103                     s->dts_delta = pts - last;
1104             }
1105             s->user_specified_pts = pts;
1106         } else {
1107             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1108                 s->user_specified_pts =
1109                 pts = s->user_specified_pts + 1;
1110                 av_log(s->avctx, AV_LOG_INFO,
1111                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1112                        pts);
1113             } else {
1114                 pts = display_picture_number;
1115             }
1116         }
1117     }
1118
1119     if (pic_arg) {
1120         if (!pic_arg->buf[0] ||
1121             pic_arg->linesize[0] != s->linesize ||
1122             pic_arg->linesize[1] != s->uvlinesize ||
1123             pic_arg->linesize[2] != s->uvlinesize)
1124             direct = 0;
1125         if ((s->width & 15) || (s->height & 15))
1126             direct = 0;
1127         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1128             direct = 0;
1129         if (s->linesize & (STRIDE_ALIGN-1))
1130             direct = 0;
1131
1132         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1133                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1134
1135         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1136         if (i < 0)
1137             return i;
1138
1139         pic = &s->picture[i];
1140         pic->reference = 3;
1141
1142         if (direct) {
1143             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1144                 return ret;
1145         }
1146         ret = ff_alloc_picture(s, pic, direct);
1147         if (ret < 0)
1148             return ret;
1149
1150         if (!direct) {
1151             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1152                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1153                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1154                 // empty
1155             } else {
1156                 int h_chroma_shift, v_chroma_shift;
1157                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1158                                                  &h_chroma_shift,
1159                                                  &v_chroma_shift);
1160
1161                 for (i = 0; i < 3; i++) {
1162                     int src_stride = pic_arg->linesize[i];
1163                     int dst_stride = i ? s->uvlinesize : s->linesize;
1164                     int h_shift = i ? h_chroma_shift : 0;
1165                     int v_shift = i ? v_chroma_shift : 0;
1166                     int w = s->width  >> h_shift;
1167                     int h = s->height >> v_shift;
1168                     uint8_t *src = pic_arg->data[i];
1169                     uint8_t *dst = pic->f->data[i];
1170                     int vpad = 16;
1171
1172                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1173                         && !s->progressive_sequence
1174                         && FFALIGN(s->height, 32) - s->height > 16)
1175                         vpad = 32;
1176
1177                     if (!s->avctx->rc_buffer_size)
1178                         dst += INPLACE_OFFSET;
1179
1180                     if (src_stride == dst_stride)
1181                         memcpy(dst, src, src_stride * h);
1182                     else {
1183                         int h2 = h;
1184                         uint8_t *dst2 = dst;
1185                         while (h2--) {
1186                             memcpy(dst2, src, w);
1187                             dst2 += dst_stride;
1188                             src += src_stride;
1189                         }
1190                     }
1191                     if ((s->width & 15) || (s->height & (vpad-1))) {
1192                         s->mpvencdsp.draw_edges(dst, dst_stride,
1193                                                 w, h,
1194                                                 16 >> h_shift,
1195                                                 vpad >> v_shift,
1196                                                 EDGE_BOTTOM);
1197                     }
1198                 }
1199             }
1200         }
1201         ret = av_frame_copy_props(pic->f, pic_arg);
1202         if (ret < 0)
1203             return ret;
1204
1205         pic->f->display_picture_number = display_picture_number;
1206         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1207     }
1208
1209     /* shift buffer entries */
1210     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1211         s->input_picture[i - 1] = s->input_picture[i];
1212
1213     s->input_picture[encoding_delay] = (Picture*) pic;
1214
1215     return 0;
1216 }
1217
1218 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1219 {
1220     int x, y, plane;
1221     int score = 0;
1222     int64_t score64 = 0;
1223
1224     for (plane = 0; plane < 3; plane++) {
1225         const int stride = p->f->linesize[plane];
1226         const int bw = plane ? 1 : 2;
1227         for (y = 0; y < s->mb_height * bw; y++) {
1228             for (x = 0; x < s->mb_width * bw; x++) {
1229                 int off = p->shared ? 0 : 16;
1230                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1231                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1232                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1233
1234                 switch (FFABS(s->avctx->frame_skip_exp)) {
1235                 case 0: score    =  FFMAX(score, v);          break;
1236                 case 1: score   += FFABS(v);                  break;
1237                 case 2: score64 += v * (int64_t)v;                       break;
1238                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1239                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1240                 }
1241             }
1242         }
1243     }
1244     emms_c();
1245
1246     if (score)
1247         score64 = score;
1248     if (s->avctx->frame_skip_exp < 0)
1249         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1250                       -1.0/s->avctx->frame_skip_exp);
1251
1252     if (score64 < s->avctx->frame_skip_threshold)
1253         return 1;
1254     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1255         return 1;
1256     return 0;
1257 }
1258
1259 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1260 {
1261     AVPacket pkt = { 0 };
1262     int ret, got_output;
1263
1264     av_init_packet(&pkt);
1265     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1266     if (ret < 0)
1267         return ret;
1268
1269     ret = pkt.size;
1270     av_free_packet(&pkt);
1271     return ret;
1272 }
1273
1274 static int estimate_best_b_count(MpegEncContext *s)
1275 {
1276     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1277     AVCodecContext *c = avcodec_alloc_context3(NULL);
1278     const int scale = s->avctx->brd_scale;
1279     int i, j, out_size, p_lambda, b_lambda, lambda2;
1280     int64_t best_rd  = INT64_MAX;
1281     int best_b_count = -1;
1282
1283     if (!c)
1284         return AVERROR(ENOMEM);
1285     av_assert0(scale >= 0 && scale <= 3);
1286
1287     //emms_c();
1288     //s->next_picture_ptr->quality;
1289     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1290     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1291     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1292     if (!b_lambda) // FIXME we should do this somewhere else
1293         b_lambda = p_lambda;
1294     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1295                FF_LAMBDA_SHIFT;
1296
1297     c->width        = s->width  >> scale;
1298     c->height       = s->height >> scale;
1299     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1300     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1301     c->mb_decision  = s->avctx->mb_decision;
1302     c->me_cmp       = s->avctx->me_cmp;
1303     c->mb_cmp       = s->avctx->mb_cmp;
1304     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1305     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1306     c->time_base    = s->avctx->time_base;
1307     c->max_b_frames = s->max_b_frames;
1308
1309     if (avcodec_open2(c, codec, NULL) < 0)
1310         return -1;
1311
1312     for (i = 0; i < s->max_b_frames + 2; i++) {
1313         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1314                                                 s->next_picture_ptr;
1315         uint8_t *data[4];
1316
1317         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1318             pre_input = *pre_input_ptr;
1319             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1320
1321             if (!pre_input.shared && i) {
1322                 data[0] += INPLACE_OFFSET;
1323                 data[1] += INPLACE_OFFSET;
1324                 data[2] += INPLACE_OFFSET;
1325             }
1326
1327             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1328                                        s->tmp_frames[i]->linesize[0],
1329                                        data[0],
1330                                        pre_input.f->linesize[0],
1331                                        c->width, c->height);
1332             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1333                                        s->tmp_frames[i]->linesize[1],
1334                                        data[1],
1335                                        pre_input.f->linesize[1],
1336                                        c->width >> 1, c->height >> 1);
1337             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1338                                        s->tmp_frames[i]->linesize[2],
1339                                        data[2],
1340                                        pre_input.f->linesize[2],
1341                                        c->width >> 1, c->height >> 1);
1342         }
1343     }
1344
1345     for (j = 0; j < s->max_b_frames + 1; j++) {
1346         int64_t rd = 0;
1347
1348         if (!s->input_picture[j])
1349             break;
1350
1351         c->error[0] = c->error[1] = c->error[2] = 0;
1352
1353         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1354         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1355
1356         out_size = encode_frame(c, s->tmp_frames[0]);
1357
1358         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1359
1360         for (i = 0; i < s->max_b_frames + 1; i++) {
1361             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1362
1363             s->tmp_frames[i + 1]->pict_type = is_p ?
1364                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1365             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1366
1367             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1368
1369             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1370         }
1371
1372         /* get the delayed frames */
1373         while (out_size) {
1374             out_size = encode_frame(c, NULL);
1375             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1376         }
1377
1378         rd += c->error[0] + c->error[1] + c->error[2];
1379
1380         if (rd < best_rd) {
1381             best_rd = rd;
1382             best_b_count = j;
1383         }
1384     }
1385
1386     avcodec_close(c);
1387     av_freep(&c);
1388
1389     return best_b_count;
1390 }
1391
1392 static int select_input_picture(MpegEncContext *s)
1393 {
1394     int i, ret;
1395
1396     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1397         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1398     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1399
1400     /* set next picture type & ordering */
1401     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1402         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1403             if (s->picture_in_gop_number < s->gop_size &&
1404                 s->next_picture_ptr &&
1405                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1406                 // FIXME check that te gop check above is +-1 correct
1407                 av_frame_unref(s->input_picture[0]->f);
1408
1409                 ff_vbv_update(s, 0);
1410
1411                 goto no_output_pic;
1412             }
1413         }
1414
1415         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1416             !s->next_picture_ptr || s->intra_only) {
1417             s->reordered_input_picture[0] = s->input_picture[0];
1418             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1419             s->reordered_input_picture[0]->f->coded_picture_number =
1420                 s->coded_picture_number++;
1421         } else {
1422             int b_frames;
1423
1424             if (s->avctx->flags & CODEC_FLAG_PASS2) {
1425                 for (i = 0; i < s->max_b_frames + 1; i++) {
1426                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1427
1428                     if (pict_num >= s->rc_context.num_entries)
1429                         break;
1430                     if (!s->input_picture[i]) {
1431                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1432                         break;
1433                     }
1434
1435                     s->input_picture[i]->f->pict_type =
1436                         s->rc_context.entry[pict_num].new_pict_type;
1437                 }
1438             }
1439
1440             if (s->avctx->b_frame_strategy == 0) {
1441                 b_frames = s->max_b_frames;
1442                 while (b_frames && !s->input_picture[b_frames])
1443                     b_frames--;
1444             } else if (s->avctx->b_frame_strategy == 1) {
1445                 for (i = 1; i < s->max_b_frames + 1; i++) {
1446                     if (s->input_picture[i] &&
1447                         s->input_picture[i]->b_frame_score == 0) {
1448                         s->input_picture[i]->b_frame_score =
1449                             get_intra_count(s,
1450                                             s->input_picture[i    ]->f->data[0],
1451                                             s->input_picture[i - 1]->f->data[0],
1452                                             s->linesize) + 1;
1453                     }
1454                 }
1455                 for (i = 0; i < s->max_b_frames + 1; i++) {
1456                     if (!s->input_picture[i] ||
1457                         s->input_picture[i]->b_frame_score - 1 >
1458                             s->mb_num / s->avctx->b_sensitivity)
1459                         break;
1460                 }
1461
1462                 b_frames = FFMAX(0, i - 1);
1463
1464                 /* reset scores */
1465                 for (i = 0; i < b_frames + 1; i++) {
1466                     s->input_picture[i]->b_frame_score = 0;
1467                 }
1468             } else if (s->avctx->b_frame_strategy == 2) {
1469                 b_frames = estimate_best_b_count(s);
1470             } else {
1471                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1472                 b_frames = 0;
1473             }
1474
1475             emms_c();
1476
1477             for (i = b_frames - 1; i >= 0; i--) {
1478                 int type = s->input_picture[i]->f->pict_type;
1479                 if (type && type != AV_PICTURE_TYPE_B)
1480                     b_frames = i;
1481             }
1482             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1483                 b_frames == s->max_b_frames) {
1484                 av_log(s->avctx, AV_LOG_ERROR,
1485                        "warning, too many b frames in a row\n");
1486             }
1487
1488             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1489                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1490                     s->gop_size > s->picture_in_gop_number) {
1491                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1492                 } else {
1493                     if (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)
1494                         b_frames = 0;
1495                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1496                 }
1497             }
1498
1499             if ((s->avctx->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1500                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1501                 b_frames--;
1502
1503             s->reordered_input_picture[0] = s->input_picture[b_frames];
1504             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1505                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1506             s->reordered_input_picture[0]->f->coded_picture_number =
1507                 s->coded_picture_number++;
1508             for (i = 0; i < b_frames; i++) {
1509                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1510                 s->reordered_input_picture[i + 1]->f->pict_type =
1511                     AV_PICTURE_TYPE_B;
1512                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1513                     s->coded_picture_number++;
1514             }
1515         }
1516     }
1517 no_output_pic:
1518     if (s->reordered_input_picture[0]) {
1519         s->reordered_input_picture[0]->reference =
1520            s->reordered_input_picture[0]->f->pict_type !=
1521                AV_PICTURE_TYPE_B ? 3 : 0;
1522
1523         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1524         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1525             return ret;
1526
1527         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1528             // input is a shared pix, so we can't modifiy it -> alloc a new
1529             // one & ensure that the shared one is reuseable
1530
1531             Picture *pic;
1532             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1533             if (i < 0)
1534                 return i;
1535             pic = &s->picture[i];
1536
1537             pic->reference = s->reordered_input_picture[0]->reference;
1538             if (ff_alloc_picture(s, pic, 0) < 0) {
1539                 return -1;
1540             }
1541
1542             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1543             if (ret < 0)
1544                 return ret;
1545
1546             /* mark us unused / free shared pic */
1547             av_frame_unref(s->reordered_input_picture[0]->f);
1548             s->reordered_input_picture[0]->shared = 0;
1549
1550             s->current_picture_ptr = pic;
1551         } else {
1552             // input is not a shared pix -> reuse buffer for current_pix
1553             s->current_picture_ptr = s->reordered_input_picture[0];
1554             for (i = 0; i < 4; i++) {
1555                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1556             }
1557         }
1558         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1559         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1560                                        s->current_picture_ptr)) < 0)
1561             return ret;
1562
1563         s->picture_number = s->new_picture.f->display_picture_number;
1564     } else {
1565         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1566     }
1567     return 0;
1568 }
1569
1570 static void frame_end(MpegEncContext *s)
1571 {
1572     if (s->unrestricted_mv &&
1573         s->current_picture.reference &&
1574         !s->intra_only) {
1575         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1576         int hshift = desc->log2_chroma_w;
1577         int vshift = desc->log2_chroma_h;
1578         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1579                                 s->current_picture.f->linesize[0],
1580                                 s->h_edge_pos, s->v_edge_pos,
1581                                 EDGE_WIDTH, EDGE_WIDTH,
1582                                 EDGE_TOP | EDGE_BOTTOM);
1583         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1584                                 s->current_picture.f->linesize[1],
1585                                 s->h_edge_pos >> hshift,
1586                                 s->v_edge_pos >> vshift,
1587                                 EDGE_WIDTH >> hshift,
1588                                 EDGE_WIDTH >> vshift,
1589                                 EDGE_TOP | EDGE_BOTTOM);
1590         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1591                                 s->current_picture.f->linesize[2],
1592                                 s->h_edge_pos >> hshift,
1593                                 s->v_edge_pos >> vshift,
1594                                 EDGE_WIDTH >> hshift,
1595                                 EDGE_WIDTH >> vshift,
1596                                 EDGE_TOP | EDGE_BOTTOM);
1597     }
1598
1599     emms_c();
1600
1601     s->last_pict_type                 = s->pict_type;
1602     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1603     if (s->pict_type!= AV_PICTURE_TYPE_B)
1604         s->last_non_b_pict_type = s->pict_type;
1605
1606     s->avctx->coded_frame = s->current_picture_ptr->f;
1607
1608 }
1609
1610 static void update_noise_reduction(MpegEncContext *s)
1611 {
1612     int intra, i;
1613
1614     for (intra = 0; intra < 2; intra++) {
1615         if (s->dct_count[intra] > (1 << 16)) {
1616             for (i = 0; i < 64; i++) {
1617                 s->dct_error_sum[intra][i] >>= 1;
1618             }
1619             s->dct_count[intra] >>= 1;
1620         }
1621
1622         for (i = 0; i < 64; i++) {
1623             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1624                                        s->dct_count[intra] +
1625                                        s->dct_error_sum[intra][i] / 2) /
1626                                       (s->dct_error_sum[intra][i] + 1);
1627         }
1628     }
1629 }
1630
1631 static int frame_start(MpegEncContext *s)
1632 {
1633     int ret;
1634
1635     /* mark & release old frames */
1636     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1637         s->last_picture_ptr != s->next_picture_ptr &&
1638         s->last_picture_ptr->f->buf[0]) {
1639         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1640     }
1641
1642     s->current_picture_ptr->f->pict_type = s->pict_type;
1643     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1644
1645     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1646     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1647                                    s->current_picture_ptr)) < 0)
1648         return ret;
1649
1650     if (s->pict_type != AV_PICTURE_TYPE_B) {
1651         s->last_picture_ptr = s->next_picture_ptr;
1652         if (!s->droppable)
1653             s->next_picture_ptr = s->current_picture_ptr;
1654     }
1655
1656     if (s->last_picture_ptr) {
1657         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1658         if (s->last_picture_ptr->f->buf[0] &&
1659             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1660                                        s->last_picture_ptr)) < 0)
1661             return ret;
1662     }
1663     if (s->next_picture_ptr) {
1664         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1665         if (s->next_picture_ptr->f->buf[0] &&
1666             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1667                                        s->next_picture_ptr)) < 0)
1668             return ret;
1669     }
1670
1671     if (s->picture_structure!= PICT_FRAME) {
1672         int i;
1673         for (i = 0; i < 4; i++) {
1674             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1675                 s->current_picture.f->data[i] +=
1676                     s->current_picture.f->linesize[i];
1677             }
1678             s->current_picture.f->linesize[i] *= 2;
1679             s->last_picture.f->linesize[i]    *= 2;
1680             s->next_picture.f->linesize[i]    *= 2;
1681         }
1682     }
1683
1684     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1685         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1686         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1687     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1688         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1689         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1690     } else {
1691         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1692         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1693     }
1694
1695     if (s->dct_error_sum) {
1696         av_assert2(s->avctx->noise_reduction && s->encoding);
1697         update_noise_reduction(s);
1698     }
1699
1700     return 0;
1701 }
1702
1703 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1704                           const AVFrame *pic_arg, int *got_packet)
1705 {
1706     MpegEncContext *s = avctx->priv_data;
1707     int i, stuffing_count, ret;
1708     int context_count = s->slice_context_count;
1709
1710     s->picture_in_gop_number++;
1711
1712     if (load_input_picture(s, pic_arg) < 0)
1713         return -1;
1714
1715     if (select_input_picture(s) < 0) {
1716         return -1;
1717     }
1718
1719     /* output? */
1720     if (s->new_picture.f->data[0]) {
1721         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1722         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1723                                               :
1724                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1725         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1726             return ret;
1727         if (s->mb_info) {
1728             s->mb_info_ptr = av_packet_new_side_data(pkt,
1729                                  AV_PKT_DATA_H263_MB_INFO,
1730                                  s->mb_width*s->mb_height*12);
1731             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1732         }
1733
1734         for (i = 0; i < context_count; i++) {
1735             int start_y = s->thread_context[i]->start_mb_y;
1736             int   end_y = s->thread_context[i]->  end_mb_y;
1737             int h       = s->mb_height;
1738             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1739             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1740
1741             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1742         }
1743
1744         s->pict_type = s->new_picture.f->pict_type;
1745         //emms_c();
1746         ret = frame_start(s);
1747         if (ret < 0)
1748             return ret;
1749 vbv_retry:
1750         ret = encode_picture(s, s->picture_number);
1751         if (growing_buffer) {
1752             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1753             pkt->data = s->pb.buf;
1754             pkt->size = avctx->internal->byte_buffer_size;
1755         }
1756         if (ret < 0)
1757             return -1;
1758
1759         avctx->header_bits = s->header_bits;
1760         avctx->mv_bits     = s->mv_bits;
1761         avctx->misc_bits   = s->misc_bits;
1762         avctx->i_tex_bits  = s->i_tex_bits;
1763         avctx->p_tex_bits  = s->p_tex_bits;
1764         avctx->i_count     = s->i_count;
1765         // FIXME f/b_count in avctx
1766         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1767         avctx->skip_count  = s->skip_count;
1768
1769         frame_end(s);
1770
1771         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1772             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1773
1774         if (avctx->rc_buffer_size) {
1775             RateControlContext *rcc = &s->rc_context;
1776             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1777
1778             if (put_bits_count(&s->pb) > max_size &&
1779                 s->lambda < s->lmax) {
1780                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1781                                        (s->qscale + 1) / s->qscale);
1782                 if (s->adaptive_quant) {
1783                     int i;
1784                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1785                         s->lambda_table[i] =
1786                             FFMAX(s->lambda_table[i] + 1,
1787                                   s->lambda_table[i] * (s->qscale + 1) /
1788                                   s->qscale);
1789                 }
1790                 s->mb_skipped = 0;        // done in frame_start()
1791                 // done in encode_picture() so we must undo it
1792                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1793                     if (s->flipflop_rounding          ||
1794                         s->codec_id == AV_CODEC_ID_H263P ||
1795                         s->codec_id == AV_CODEC_ID_MPEG4)
1796                         s->no_rounding ^= 1;
1797                 }
1798                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1799                     s->time_base       = s->last_time_base;
1800                     s->last_non_b_time = s->time - s->pp_time;
1801                 }
1802                 for (i = 0; i < context_count; i++) {
1803                     PutBitContext *pb = &s->thread_context[i]->pb;
1804                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1805                 }
1806                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1807                 goto vbv_retry;
1808             }
1809
1810             av_assert0(s->avctx->rc_max_rate);
1811         }
1812
1813         if (s->avctx->flags & CODEC_FLAG_PASS1)
1814             ff_write_pass1_stats(s);
1815
1816         for (i = 0; i < 4; i++) {
1817             s->current_picture_ptr->f->error[i] =
1818             s->current_picture.f->error[i] =
1819                 s->current_picture.error[i];
1820             avctx->error[i] += s->current_picture_ptr->f->error[i];
1821         }
1822
1823         if (s->avctx->flags & CODEC_FLAG_PASS1)
1824             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1825                    avctx->i_tex_bits + avctx->p_tex_bits ==
1826                        put_bits_count(&s->pb));
1827         flush_put_bits(&s->pb);
1828         s->frame_bits  = put_bits_count(&s->pb);
1829
1830         stuffing_count = ff_vbv_update(s, s->frame_bits);
1831         s->stuffing_bits = 8*stuffing_count;
1832         if (stuffing_count) {
1833             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1834                     stuffing_count + 50) {
1835                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1836                 return -1;
1837             }
1838
1839             switch (s->codec_id) {
1840             case AV_CODEC_ID_MPEG1VIDEO:
1841             case AV_CODEC_ID_MPEG2VIDEO:
1842                 while (stuffing_count--) {
1843                     put_bits(&s->pb, 8, 0);
1844                 }
1845             break;
1846             case AV_CODEC_ID_MPEG4:
1847                 put_bits(&s->pb, 16, 0);
1848                 put_bits(&s->pb, 16, 0x1C3);
1849                 stuffing_count -= 4;
1850                 while (stuffing_count--) {
1851                     put_bits(&s->pb, 8, 0xFF);
1852                 }
1853             break;
1854             default:
1855                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1856             }
1857             flush_put_bits(&s->pb);
1858             s->frame_bits  = put_bits_count(&s->pb);
1859         }
1860
1861         /* update mpeg1/2 vbv_delay for CBR */
1862         if (s->avctx->rc_max_rate                          &&
1863             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1864             s->out_format == FMT_MPEG1                     &&
1865             90000LL * (avctx->rc_buffer_size - 1) <=
1866                 s->avctx->rc_max_rate * 0xFFFFLL) {
1867             int vbv_delay, min_delay;
1868             double inbits  = s->avctx->rc_max_rate *
1869                              av_q2d(s->avctx->time_base);
1870             int    minbits = s->frame_bits - 8 *
1871                              (s->vbv_delay_ptr - s->pb.buf - 1);
1872             double bits    = s->rc_context.buffer_index + minbits - inbits;
1873
1874             if (bits < 0)
1875                 av_log(s->avctx, AV_LOG_ERROR,
1876                        "Internal error, negative bits\n");
1877
1878             assert(s->repeat_first_field == 0);
1879
1880             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1881             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1882                         s->avctx->rc_max_rate;
1883
1884             vbv_delay = FFMAX(vbv_delay, min_delay);
1885
1886             av_assert0(vbv_delay < 0xFFFF);
1887
1888             s->vbv_delay_ptr[0] &= 0xF8;
1889             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1890             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1891             s->vbv_delay_ptr[2] &= 0x07;
1892             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1893             avctx->vbv_delay     = vbv_delay * 300;
1894         }
1895         s->total_bits     += s->frame_bits;
1896         avctx->frame_bits  = s->frame_bits;
1897
1898         pkt->pts = s->current_picture.f->pts;
1899         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1900             if (!s->current_picture.f->coded_picture_number)
1901                 pkt->dts = pkt->pts - s->dts_delta;
1902             else
1903                 pkt->dts = s->reordered_pts;
1904             s->reordered_pts = pkt->pts;
1905         } else
1906             pkt->dts = pkt->pts;
1907         if (s->current_picture.f->key_frame)
1908             pkt->flags |= AV_PKT_FLAG_KEY;
1909         if (s->mb_info)
1910             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1911     } else {
1912         s->frame_bits = 0;
1913     }
1914
1915     /* release non-reference frames */
1916     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1917         if (!s->picture[i].reference)
1918             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1919     }
1920
1921     av_assert1((s->frame_bits & 7) == 0);
1922
1923     pkt->size = s->frame_bits / 8;
1924     *got_packet = !!pkt->size;
1925     return 0;
1926 }
1927
1928 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1929                                                 int n, int threshold)
1930 {
1931     static const char tab[64] = {
1932         3, 2, 2, 1, 1, 1, 1, 1,
1933         1, 1, 1, 1, 1, 1, 1, 1,
1934         1, 1, 1, 1, 1, 1, 1, 1,
1935         0, 0, 0, 0, 0, 0, 0, 0,
1936         0, 0, 0, 0, 0, 0, 0, 0,
1937         0, 0, 0, 0, 0, 0, 0, 0,
1938         0, 0, 0, 0, 0, 0, 0, 0,
1939         0, 0, 0, 0, 0, 0, 0, 0
1940     };
1941     int score = 0;
1942     int run = 0;
1943     int i;
1944     int16_t *block = s->block[n];
1945     const int last_index = s->block_last_index[n];
1946     int skip_dc;
1947
1948     if (threshold < 0) {
1949         skip_dc = 0;
1950         threshold = -threshold;
1951     } else
1952         skip_dc = 1;
1953
1954     /* Are all we could set to zero already zero? */
1955     if (last_index <= skip_dc - 1)
1956         return;
1957
1958     for (i = 0; i <= last_index; i++) {
1959         const int j = s->intra_scantable.permutated[i];
1960         const int level = FFABS(block[j]);
1961         if (level == 1) {
1962             if (skip_dc && i == 0)
1963                 continue;
1964             score += tab[run];
1965             run = 0;
1966         } else if (level > 1) {
1967             return;
1968         } else {
1969             run++;
1970         }
1971     }
1972     if (score >= threshold)
1973         return;
1974     for (i = skip_dc; i <= last_index; i++) {
1975         const int j = s->intra_scantable.permutated[i];
1976         block[j] = 0;
1977     }
1978     if (block[0])
1979         s->block_last_index[n] = 0;
1980     else
1981         s->block_last_index[n] = -1;
1982 }
1983
1984 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1985                                int last_index)
1986 {
1987     int i;
1988     const int maxlevel = s->max_qcoeff;
1989     const int minlevel = s->min_qcoeff;
1990     int overflow = 0;
1991
1992     if (s->mb_intra) {
1993         i = 1; // skip clipping of intra dc
1994     } else
1995         i = 0;
1996
1997     for (; i <= last_index; i++) {
1998         const int j = s->intra_scantable.permutated[i];
1999         int level = block[j];
2000
2001         if (level > maxlevel) {
2002             level = maxlevel;
2003             overflow++;
2004         } else if (level < minlevel) {
2005             level = minlevel;
2006             overflow++;
2007         }
2008
2009         block[j] = level;
2010     }
2011
2012     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2013         av_log(s->avctx, AV_LOG_INFO,
2014                "warning, clipping %d dct coefficients to %d..%d\n",
2015                overflow, minlevel, maxlevel);
2016 }
2017
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel activity.
 *
 * For every pixel the neighbourhood of up to 3x3 samples (cropped at the
 * block borders) is summed; the weight is
 *   36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output array of 64 weights, row-major with a stride of 8
 * @param ptr    top-left sample of the 8x8 source block
 * @param stride distance between two rows of ptr, in samples
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);    /* exclusive */

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2); /* exclusive */
            /* number of samples inside the cropped window */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int wy, wx;

            for (wy = top; wy < bottom; wy++) {
                const uint8_t *line = ptr + wy * stride;

                for (wx = left; wx < right; wx++) {
                    const int v = line[wx];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2041
2042 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2043                                                 int motion_x, int motion_y,
2044                                                 int mb_block_height,
2045                                                 int mb_block_width,
2046                                                 int mb_block_count)
2047 {
2048     int16_t weight[12][64];
2049     int16_t orig[12][64];
2050     const int mb_x = s->mb_x;
2051     const int mb_y = s->mb_y;
2052     int i;
2053     int skip_dct[12];
2054     int dct_offset = s->linesize * 8; // default for progressive frames
2055     int uv_dct_offset = s->uvlinesize * 8;
2056     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2057     ptrdiff_t wrap_y, wrap_c;
2058
2059     for (i = 0; i < mb_block_count; i++)
2060         skip_dct[i] = s->skipdct;
2061
2062     if (s->adaptive_quant) {
2063         const int last_qp = s->qscale;
2064         const int mb_xy = mb_x + mb_y * s->mb_stride;
2065
2066         s->lambda = s->lambda_table[mb_xy];
2067         update_qscale(s);
2068
2069         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2070             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2071             s->dquant = s->qscale - last_qp;
2072
2073             if (s->out_format == FMT_H263) {
2074                 s->dquant = av_clip(s->dquant, -2, 2);
2075
2076                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2077                     if (!s->mb_intra) {
2078                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2079                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2080                                 s->dquant = 0;
2081                         }
2082                         if (s->mv_type == MV_TYPE_8X8)
2083                             s->dquant = 0;
2084                     }
2085                 }
2086             }
2087         }
2088         ff_set_qscale(s, last_qp + s->dquant);
2089     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2090         ff_set_qscale(s, s->qscale + s->dquant);
2091
2092     wrap_y = s->linesize;
2093     wrap_c = s->uvlinesize;
2094     ptr_y  = s->new_picture.f->data[0] +
2095              (mb_y * 16 * wrap_y)              + mb_x * 16;
2096     ptr_cb = s->new_picture.f->data[1] +
2097              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2098     ptr_cr = s->new_picture.f->data[2] +
2099              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2100
2101     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2102         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2103         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2104         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2105         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2106                                  wrap_y, wrap_y,
2107                                  16, 16, mb_x * 16, mb_y * 16,
2108                                  s->width, s->height);
2109         ptr_y = ebuf;
2110         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2111                                  wrap_c, wrap_c,
2112                                  mb_block_width, mb_block_height,
2113                                  mb_x * mb_block_width, mb_y * mb_block_height,
2114                                  cw, ch);
2115         ptr_cb = ebuf + 16 * wrap_y;
2116         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2117                                  wrap_c, wrap_c,
2118                                  mb_block_width, mb_block_height,
2119                                  mb_x * mb_block_width, mb_y * mb_block_height,
2120                                  cw, ch);
2121         ptr_cr = ebuf + 16 * wrap_y + 16;
2122     }
2123
2124     if (s->mb_intra) {
2125         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2126             int progressive_score, interlaced_score;
2127
2128             s->interlaced_dct = 0;
2129             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2130                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2131                                                      NULL, wrap_y, 8) - 400;
2132
2133             if (progressive_score > 0) {
2134                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2135                                                         NULL, wrap_y * 2, 8) +
2136                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2137                                                         NULL, wrap_y * 2, 8);
2138                 if (progressive_score > interlaced_score) {
2139                     s->interlaced_dct = 1;
2140
2141                     dct_offset = wrap_y;
2142                     uv_dct_offset = wrap_c;
2143                     wrap_y <<= 1;
2144                     if (s->chroma_format == CHROMA_422 ||
2145                         s->chroma_format == CHROMA_444)
2146                         wrap_c <<= 1;
2147                 }
2148             }
2149         }
2150
2151         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2152         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2153         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2154         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2155
2156         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2157             skip_dct[4] = 1;
2158             skip_dct[5] = 1;
2159         } else {
2160             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2161             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2162             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2163                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2164                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2165             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2166                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2167                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2168                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2169                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2170                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2171                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2172             }
2173         }
2174     } else {
2175         op_pixels_func (*op_pix)[4];
2176         qpel_mc_func (*op_qpix)[16];
2177         uint8_t *dest_y, *dest_cb, *dest_cr;
2178
2179         dest_y  = s->dest[0];
2180         dest_cb = s->dest[1];
2181         dest_cr = s->dest[2];
2182
2183         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2184             op_pix  = s->hdsp.put_pixels_tab;
2185             op_qpix = s->qdsp.put_qpel_pixels_tab;
2186         } else {
2187             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2188             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2189         }
2190
2191         if (s->mv_dir & MV_DIR_FORWARD) {
2192             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2193                           s->last_picture.f->data,
2194                           op_pix, op_qpix);
2195             op_pix  = s->hdsp.avg_pixels_tab;
2196             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2197         }
2198         if (s->mv_dir & MV_DIR_BACKWARD) {
2199             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2200                           s->next_picture.f->data,
2201                           op_pix, op_qpix);
2202         }
2203
2204         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2205             int progressive_score, interlaced_score;
2206
2207             s->interlaced_dct = 0;
2208             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2209                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2210                                                      ptr_y + wrap_y * 8,
2211                                                      wrap_y, 8) - 400;
2212
2213             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2214                 progressive_score -= 400;
2215
2216             if (progressive_score > 0) {
2217                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2218                                                         wrap_y * 2, 8) +
2219                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2220                                                         ptr_y + wrap_y,
2221                                                         wrap_y * 2, 8);
2222
2223                 if (progressive_score > interlaced_score) {
2224                     s->interlaced_dct = 1;
2225
2226                     dct_offset = wrap_y;
2227                     uv_dct_offset = wrap_c;
2228                     wrap_y <<= 1;
2229                     if (s->chroma_format == CHROMA_422)
2230                         wrap_c <<= 1;
2231                 }
2232             }
2233         }
2234
2235         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2236         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2237         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2238                             dest_y + dct_offset, wrap_y);
2239         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2240                             dest_y + dct_offset + 8, wrap_y);
2241
2242         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2243             skip_dct[4] = 1;
2244             skip_dct[5] = 1;
2245         } else {
2246             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2247             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2248             if (!s->chroma_y_shift) { /* 422 */
2249                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2250                                     dest_cb + uv_dct_offset, wrap_c);
2251                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2252                                     dest_cr + uv_dct_offset, wrap_c);
2253             }
2254         }
2255         /* pre quantization */
2256         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2257                 2 * s->qscale * s->qscale) {
2258             // FIXME optimize
2259             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2260                 skip_dct[0] = 1;
2261             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2262                 skip_dct[1] = 1;
2263             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2264                                wrap_y, 8) < 20 * s->qscale)
2265                 skip_dct[2] = 1;
2266             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2267                                wrap_y, 8) < 20 * s->qscale)
2268                 skip_dct[3] = 1;
2269             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2270                 skip_dct[4] = 1;
2271             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2272                 skip_dct[5] = 1;
2273             if (!s->chroma_y_shift) { /* 422 */
2274                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2275                                    dest_cb + uv_dct_offset,
2276                                    wrap_c, 8) < 20 * s->qscale)
2277                     skip_dct[6] = 1;
2278                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2279                                    dest_cr + uv_dct_offset,
2280                                    wrap_c, 8) < 20 * s->qscale)
2281                     skip_dct[7] = 1;
2282             }
2283         }
2284     }
2285
2286     if (s->quantizer_noise_shaping) {
2287         if (!skip_dct[0])
2288             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2289         if (!skip_dct[1])
2290             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2291         if (!skip_dct[2])
2292             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2293         if (!skip_dct[3])
2294             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2295         if (!skip_dct[4])
2296             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2297         if (!skip_dct[5])
2298             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2299         if (!s->chroma_y_shift) { /* 422 */
2300             if (!skip_dct[6])
2301                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2302                                   wrap_c);
2303             if (!skip_dct[7])
2304                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2305                                   wrap_c);
2306         }
2307         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2308     }
2309
2310     /* DCT & quantize */
2311     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2312     {
2313         for (i = 0; i < mb_block_count; i++) {
2314             if (!skip_dct[i]) {
2315                 int overflow;
2316                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2317                 // FIXME we could decide to change to quantizer instead of
2318                 // clipping
2319                 // JS: I don't think that would be a good idea it could lower
2320                 //     quality instead of improve it. Just INTRADC clipping
2321                 //     deserves changes in quantizer
2322                 if (overflow)
2323                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2324             } else
2325                 s->block_last_index[i] = -1;
2326         }
2327         if (s->quantizer_noise_shaping) {
2328             for (i = 0; i < mb_block_count; i++) {
2329                 if (!skip_dct[i]) {
2330                     s->block_last_index[i] =
2331                         dct_quantize_refine(s, s->block[i], weight[i],
2332                                             orig[i], i, s->qscale);
2333                 }
2334             }
2335         }
2336
2337         if (s->luma_elim_threshold && !s->mb_intra)
2338             for (i = 0; i < 4; i++)
2339                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2340         if (s->chroma_elim_threshold && !s->mb_intra)
2341             for (i = 4; i < mb_block_count; i++)
2342                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2343
2344         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2345             for (i = 0; i < mb_block_count; i++) {
2346                 if (s->block_last_index[i] == -1)
2347                     s->coded_score[i] = INT_MAX / 256;
2348             }
2349         }
2350     }
2351
2352     if ((s->avctx->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2353         s->block_last_index[4] =
2354         s->block_last_index[5] = 0;
2355         s->block[4][0] =
2356         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2357         if (!s->chroma_y_shift) { /* 422 / 444 */
2358             for (i=6; i<12; i++) {
2359                 s->block_last_index[i] = 0;
2360                 s->block[i][0] = s->block[4][0];
2361             }
2362         }
2363     }
2364
2365     // non c quantize code returns incorrect block_last_index FIXME
2366     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2367         for (i = 0; i < mb_block_count; i++) {
2368             int j;
2369             if (s->block_last_index[i] > 0) {
2370                 for (j = 63; j > 0; j--) {
2371                     if (s->block[i][s->intra_scantable.permutated[j]])
2372                         break;
2373                 }
2374                 s->block_last_index[i] = j;
2375             }
2376         }
2377     }
2378
2379     /* huffman encode */
2380     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2381     case AV_CODEC_ID_MPEG1VIDEO:
2382     case AV_CODEC_ID_MPEG2VIDEO:
2383         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2384             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2385         break;
2386     case AV_CODEC_ID_MPEG4:
2387         if (CONFIG_MPEG4_ENCODER)
2388             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2389         break;
2390     case AV_CODEC_ID_MSMPEG4V2:
2391     case AV_CODEC_ID_MSMPEG4V3:
2392     case AV_CODEC_ID_WMV1:
2393         if (CONFIG_MSMPEG4_ENCODER)
2394             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2395         break;
2396     case AV_CODEC_ID_WMV2:
2397         if (CONFIG_WMV2_ENCODER)
2398             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2399         break;
2400     case AV_CODEC_ID_H261:
2401         if (CONFIG_H261_ENCODER)
2402             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2403         break;
2404     case AV_CODEC_ID_H263:
2405     case AV_CODEC_ID_H263P:
2406     case AV_CODEC_ID_FLV1:
2407     case AV_CODEC_ID_RV10:
2408     case AV_CODEC_ID_RV20:
2409         if (CONFIG_H263_ENCODER)
2410             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2411         break;
2412     case AV_CODEC_ID_MJPEG:
2413     case AV_CODEC_ID_AMV:
2414         if (CONFIG_MJPEG_ENCODER)
2415             ff_mjpeg_encode_mb(s, s->block);
2416         break;
2417     default:
2418         av_assert1(0);
2419     }
2420 }
2421
2422 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2423 {
2424     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2425     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2426     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2427 }
2428
2429 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2430     int i;
2431
2432     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2433
2434     /* mpeg1 */
2435     d->mb_skip_run= s->mb_skip_run;
2436     for(i=0; i<3; i++)
2437         d->last_dc[i] = s->last_dc[i];
2438
2439     /* statistics */
2440     d->mv_bits= s->mv_bits;
2441     d->i_tex_bits= s->i_tex_bits;
2442     d->p_tex_bits= s->p_tex_bits;
2443     d->i_count= s->i_count;
2444     d->f_count= s->f_count;
2445     d->b_count= s->b_count;
2446     d->skip_count= s->skip_count;
2447     d->misc_bits= s->misc_bits;
2448     d->last_bits= 0;
2449
2450     d->mb_skipped= 0;
2451     d->qscale= s->qscale;
2452     d->dquant= s->dquant;
2453
2454     d->esc3_level_length= s->esc3_level_length;
2455 }
2456
2457 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2458     int i;
2459
2460     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2461     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2462
2463     /* mpeg1 */
2464     d->mb_skip_run= s->mb_skip_run;
2465     for(i=0; i<3; i++)
2466         d->last_dc[i] = s->last_dc[i];
2467
2468     /* statistics */
2469     d->mv_bits= s->mv_bits;
2470     d->i_tex_bits= s->i_tex_bits;
2471     d->p_tex_bits= s->p_tex_bits;
2472     d->i_count= s->i_count;
2473     d->f_count= s->f_count;
2474     d->b_count= s->b_count;
2475     d->skip_count= s->skip_count;
2476     d->misc_bits= s->misc_bits;
2477
2478     d->mb_intra= s->mb_intra;
2479     d->mb_skipped= s->mb_skipped;
2480     d->mv_type= s->mv_type;
2481     d->mv_dir= s->mv_dir;
2482     d->pb= s->pb;
2483     if(s->data_partitioning){
2484         d->pb2= s->pb2;
2485         d->tex_pb= s->tex_pb;
2486     }
2487     d->block= s->block;
2488     for(i=0; i<8; i++)
2489         d->block_last_index[i]= s->block_last_index[i];
2490     d->interlaced_dct= s->interlaced_dct;
2491     d->qscale= s->qscale;
2492
2493     d->esc3_level_length= s->esc3_level_length;
2494 }
2495
2496 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2497                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2498                            int *dmin, int *next_block, int motion_x, int motion_y)
2499 {
2500     int score;
2501     uint8_t *dest_backup[3];
2502
2503     copy_context_before_encode(s, backup, type);
2504
2505     s->block= s->blocks[*next_block];
2506     s->pb= pb[*next_block];
2507     if(s->data_partitioning){
2508         s->pb2   = pb2   [*next_block];
2509         s->tex_pb= tex_pb[*next_block];
2510     }
2511
2512     if(*next_block){
2513         memcpy(dest_backup, s->dest, sizeof(s->dest));
2514         s->dest[0] = s->rd_scratchpad;
2515         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2516         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2517         av_assert0(s->linesize >= 32); //FIXME
2518     }
2519
2520     encode_mb(s, motion_x, motion_y);
2521
2522     score= put_bits_count(&s->pb);
2523     if(s->data_partitioning){
2524         score+= put_bits_count(&s->pb2);
2525         score+= put_bits_count(&s->tex_pb);
2526     }
2527
2528     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2529         ff_mpv_decode_mb(s, s->block);
2530
2531         score *= s->lambda2;
2532         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2533     }
2534
2535     if(*next_block){
2536         memcpy(s->dest, dest_backup, sizeof(s->dest));
2537     }
2538
2539     if(score<*dmin){
2540         *dmin= score;
2541         *next_block^=1;
2542
2543         copy_context_after_encode(best, s, type);
2544     }
2545 }
2546
2547 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2548     uint32_t *sq = ff_square_tab + 256;
2549     int acc=0;
2550     int x,y;
2551
2552     if(w==16 && h==16)
2553         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2554     else if(w==8 && h==8)
2555         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2556
2557     for(y=0; y<h; y++){
2558         for(x=0; x<w; x++){
2559             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2560         }
2561     }
2562
2563     av_assert2(acc>=0);
2564
2565     return acc;
2566 }
2567
2568 static int sse_mb(MpegEncContext *s){
2569     int w= 16;
2570     int h= 16;
2571
2572     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2573     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2574
2575     if(w==16 && h==16)
2576       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2577         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2578                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2579                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2580       }else{
2581         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2582                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2583                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2584       }
2585     else
2586         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2587                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2588                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2589 }
2590
2591 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2592     MpegEncContext *s= *(void**)arg;
2593
2594
2595     s->me.pre_pass=1;
2596     s->me.dia_size= s->avctx->pre_dia_size;
2597     s->first_slice_line=1;
2598     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2599         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2600             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2601         }
2602         s->first_slice_line=0;
2603     }
2604
2605     s->me.pre_pass=0;
2606
2607     return 0;
2608 }
2609
2610 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2611     MpegEncContext *s= *(void**)arg;
2612
2613     ff_check_alignment();
2614
2615     s->me.dia_size= s->avctx->dia_size;
2616     s->first_slice_line=1;
2617     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2618         s->mb_x=0; //for block init below
2619         ff_init_block_index(s);
2620         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2621             s->block_index[0]+=2;
2622             s->block_index[1]+=2;
2623             s->block_index[2]+=2;
2624             s->block_index[3]+=2;
2625
2626             /* compute motion vector & mb_type and store in context */
2627             if(s->pict_type==AV_PICTURE_TYPE_B)
2628                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2629             else
2630                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2631         }
2632         s->first_slice_line=0;
2633     }
2634     return 0;
2635 }
2636
2637 static int mb_var_thread(AVCodecContext *c, void *arg){
2638     MpegEncContext *s= *(void**)arg;
2639     int mb_x, mb_y;
2640
2641     ff_check_alignment();
2642
2643     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2644         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2645             int xx = mb_x * 16;
2646             int yy = mb_y * 16;
2647             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2648             int varc;
2649             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2650
2651             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2652                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2653
2654             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2655             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2656             s->me.mb_var_sum_temp    += varc;
2657         }
2658     }
2659     return 0;
2660 }
2661
2662 static void write_slice_end(MpegEncContext *s){
2663     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2664         if(s->partitioned_frame){
2665             ff_mpeg4_merge_partitions(s);
2666         }
2667
2668         ff_mpeg4_stuffing(&s->pb);
2669     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2670         ff_mjpeg_encode_stuffing(s);
2671     }
2672
2673     avpriv_align_put_bits(&s->pb);
2674     flush_put_bits(&s->pb);
2675
2676     if ((s->avctx->flags & CODEC_FLAG_PASS1) && !s->partitioned_frame)
2677         s->misc_bits+= get_bits_diff(s);
2678 }
2679
2680 static void write_mb_info(MpegEncContext *s)
2681 {
2682     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2683     int offset = put_bits_count(&s->pb);
2684     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2685     int gobn = s->mb_y / s->gob_index;
2686     int pred_x, pred_y;
2687     if (CONFIG_H263_ENCODER)
2688         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2689     bytestream_put_le32(&ptr, offset);
2690     bytestream_put_byte(&ptr, s->qscale);
2691     bytestream_put_byte(&ptr, gobn);
2692     bytestream_put_le16(&ptr, mba);
2693     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2694     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2695     /* 4MV not implemented */
2696     bytestream_put_byte(&ptr, 0); /* hmv2 */
2697     bytestream_put_byte(&ptr, 0); /* vmv2 */
2698 }
2699
2700 static void update_mb_info(MpegEncContext *s, int startcode)
2701 {
2702     if (!s->mb_info)
2703         return;
2704     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2705         s->mb_info_size += 12;
2706         s->prev_mb_info = s->last_mb_info;
2707     }
2708     if (startcode) {
2709         s->prev_mb_info = put_bits_count(&s->pb)/8;
2710         /* This might have incremented mb_info_size above, and we return without
2711          * actually writing any info into that slot yet. But in that case,
2712          * this will be called again at the start of the after writing the
2713          * start code, actually writing the mb info. */
2714         return;
2715     }
2716
2717     s->last_mb_info = put_bits_count(&s->pb)/8;
2718     if (!s->mb_info_size)
2719         s->mb_info_size += 12;
2720     write_mb_info(s);
2721 }
2722
2723 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2724 {
2725     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2726         && s->slice_context_count == 1
2727         && s->pb.buf == s->avctx->internal->byte_buffer) {
2728         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2729         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2730
2731         uint8_t *new_buffer = NULL;
2732         int new_buffer_size = 0;
2733
2734         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2735                               s->avctx->internal->byte_buffer_size + size_increase);
2736         if (!new_buffer)
2737             return AVERROR(ENOMEM);
2738
2739         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2740         av_free(s->avctx->internal->byte_buffer);
2741         s->avctx->internal->byte_buffer      = new_buffer;
2742         s->avctx->internal->byte_buffer_size = new_buffer_size;
2743         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2744         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2745         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2746     }
2747     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2748         return AVERROR(EINVAL);
2749     return 0;
2750 }
2751
2752 static int encode_thread(AVCodecContext *c, void *arg){
2753     MpegEncContext *s= *(void**)arg;
2754     int mb_x, mb_y, pdif = 0;
2755     int chr_h= 16>>s->chroma_y_shift;
2756     int i, j;
2757     MpegEncContext best_s = { 0 }, backup_s;
2758     uint8_t bit_buf[2][MAX_MB_BYTES];
2759     uint8_t bit_buf2[2][MAX_MB_BYTES];
2760     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2761     PutBitContext pb[2], pb2[2], tex_pb[2];
2762
2763     ff_check_alignment();
2764
2765     for(i=0; i<2; i++){
2766         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2767         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2768         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2769     }
2770
2771     s->last_bits= put_bits_count(&s->pb);
2772     s->mv_bits=0;
2773     s->misc_bits=0;
2774     s->i_tex_bits=0;
2775     s->p_tex_bits=0;
2776     s->i_count=0;
2777     s->f_count=0;
2778     s->b_count=0;
2779     s->skip_count=0;
2780
2781     for(i=0; i<3; i++){
2782         /* init last dc values */
2783         /* note: quant matrix value (8) is implied here */
2784         s->last_dc[i] = 128 << s->intra_dc_precision;
2785
2786         s->current_picture.error[i] = 0;
2787     }
2788     if(s->codec_id==AV_CODEC_ID_AMV){
2789         s->last_dc[0] = 128*8/13;
2790         s->last_dc[1] = 128*8/14;
2791         s->last_dc[2] = 128*8/14;
2792     }
2793     s->mb_skip_run = 0;
2794     memset(s->last_mv, 0, sizeof(s->last_mv));
2795
2796     s->last_mv_dir = 0;
2797
2798     switch(s->codec_id){
2799     case AV_CODEC_ID_H263:
2800     case AV_CODEC_ID_H263P:
2801     case AV_CODEC_ID_FLV1:
2802         if (CONFIG_H263_ENCODER)
2803             s->gob_index = H263_GOB_HEIGHT(s->height);
2804         break;
2805     case AV_CODEC_ID_MPEG4:
2806         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2807             ff_mpeg4_init_partitions(s);
2808         break;
2809     }
2810
2811     s->resync_mb_x=0;
2812     s->resync_mb_y=0;
2813     s->first_slice_line = 1;
2814     s->ptr_lastgob = s->pb.buf;
2815     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2816         s->mb_x=0;
2817         s->mb_y= mb_y;
2818
2819         ff_set_qscale(s, s->qscale);
2820         ff_init_block_index(s);
2821
2822         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2823             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2824             int mb_type= s->mb_type[xy];
2825 //            int d;
2826             int dmin= INT_MAX;
2827             int dir;
2828             int size_increase =  s->avctx->internal->byte_buffer_size/4
2829                                + s->mb_width*MAX_MB_BYTES;
2830
2831             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2832             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2833                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2834                 return -1;
2835             }
2836             if(s->data_partitioning){
2837                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2838                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2839                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2840                     return -1;
2841                 }
2842             }
2843
2844             s->mb_x = mb_x;
2845             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2846             ff_update_block_index(s);
2847
2848             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2849                 ff_h261_reorder_mb_index(s);
2850                 xy= s->mb_y*s->mb_stride + s->mb_x;
2851                 mb_type= s->mb_type[xy];
2852             }
2853
2854             /* write gob / video packet header  */
2855             if(s->rtp_mode){
2856                 int current_packet_size, is_gob_start;
2857
2858                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2859
2860                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2861
2862                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2863
2864                 switch(s->codec_id){
2865                 case AV_CODEC_ID_H263:
2866                 case AV_CODEC_ID_H263P:
2867                     if(!s->h263_slice_structured)
2868                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2869                     break;
2870                 case AV_CODEC_ID_MPEG2VIDEO:
2871                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2872                 case AV_CODEC_ID_MPEG1VIDEO:
2873                     if(s->mb_skip_run) is_gob_start=0;
2874                     break;
2875                 case AV_CODEC_ID_MJPEG:
2876                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2877                     break;
2878                 }
2879
2880                 if(is_gob_start){
2881                     if(s->start_mb_y != mb_y || mb_x!=0){
2882                         write_slice_end(s);
2883
2884                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2885                             ff_mpeg4_init_partitions(s);
2886                         }
2887                     }
2888
2889                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2890                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2891
2892                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2893                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2894                         int d = 100 / s->error_rate;
2895                         if(r % d == 0){
2896                             current_packet_size=0;
2897                             s->pb.buf_ptr= s->ptr_lastgob;
2898                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2899                         }
2900                     }
2901
2902                     if (s->avctx->rtp_callback){
2903                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2904                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2905                     }
2906                     update_mb_info(s, 1);
2907
2908                     switch(s->codec_id){
2909                     case AV_CODEC_ID_MPEG4:
2910                         if (CONFIG_MPEG4_ENCODER) {
2911                             ff_mpeg4_encode_video_packet_header(s);
2912                             ff_mpeg4_clean_buffers(s);
2913                         }
2914                     break;
2915                     case AV_CODEC_ID_MPEG1VIDEO:
2916                     case AV_CODEC_ID_MPEG2VIDEO:
2917                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2918                             ff_mpeg1_encode_slice_header(s);
2919                             ff_mpeg1_clean_buffers(s);
2920                         }
2921                     break;
2922                     case AV_CODEC_ID_H263:
2923                     case AV_CODEC_ID_H263P:
2924                         if (CONFIG_H263_ENCODER)
2925                             ff_h263_encode_gob_header(s, mb_y);
2926                     break;
2927                     }
2928
2929                     if (s->avctx->flags & CODEC_FLAG_PASS1) {
2930                         int bits= put_bits_count(&s->pb);
2931                         s->misc_bits+= bits - s->last_bits;
2932                         s->last_bits= bits;
2933                     }
2934
2935                     s->ptr_lastgob += current_packet_size;
2936                     s->first_slice_line=1;
2937                     s->resync_mb_x=mb_x;
2938                     s->resync_mb_y=mb_y;
2939                 }
2940             }
2941
2942             if(  (s->resync_mb_x   == s->mb_x)
2943                && s->resync_mb_y+1 == s->mb_y){
2944                 s->first_slice_line=0;
2945             }
2946
2947             s->mb_skipped=0;
2948             s->dquant=0; //only for QP_RD
2949
2950             update_mb_info(s, 0);
2951
2952             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2953                 int next_block=0;
2954                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2955
2956                 copy_context_before_encode(&backup_s, s, -1);
2957                 backup_s.pb= s->pb;
2958                 best_s.data_partitioning= s->data_partitioning;
2959                 best_s.partitioned_frame= s->partitioned_frame;
2960                 if(s->data_partitioning){
2961                     backup_s.pb2= s->pb2;
2962                     backup_s.tex_pb= s->tex_pb;
2963                 }
2964
2965                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2966                     s->mv_dir = MV_DIR_FORWARD;
2967                     s->mv_type = MV_TYPE_16X16;
2968                     s->mb_intra= 0;
2969                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2970                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2971                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2972                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2973                 }
2974                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2975                     s->mv_dir = MV_DIR_FORWARD;
2976                     s->mv_type = MV_TYPE_FIELD;
2977                     s->mb_intra= 0;
2978                     for(i=0; i<2; i++){
2979                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2980                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2981                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2982                     }
2983                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2984                                  &dmin, &next_block, 0, 0);
2985                 }
2986                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2987                     s->mv_dir = MV_DIR_FORWARD;
2988                     s->mv_type = MV_TYPE_16X16;
2989                     s->mb_intra= 0;
2990                     s->mv[0][0][0] = 0;
2991                     s->mv[0][0][1] = 0;
2992                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2993                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2994                 }
2995                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2996                     s->mv_dir = MV_DIR_FORWARD;
2997                     s->mv_type = MV_TYPE_8X8;
2998                     s->mb_intra= 0;
2999                     for(i=0; i<4; i++){
3000                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3001                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3002                     }
3003                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3004                                  &dmin, &next_block, 0, 0);
3005                 }
3006                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3007                     s->mv_dir = MV_DIR_FORWARD;
3008                     s->mv_type = MV_TYPE_16X16;
3009                     s->mb_intra= 0;
3010                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3011                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3012                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3013                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3014                 }
3015                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3016                     s->mv_dir = MV_DIR_BACKWARD;
3017                     s->mv_type = MV_TYPE_16X16;
3018                     s->mb_intra= 0;
3019                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3020                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3021                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3022                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3023                 }
3024                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3025                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3026                     s->mv_type = MV_TYPE_16X16;
3027                     s->mb_intra= 0;
3028                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3029                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3030                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3031                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3032                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3033                                  &dmin, &next_block, 0, 0);
3034                 }
3035                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3036                     s->mv_dir = MV_DIR_FORWARD;
3037                     s->mv_type = MV_TYPE_FIELD;
3038                     s->mb_intra= 0;
3039                     for(i=0; i<2; i++){
3040                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3041                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3042                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3043                     }
3044                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3045                                  &dmin, &next_block, 0, 0);
3046                 }
3047                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3048                     s->mv_dir = MV_DIR_BACKWARD;
3049                     s->mv_type = MV_TYPE_FIELD;
3050                     s->mb_intra= 0;
3051                     for(i=0; i<2; i++){
3052                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3053                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3054                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3055                     }
3056                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3057                                  &dmin, &next_block, 0, 0);
3058                 }
3059                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3060                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3061                     s->mv_type = MV_TYPE_FIELD;
3062                     s->mb_intra= 0;
3063                     for(dir=0; dir<2; dir++){
3064                         for(i=0; i<2; i++){
3065                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3066                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3067                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3068                         }
3069                     }
3070                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3071                                  &dmin, &next_block, 0, 0);
3072                 }
3073                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3074                     s->mv_dir = 0;
3075                     s->mv_type = MV_TYPE_16X16;
3076                     s->mb_intra= 1;
3077                     s->mv[0][0][0] = 0;
3078                     s->mv[0][0][1] = 0;
3079                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3080                                  &dmin, &next_block, 0, 0);
3081                     if(s->h263_pred || s->h263_aic){
3082                         if(best_s.mb_intra)
3083                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3084                         else
3085                             ff_clean_intra_table_entries(s); //old mode?
3086                     }
3087                 }
3088
3089                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3090                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3091                         const int last_qp= backup_s.qscale;
3092                         int qpi, qp, dc[6];
3093                         int16_t ac[6][16];
3094                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3095                         static const int dquant_tab[4]={-1,1,-2,2};
3096                         int storecoefs = s->mb_intra && s->dc_val[0];
3097
3098                         av_assert2(backup_s.dquant == 0);
3099
3100                         //FIXME intra
3101                         s->mv_dir= best_s.mv_dir;
3102                         s->mv_type = MV_TYPE_16X16;
3103                         s->mb_intra= best_s.mb_intra;
3104                         s->mv[0][0][0] = best_s.mv[0][0][0];
3105                         s->mv[0][0][1] = best_s.mv[0][0][1];
3106                         s->mv[1][0][0] = best_s.mv[1][0][0];
3107                         s->mv[1][0][1] = best_s.mv[1][0][1];
3108
3109                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3110                         for(; qpi<4; qpi++){
3111                             int dquant= dquant_tab[qpi];
3112                             qp= last_qp + dquant;
3113                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3114                                 continue;
3115                             backup_s.dquant= dquant;
3116                             if(storecoefs){
3117                                 for(i=0; i<6; i++){
3118                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3119                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3120                                 }
3121                             }
3122
3123                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3124                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3125                             if(best_s.qscale != qp){
3126                                 if(storecoefs){
3127                                     for(i=0; i<6; i++){
3128                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3129                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3130                                     }
3131                                 }
3132                             }
3133                         }
3134                     }
3135                 }
3136                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3137                     int mx= s->b_direct_mv_table[xy][0];
3138                     int my= s->b_direct_mv_table[xy][1];
3139
3140                     backup_s.dquant = 0;
3141                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3142                     s->mb_intra= 0;
3143                     ff_mpeg4_set_direct_mv(s, mx, my);
3144                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3145                                  &dmin, &next_block, mx, my);
3146                 }
3147                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3148                     backup_s.dquant = 0;
3149                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3150                     s->mb_intra= 0;
3151                     ff_mpeg4_set_direct_mv(s, 0, 0);
3152                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3153                                  &dmin, &next_block, 0, 0);
3154                 }
3155                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3156                     int coded=0;
3157                     for(i=0; i<6; i++)
3158                         coded |= s->block_last_index[i];
3159                     if(coded){
3160                         int mx,my;
3161                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3162                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3163                             mx=my=0; //FIXME find the one we actually used
3164                             ff_mpeg4_set_direct_mv(s, mx, my);
3165                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3166                             mx= s->mv[1][0][0];
3167                             my= s->mv[1][0][1];
3168                         }else{
3169                             mx= s->mv[0][0][0];
3170                             my= s->mv[0][0][1];
3171                         }
3172
3173                         s->mv_dir= best_s.mv_dir;
3174                         s->mv_type = best_s.mv_type;
3175                         s->mb_intra= 0;
3176 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3177                         s->mv[0][0][1] = best_s.mv[0][0][1];
3178                         s->mv[1][0][0] = best_s.mv[1][0][0];
3179                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3180                         backup_s.dquant= 0;
3181                         s->skipdct=1;
3182                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3183                                         &dmin, &next_block, mx, my);
3184                         s->skipdct=0;
3185                     }
3186                 }
3187
3188                 s->current_picture.qscale_table[xy] = best_s.qscale;
3189
3190                 copy_context_after_encode(s, &best_s, -1);
3191
3192                 pb_bits_count= put_bits_count(&s->pb);
3193                 flush_put_bits(&s->pb);
3194                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3195                 s->pb= backup_s.pb;
3196
3197                 if(s->data_partitioning){
3198                     pb2_bits_count= put_bits_count(&s->pb2);
3199                     flush_put_bits(&s->pb2);
3200                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3201                     s->pb2= backup_s.pb2;
3202
3203                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3204                     flush_put_bits(&s->tex_pb);
3205                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3206                     s->tex_pb= backup_s.tex_pb;
3207                 }
3208                 s->last_bits= put_bits_count(&s->pb);
3209
3210                 if (CONFIG_H263_ENCODER &&
3211                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3212                     ff_h263_update_motion_val(s);
3213
3214                 if(next_block==0){ //FIXME 16 vs linesize16
3215                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3216                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3217                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3218                 }
3219
3220                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3221                     ff_mpv_decode_mb(s, s->block);
3222             } else {
3223                 int motion_x = 0, motion_y = 0;
3224                 s->mv_type=MV_TYPE_16X16;
3225                 // only one MB-Type possible
3226
3227                 switch(mb_type){
3228                 case CANDIDATE_MB_TYPE_INTRA:
3229                     s->mv_dir = 0;
3230                     s->mb_intra= 1;
3231                     motion_x= s->mv[0][0][0] = 0;
3232                     motion_y= s->mv[0][0][1] = 0;
3233                     break;
3234                 case CANDIDATE_MB_TYPE_INTER:
3235                     s->mv_dir = MV_DIR_FORWARD;
3236                     s->mb_intra= 0;
3237                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3238                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3239                     break;
3240                 case CANDIDATE_MB_TYPE_INTER_I:
3241                     s->mv_dir = MV_DIR_FORWARD;
3242                     s->mv_type = MV_TYPE_FIELD;
3243                     s->mb_intra= 0;
3244                     for(i=0; i<2; i++){
3245                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3246                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3247                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3248                     }
3249                     break;
3250                 case CANDIDATE_MB_TYPE_INTER4V:
3251                     s->mv_dir = MV_DIR_FORWARD;
3252                     s->mv_type = MV_TYPE_8X8;
3253                     s->mb_intra= 0;
3254                     for(i=0; i<4; i++){
3255                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3256                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3257                     }
3258                     break;
3259                 case CANDIDATE_MB_TYPE_DIRECT:
3260                     if (CONFIG_MPEG4_ENCODER) {
3261                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3262                         s->mb_intra= 0;
3263                         motion_x=s->b_direct_mv_table[xy][0];
3264                         motion_y=s->b_direct_mv_table[xy][1];
3265                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3266                     }
3267                     break;
3268                 case CANDIDATE_MB_TYPE_DIRECT0:
3269                     if (CONFIG_MPEG4_ENCODER) {
3270                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3271                         s->mb_intra= 0;
3272                         ff_mpeg4_set_direct_mv(s, 0, 0);
3273                     }
3274                     break;
3275                 case CANDIDATE_MB_TYPE_BIDIR:
3276                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3277                     s->mb_intra= 0;
3278                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3279                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3280                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3281                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3282                     break;
3283                 case CANDIDATE_MB_TYPE_BACKWARD:
3284                     s->mv_dir = MV_DIR_BACKWARD;
3285                     s->mb_intra= 0;
3286                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3287                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3288                     break;
3289                 case CANDIDATE_MB_TYPE_FORWARD:
3290                     s->mv_dir = MV_DIR_FORWARD;
3291                     s->mb_intra= 0;
3292                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3293                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3294                     break;
3295                 case CANDIDATE_MB_TYPE_FORWARD_I:
3296                     s->mv_dir = MV_DIR_FORWARD;
3297                     s->mv_type = MV_TYPE_FIELD;
3298                     s->mb_intra= 0;
3299                     for(i=0; i<2; i++){
3300                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3301                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3302                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3303                     }
3304                     break;
3305                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3306                     s->mv_dir = MV_DIR_BACKWARD;
3307                     s->mv_type = MV_TYPE_FIELD;
3308                     s->mb_intra= 0;
3309                     for(i=0; i<2; i++){
3310                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3311                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3312                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3313                     }
3314                     break;
3315                 case CANDIDATE_MB_TYPE_BIDIR_I:
3316                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3317                     s->mv_type = MV_TYPE_FIELD;
3318                     s->mb_intra= 0;
3319                     for(dir=0; dir<2; dir++){
3320                         for(i=0; i<2; i++){
3321                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3322                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3323                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3324                         }
3325                     }
3326                     break;
3327                 default:
3328                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3329                 }
3330
3331                 encode_mb(s, motion_x, motion_y);
3332
3333                 // RAL: Update last macroblock type
3334                 s->last_mv_dir = s->mv_dir;
3335
3336                 if (CONFIG_H263_ENCODER &&
3337                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3338                     ff_h263_update_motion_val(s);
3339
3340                 ff_mpv_decode_mb(s, s->block);
3341             }
3342
3343             /* clean the MV table in IPS frames for direct mode in B frames */
3344             if(s->mb_intra /* && I,P,S_TYPE */){
3345                 s->p_mv_table[xy][0]=0;
3346                 s->p_mv_table[xy][1]=0;
3347             }
3348
3349             if (s->avctx->flags & CODEC_FLAG_PSNR) {
3350                 int w= 16;
3351                 int h= 16;
3352
3353                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3354                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3355
3356                 s->current_picture.error[0] += sse(
3357                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3358                     s->dest[0], w, h, s->linesize);
3359                 s->current_picture.error[1] += sse(
3360                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3361                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3362                 s->current_picture.error[2] += sse(
3363                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3364                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3365             }
3366             if(s->loop_filter){
3367                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3368                     ff_h263_loop_filter(s);
3369             }
3370             ff_dlog(s->avctx, "MB %d %d bits\n",
3371                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3372         }
3373     }
3374
3375     //not beautiful here but we must write it before flushing so it has to be here
3376     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3377         ff_msmpeg4_encode_ext_header(s);
3378
3379     write_slice_end(s);
3380
3381     /* Send the last GOB if RTP */
3382     if (s->avctx->rtp_callback) {
3383         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3384         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3385         /* Call the RTP callback to send the last GOB */
3386         emms_c();
3387         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3388     }
3389
3390     return 0;
3391 }
3392
/**
 * Accumulate one field of a slice context into the main context.
 *
 * Adds src->field to dst->field and then resets src->field to zero. The
 * expansion site must have pointers named `dst` and `src` in scope.
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct as the body of an unbraced if/else/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3394 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3395     MERGE(me.scene_change_score);
3396     MERGE(me.mc_mb_var_sum_temp);
3397     MERGE(me.mb_var_sum_temp);
3398 }
3399
3400 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3401     int i;
3402
3403     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3404     MERGE(dct_count[1]);
3405     MERGE(mv_bits);
3406     MERGE(i_tex_bits);
3407     MERGE(p_tex_bits);
3408     MERGE(i_count);
3409     MERGE(f_count);
3410     MERGE(b_count);
3411     MERGE(skip_count);
3412     MERGE(misc_bits);
3413     MERGE(er.error_count);
3414     MERGE(padding_bug_score);
3415     MERGE(current_picture.error[0]);
3416     MERGE(current_picture.error[1]);
3417     MERGE(current_picture.error[2]);
3418
3419     if(dst->avctx->noise_reduction){
3420         for(i=0; i<64; i++){
3421             MERGE(dct_error_sum[0][i]);
3422             MERGE(dct_error_sum[1][i]);
3423         }
3424     }
3425
3426     assert(put_bits_count(&src->pb) % 8 ==0);
3427     assert(put_bits_count(&dst->pb) % 8 ==0);
3428     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3429     flush_put_bits(&dst->pb);
3430 }
3431
3432 static int estimate_qp(MpegEncContext *s, int dry_run){
3433     if (s->next_lambda){
3434         s->current_picture_ptr->f->quality =
3435         s->current_picture.f->quality = s->next_lambda;
3436         if(!dry_run) s->next_lambda= 0;
3437     } else if (!s->fixed_qscale) {
3438         s->current_picture_ptr->f->quality =
3439         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3440         if (s->current_picture.f->quality < 0)
3441             return -1;
3442     }
3443
3444     if(s->adaptive_quant){
3445         switch(s->codec_id){
3446         case AV_CODEC_ID_MPEG4:
3447             if (CONFIG_MPEG4_ENCODER)
3448                 ff_clean_mpeg4_qscales(s);
3449             break;
3450         case AV_CODEC_ID_H263:
3451         case AV_CODEC_ID_H263P:
3452         case AV_CODEC_ID_FLV1:
3453             if (CONFIG_H263_ENCODER)
3454                 ff_clean_h263_qscales(s);
3455             break;
3456         default:
3457             ff_init_qscale_tab(s);
3458         }
3459
3460         s->lambda= s->lambda_table[0];
3461         //FIXME broken
3462     }else
3463         s->lambda = s->current_picture.f->quality;
3464     update_qscale(s);
3465     return 0;
3466 }
3467
3468 /* must be called before writing the header */
3469 static void set_frame_distances(MpegEncContext * s){
3470     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3471     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3472
3473     if(s->pict_type==AV_PICTURE_TYPE_B){
3474         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3475         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3476     }else{
3477         s->pp_time= s->time - s->last_non_b_time;
3478         s->last_non_b_time= s->time;
3479         assert(s->picture_number==0 || s->pp_time > 0);
3480     }
3481 }
3482
3483 static int encode_picture(MpegEncContext *s, int picture_number)
3484 {
3485     int i, ret;
3486     int bits;
3487     int context_count = s->slice_context_count;
3488
3489     s->picture_number = picture_number;
3490
3491     /* Reset the average MB variance */
3492     s->me.mb_var_sum_temp    =
3493     s->me.mc_mb_var_sum_temp = 0;
3494
3495     /* we need to initialize some time vars before we can encode b-frames */
3496     // RAL: Condition added for MPEG1VIDEO
3497     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3498         set_frame_distances(s);
3499     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3500         ff_set_mpeg4_time(s);
3501
3502     s->me.scene_change_score=0;
3503
3504 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3505
3506     if(s->pict_type==AV_PICTURE_TYPE_I){
3507         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3508         else                        s->no_rounding=0;
3509     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3510         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3511             s->no_rounding ^= 1;
3512     }
3513
3514     if (s->avctx->flags & CODEC_FLAG_PASS2) {
3515         if (estimate_qp(s,1) < 0)
3516             return -1;
3517         ff_get_2pass_fcode(s);
3518     } else if (!(s->avctx->flags & CODEC_FLAG_QSCALE)) {
3519         if(s->pict_type==AV_PICTURE_TYPE_B)
3520             s->lambda= s->last_lambda_for[s->pict_type];
3521         else
3522             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3523         update_qscale(s);
3524     }
3525
3526     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3527         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3528         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3529         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3530         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3531     }
3532
3533     s->mb_intra=0; //for the rate distortion & bit compare functions
3534     for(i=1; i<context_count; i++){
3535         ret = ff_update_duplicate_context(s->thread_context[i], s);
3536         if (ret < 0)
3537             return ret;
3538     }
3539
3540     if(ff_init_me(s)<0)
3541         return -1;
3542
3543     /* Estimate motion for every MB */
3544     if(s->pict_type != AV_PICTURE_TYPE_I){
3545         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3546         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3547         if (s->pict_type != AV_PICTURE_TYPE_B) {
3548             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3549                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3550             }
3551         }
3552
3553         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3554     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3555         /* I-Frame */
3556         for(i=0; i<s->mb_stride*s->mb_height; i++)
3557             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3558
3559         if(!s->fixed_qscale){
3560             /* finding spatial complexity for I-frame rate control */
3561             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3562         }
3563     }
3564     for(i=1; i<context_count; i++){
3565         merge_context_after_me(s, s->thread_context[i]);
3566     }
3567     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3568     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3569     emms_c();
3570
3571     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3572         s->pict_type= AV_PICTURE_TYPE_I;
3573         for(i=0; i<s->mb_stride*s->mb_height; i++)
3574             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3575         if(s->msmpeg4_version >= 3)
3576             s->no_rounding=1;
3577         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3578                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3579     }
3580
3581     if(!s->umvplus){
3582         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3583             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3584
3585             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3586                 int a,b;
3587                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3588                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3589                 s->f_code= FFMAX3(s->f_code, a, b);
3590             }
3591
3592             ff_fix_long_p_mvs(s);
3593             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3594             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3595                 int j;
3596                 for(i=0; i<2; i++){
3597                     for(j=0; j<2; j++)
3598                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3599                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3600                 }
3601             }
3602         }
3603
3604         if(s->pict_type==AV_PICTURE_TYPE_B){
3605             int a, b;
3606
3607             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3608             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3609             s->f_code = FFMAX(a, b);
3610
3611             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3612             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3613             s->b_code = FFMAX(a, b);
3614
3615             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3616             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3617             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3618             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3619             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3620                 int dir, j;
3621                 for(dir=0; dir<2; dir++){
3622                     for(i=0; i<2; i++){
3623                         for(j=0; j<2; j++){
3624                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3625                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3626                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3627                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3628                         }
3629                     }
3630                 }
3631             }
3632         }
3633     }
3634
3635     if (estimate_qp(s, 0) < 0)
3636         return -1;
3637
3638     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3639         s->pict_type == AV_PICTURE_TYPE_I &&
3640         !(s->avctx->flags & CODEC_FLAG_QSCALE))
3641         s->qscale= 3; //reduce clipping problems
3642
3643     if (s->out_format == FMT_MJPEG) {
3644         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3645         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3646
3647         if (s->avctx->intra_matrix) {
3648             chroma_matrix =
3649             luma_matrix = s->avctx->intra_matrix;
3650         }
3651         if (s->avctx->chroma_intra_matrix)
3652             chroma_matrix = s->avctx->chroma_intra_matrix;
3653
3654         /* for mjpeg, we do include qscale in the matrix */
3655         for(i=1;i<64;i++){
3656             int j = s->idsp.idct_permutation[i];
3657
3658             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3659             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3660         }
3661         s->y_dc_scale_table=
3662         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3663         s->chroma_intra_matrix[0] =
3664         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3665         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3666                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3667         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3668                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3669         s->qscale= 8;
3670     }
3671     if(s->codec_id == AV_CODEC_ID_AMV){
3672         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3673         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3674         for(i=1;i<64;i++){
3675             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3676
3677             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3678             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3679         }
3680         s->y_dc_scale_table= y;
3681         s->c_dc_scale_table= c;
3682         s->intra_matrix[0] = 13;
3683         s->chroma_intra_matrix[0] = 14;
3684         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3685                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3686         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3687                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3688         s->qscale= 8;
3689     }
3690
3691     //FIXME var duplication
3692     s->current_picture_ptr->f->key_frame =
3693     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3694     s->current_picture_ptr->f->pict_type =
3695     s->current_picture.f->pict_type = s->pict_type;
3696
3697     if (s->current_picture.f->key_frame)
3698         s->picture_in_gop_number=0;
3699
3700     s->mb_x = s->mb_y = 0;
3701     s->last_bits= put_bits_count(&s->pb);
3702     switch(s->out_format) {
3703     case FMT_MJPEG:
3704         if (CONFIG_MJPEG_ENCODER)
3705             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3706                                            s->intra_matrix, s->chroma_intra_matrix);
3707         break;
3708     case FMT_H261:
3709         if (CONFIG_H261_ENCODER)
3710             ff_h261_encode_picture_header(s, picture_number);
3711         break;
3712     case FMT_H263:
3713         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3714             ff_wmv2_encode_picture_header(s, picture_number);
3715         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3716             ff_msmpeg4_encode_picture_header(s, picture_number);
3717         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3718             ff_mpeg4_encode_picture_header(s, picture_number);
3719         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3720             ret = ff_rv10_encode_picture_header(s, picture_number);
3721             if (ret < 0)
3722                 return ret;
3723         }
3724         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3725             ff_rv20_encode_picture_header(s, picture_number);
3726         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3727             ff_flv_encode_picture_header(s, picture_number);
3728         else if (CONFIG_H263_ENCODER)
3729             ff_h263_encode_picture_header(s, picture_number);
3730         break;
3731     case FMT_MPEG1:
3732         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3733             ff_mpeg1_encode_picture_header(s, picture_number);
3734         break;
3735     default:
3736         av_assert0(0);
3737     }
3738     bits= put_bits_count(&s->pb);
3739     s->header_bits= bits - s->last_bits;
3740
3741     for(i=1; i<context_count; i++){
3742         update_duplicate_context_after_me(s->thread_context[i], s);
3743     }
3744     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3745     for(i=1; i<context_count; i++){
3746         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3747             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3748         merge_context_after_encode(s, s->thread_context[i]);
3749     }
3750     emms_c();
3751     return 0;
3752 }
3753
3754 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3755     const int intra= s->mb_intra;
3756     int i;
3757
3758     s->dct_count[intra]++;
3759
3760     for(i=0; i<64; i++){
3761         int level= block[i];
3762
3763         if(level){
3764             if(level>0){
3765                 s->dct_error_sum[intra][i] += level;
3766                 level -= s->dct_offset[intra][i];
3767                 if(level<0) level=0;
3768             }else{
3769                 s->dct_error_sum[intra][i] -= level;
3770                 level += s->dct_offset[intra][i];
3771                 if(level>0) level=0;
3772             }
3773             block[i]= level;
3774         }
3775     }
3776 }
3777
/**
 * Rate-distortion ("trellis") quantization of one 8x8 block.
 *
 * The block is forward-transformed in place (followed by the optional
 * denoise step). Instead of plain rounding, up to two candidate levels are
 * kept per coefficient (the rounded level and the one that is one step
 * closer to zero). A dynamic-programming pass over the scan positions then
 * picks the run/level sequence with the lowest
 * distortion + lambda * (VLC length) score, using the intra or inter VLC
 * length tables of the context.
 *
 * @param s        encoder context; s->coded_score[n] is written
 * @param block    64 coefficients, transformed and quantized in place; the
 *                 kept levels are stored at the permutated scan positions
 * @param n        block index; n < 4 selects the luma DC scale and intra
 *                 matrix, larger values select the chroma ones
 * @param qscale   quantizer scale, used to index the quantization matrices
 * @param overflow set to non-zero when a quantized magnitude may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (0 for intra blocks, -1 for inter blocks) means
 *         that no coefficient from that index on was kept
 */
3778 static int dct_quantize_trellis_c(MpegEncContext *s,
3779                                   int16_t *block, int n,
3780                                   int qscale, int *overflow){
3781     const int *qmat;
3782     const uint16_t *matrix;
3783     const uint8_t *scantable= s->intra_scantable.scantable;
3784     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3785     int max=0;
3786     unsigned int threshold1, threshold2;
3787     int bias=0;
         /* run_tab[k] / level_tab[k]: run and level of the coefficient at scan
          * position k-1 on the best path that ends there; score_tab[k] is the
          * score of that path. */
3788     int run_tab[65];
3789     int level_tab[65];
3790     int score_tab[65];
         /* positions after which a new run may start (pruned while scanning) */
3791     int survivor[65];
3792     int survivor_count;
3793     int last_run=0;
3794     int last_level=0;
3795     int last_score= 0;
3796     int last_i;
         /* candidate levels per scan position and how many of them are valid */
3797     int coeff[2][64];
3798     int coeff_count[64];
3799     int qmul, qadd, start_i, last_non_zero, i, dc;
3800     const int esc_length= s->ac_esc_length;
3801     uint8_t * length;
3802     uint8_t * last_length;
3803     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3804
3805     s->fdsp.fdct(block);
3806
3807     if(s->dct_error_sum)
3808         s->denoise_dct(s, block);
         /* dequantization constants used for the H.263/H.261 distortion estimate */
3809     qmul= qscale*16;
3810     qadd= ((qscale-1)|1)*8;
3811
3812     if (s->mb_intra) {
3813         int q;
3814         if (!s->h263_aic) {
3815             if (n < 4)
3816                 q = s->y_dc_scale;
3817             else
3818                 q = s->c_dc_scale;
3819             q = q << 3;
3820         } else{
3821             /* For AIC we skip quant/dequant of INTRADC */
3822             q = 1 << 3;
3823             qadd=0;
3824         }
3825
3826         /* note: block[0] is assumed to be positive */
3827         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the trellis only covers index 1..63 */
3828         start_i = 1;
3829         last_non_zero = 0;
3830         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3831         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3832         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3833             bias= 1<<(QMAT_SHIFT-1);
3834
             /* use the dedicated chroma VLC length tables when they exist */
3835         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3836             length     = s->intra_chroma_ac_vlc_length;
3837             last_length= s->intra_chroma_ac_vlc_last_length;
3838         } else {
3839             length     = s->intra_ac_vlc_length;
3840             last_length= s->intra_ac_vlc_last_length;
3841         }
3842     } else {
3843         start_i = 0;
3844         last_non_zero = -1;
3845         qmat = s->q_inter_matrix[qscale];
3846         matrix = s->inter_matrix;
3847         length     = s->inter_ac_vlc_length;
3848         last_length= s->inter_ac_vlc_last_length;
3849     }
3850     last_i= start_i;
3851
         /* a scaled value v quantizes to non-zero iff
          * (unsigned)(v + threshold1) > threshold2, i.e. |v| > threshold1 */
3852     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3853     threshold2= (threshold1<<1);
3854
         /* find the last scan position that quantizes to a non-zero level */
3855     for(i=63; i>=start_i; i--) {
3856         const int j = scantable[i];
3857         int level = block[j] * qmat[j];
3858
3859         if(((unsigned)(level+threshold1))>threshold2){
3860             last_non_zero = i;
3861             break;
3862         }
3863     }
3864
         /* collect the candidate levels for every position up to last_non_zero */
3865     for(i=start_i; i<=last_non_zero; i++) {
3866         const int j = scantable[i];
3867         int level = block[j] * qmat[j];
3868
3869 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3870 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3871         if(((unsigned)(level+threshold1))>threshold2){
3872             if(level>0){
3873                 level= (bias + level)>>QMAT_SHIFT;
3874                 coeff[0][i]= level;
3875                 coeff[1][i]= level-1;
3876 //                coeff[2][k]= level-2;
3877             }else{
3878                 level= (bias - level)>>QMAT_SHIFT;
3879                 coeff[0][i]= -level;
3880                 coeff[1][i]= -level+1;
3881 //                coeff[2][k]= -level+2;
3882             }
                 /* a magnitude of 1 has a single candidate: level-1 would be 0 */
3883             coeff_count[i]= FFMIN(level, 2);
3884             av_assert2(coeff_count[i]);
3885             max |=level;
3886         }else{
                 /* below the threshold: the only candidate is +1 or -1, taken
                  * from the sign of the scaled value */
3887             coeff[0][i]= (level>>31)|1;
3888             coeff_count[i]= 1;
3889         }
3890     }
3891
3892     *overflow= s->max_qcoeff < max; //overflow might have happened
3893
         /* nothing quantizes to non-zero: clear the coefficients and stop */
3894     if(last_non_zero < start_i){
3895         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3896         return last_non_zero;
3897     }
3898
3899     score_tab[start_i]= 0;
3900     survivor[0]= start_i;
3901     survivor_count= 1;
3902
         /* dynamic programming over the scan positions */
3903     for(i=start_i; i<=last_non_zero; i++){
3904         int level_index, j, zero_distortion;
3905         int dct_coeff= FFABS(block[ scantable[i] ]);
3906         int best_score=256*256*256*120;
3907
             /* ff_fdct_ifast output is scaled; rescale with ff_inv_aanscales */
3908         if (s->fdsp.fdct == ff_fdct_ifast)
3909             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3910         zero_distortion= dct_coeff*dct_coeff;
3911
3912         for(level_index=0; level_index < coeff_count[i]; level_index++){
3913             int distortion;
3914             int level= coeff[level_index][i];
3915             const int alevel= FFABS(level);
3916             int unquant_coeff;
3917
3918             av_assert2(level);
3919
                 /* value the candidate level would reconstruct to, per format */
3920             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3921                 unquant_coeff= alevel*qmul + qadd;
3922             } else if(s->out_format == FMT_MJPEG) {
3923                 j = s->idsp.idct_permutation[scantable[i]];
3924                 unquant_coeff = alevel * matrix[j] * 8;
3925             }else{ //MPEG1
3926                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3927                 if(s->mb_intra){
3928                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3929                         unquant_coeff =   (unquant_coeff - 1) | 1;
3930                 }else{
3931                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3932                         unquant_coeff =   (unquant_coeff - 1) | 1;
3933                 }
3934                 unquant_coeff<<= 3;
3935             }
3936
                 /* distortion relative to coding this coefficient as zero */
3937             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3938             level+=64;
                 /* levels in -64..63 are looked up in the VLC length tables;
                  * anything else is charged the escape length */
3939             if((level&(~127)) == 0){
3940                 for(j=survivor_count-1; j>=0; j--){
3941                     int run= i - survivor[j];
3942                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3943                     score += score_tab[i-run];
3944
3945                     if(score < best_score){
3946                         best_score= score;
3947                         run_tab[i+1]= run;
3948                         level_tab[i+1]= level-64;
3949                     }
3950                 }
3951
                     /* for these formats the "last" tables are used to score the
                      * candidate as the final coefficient of the block */
3952                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3953                     for(j=survivor_count-1; j>=0; j--){
3954                         int run= i - survivor[j];
3955                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3956                         score += score_tab[i-run];
3957                         if(score < last_score){
3958                             last_score= score;
3959                             last_run= run;
3960                             last_level= level-64;
3961                             last_i= i+1;
3962                         }
3963                     }
3964                 }
3965             }else{
3966                 distortion += esc_length*lambda;
3967                 for(j=survivor_count-1; j>=0; j--){
3968                     int run= i - survivor[j];
3969                     int score= distortion + score_tab[i-run];
3970
3971                     if(score < best_score){
3972                         best_score= score;
3973                         run_tab[i+1]= run;
3974                         level_tab[i+1]= level-64;
3975                     }
3976                 }
3977
3978                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3979                   for(j=survivor_count-1; j>=0; j--){
3980                         int run= i - survivor[j];
3981                         int score= distortion + score_tab[i-run];
3982                         if(score < last_score){
3983                             last_score= score;
3984                             last_run= run;
3985                             last_level= level-64;
3986                             last_i= i+1;
3987                         }
3988                     }
3989                 }
3990             }
3991         }
3992
3993         score_tab[i+1]= best_score;
3994
             /* drop survivors whose score is worse than the new best path; for
              * longer blocks a slack of lambda is allowed (see the note below) */
3995         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3996         if(last_non_zero <= 27){
3997             for(; survivor_count; survivor_count--){
3998                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3999                     break;
4000             }
4001         }else{
4002             for(; survivor_count; survivor_count--){
4003                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4004                     break;
4005             }
4006         }
4007
4008         survivor[ survivor_count++ ]= i+1;
4009     }
4010
         /* formats other than H.263/H.261: pick the end position afterwards,
          * charging 2*lambda for every end position except 0 */
4011     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4012         last_score= 256*256*256*120;
4013         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4014             int score= score_tab[i];
4015             if(i) score += lambda*2; //FIXME exacter?
4016
4017             if(score < last_score){
4018                 last_score= score;
4019                 last_i= i;
4020                 last_level= level_tab[i];
4021                 last_run= run_tab[i];
4022             }
4023         }
4024     }
4025
4026     s->coded_score[n] = last_score;
4027
         /* remember |block[0]| before the coefficients are cleared */
4028     dc= FFABS(block[0]);
4029     last_non_zero= last_i - 1;
4030     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4031
4032     if(last_non_zero < start_i)
4033         return last_non_zero;
4034
         /* special case: only coefficient 0 survives and start_i is 0 (the
          * non-intra path): redo the decision for that coefficient using dc */
4035     if(last_non_zero == 0 && start_i == 0){
4036         int best_level= 0;
4037         int best_score= dc * dc;
4038
4039         for(i=0; i<coeff_count[0]; i++){
4040             int level= coeff[i][0];
4041             int alevel= FFABS(level);
4042             int unquant_coeff, score, distortion;
4043
4044             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4045                     unquant_coeff= (alevel*qmul + qadd)>>3;
4046             }else{ //MPEG1
4047                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4048                     unquant_coeff =   (unquant_coeff - 1) | 1;
4049             }
4050             unquant_coeff = (unquant_coeff + 4) >> 3;
4051             unquant_coeff<<= 3 + 3;
4052
4053             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4054             level+=64;
4055             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4056             else                    score= distortion + esc_length*lambda;
4057
4058             if(score < best_score){
4059                 best_score= score;
4060                 best_level= level - 64;
4061             }
4062         }
4063         block[0]= best_level;
4064         s->coded_score[n] = best_score - dc*dc;
4065         if(best_level == 0) return -1;
4066         else                return last_non_zero;
4067     }
4068
         /* walk the chosen path backwards and store the levels */
4069     i= last_i;
4070     av_assert2(last_level);
4071
4072     block[ perm_scantable[last_non_zero] ]= last_level;
4073     i -= last_run + 1;
4074
4075     for(; i>start_i; i -= run_tab[i] + 1){
4076         block[ perm_scantable[i-1] ]= level_tab[i];
4077     }
4078
4079     return last_non_zero;
4080 }
4081
4082 //#define REFINE_STATS 1
4083 static int16_t basis[64][64];
4084
4085 static void build_basis(uint8_t *perm){
4086     int i, j, x, y;
4087     emms_c();
4088     for(i=0; i<8; i++){
4089         for(j=0; j<8; j++){
4090             for(y=0; y<8; y++){
4091                 for(x=0; x<8; x++){
4092                     double s= 0.25*(1<<BASIS_SHIFT);
4093                     int index= 8*i + j;
4094                     int perm_index= perm[index];
4095                     if(i==0) s*= sqrt(0.5);
4096                     if(j==0) s*= sqrt(0.5);
4097                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4098                 }
4099             }
4100         }
4101     }
4102 }
4103
/**
 * Iteratively refine an already quantized 8x8 block.
 *
 * rem[] is initialised from the DC term, the original samples and the
 * contribution of every currently non-zero level (via add_8x8basis). Each
 * pass of the main loop then evaluates changing a single level by +1 or -1:
 * the candidate score is try_8x8basis() on rem[] plus lambda times the
 * difference in VLC length. The best-scoring change is applied and the loop
 * repeats until no change beats the score of leaving the block untouched.
 * How aggressively new or larger levels may be introduced depends on
 * s->quantizer_noise_shaping.
 *
 * NOTE(review): try_8x8basis()/add_8x8basis() are not visible here;
 * presumably they return/update a weighted error of rem[] for a scaled
 * basis function - confirm against mpegvideoencdsp.
 *
 * @param s      encoder context
 * @param block  quantized levels, addressed through the permutated scan
 *               table; modified in place
 * @param weight 64 per-sample weights; overwritten in place with the
 *               remapped weights (16..63)
 * @param orig   64 original samples that rem[] is measured against
 * @param n      block index; n < 4 selects the luma DC scale, and
 *               s->block_last_index[n] supplies the initial last index
 * @param qscale quantizer scale
 * @return scan index of the last non-zero coefficient after refinement
 */
4104 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4105                         int16_t *block, int16_t *weight, int16_t *orig,
4106                         int n, int qscale){
4107     int16_t rem[64];
4108     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4109     const uint8_t *scantable= s->intra_scantable.scantable;
4110     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4111 //    unsigned int threshold1, threshold2;
4112 //    int bias=0;
         /* run_tab[k]: zero run preceding the k-th non-zero level in scan order */
4113     int run_tab[65];
4114     int prev_run=0;
4115     int prev_level=0;
4116     int qmul, qadd, start_i, last_non_zero, i, dc;
4117     uint8_t * length;
4118     uint8_t * last_length;
4119     int lambda;
4120     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4121 #ifdef REFINE_STATS
4122 static int count=0;
4123 static int after_last=0;
4124 static int to_zero=0;
4125 static int from_zero=0;
4126 static int raise=0;
4127 static int lower=0;
4128 static int messed_sign=0;
4129 #endif
4130
         /* the basis table is built on first use; basis[0][0] == 0 is taken
          * to mean "not built yet" */
4131     if(basis[0][0] == 0)
4132         build_basis(s->idsp.idct_permutation);
4133
4134     qmul= qscale*2;
4135     qadd= (qscale-1)|1;
4136     if (s->mb_intra) {
4137         if (!s->h263_aic) {
4138             if (n < 4)
4139                 q = s->y_dc_scale;
4140             else
4141                 q = s->c_dc_scale;
4142         } else{
4143             /* For AIC we skip quant/dequant of INTRADC */
4144             q = 1;
4145             qadd=0;
4146         }
4147         q <<= RECON_SHIFT-3;
4148         /* note: block[0] is assumed to be positive */
4149         dc= block[0]*q;
4150 //        block[0] = (block[0] + (q >> 1)) / q;
4151         start_i = 1;
4152 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4153 //            bias= 1<<(QMAT_SHIFT-1);
             /* use the dedicated chroma VLC length tables when they exist */
4154         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4155             length     = s->intra_chroma_ac_vlc_length;
4156             last_length= s->intra_chroma_ac_vlc_last_length;
4157         } else {
4158             length     = s->intra_ac_vlc_length;
4159             last_length= s->intra_ac_vlc_last_length;
4160         }
4161     } else {
4162         dc= 0;
4163         start_i = 0;
4164         length     = s->inter_ac_vlc_length;
4165         last_length= s->inter_ac_vlc_last_length;
4166     }
4167     last_non_zero = s->block_last_index[n];
4168
4169 #ifdef REFINE_STATS
4170 {START_TIMER
4171 #endif
         /* rem[] starts as (dc + rounding) minus the original samples in
          * RECON_SHIFT fixed point.
          * NOTE(review): if orig[i] can be negative, the left shift below is
          * undefined behaviour in ISO C - confirm the range of orig. */
4172     dc += (1<<(RECON_SHIFT-1));
4173     for(i=0; i<64; i++){
4174         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4175     }
4176 #ifdef REFINE_STATS
4177 STOP_TIMER("memset rem[]")}
4178 #endif
         /* remap the caller's weights in place and accumulate their squares */
4179     sum=0;
4180     for(i=0; i<64; i++){
4181         int one= 36;
4182         int qns=4;
4183         int w;
4184
4185         w= FFABS(weight[i]) + qns*one;
4186         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4187
4188         weight[i] = w;
4189 //        w=weight[i] = (63*qns + (w/2)) / w;
4190
4191         av_assert2(w>0);
4192         av_assert2(w<(1<<6));
4193         sum += w*w;
4194     }
         /* lambda is scaled by the sum of the squared weights */
4195     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4196 #ifdef REFINE_STATS
4197 {START_TIMER
4198 #endif
         /* add every non-zero level's dequantized contribution to rem[] and
          * record the zero runs between them */
4199     run=0;
4200     rle_index=0;
4201     for(i=start_i; i<=last_non_zero; i++){
4202         int j= perm_scantable[i];
4203         const int level= block[j];
4204         int coeff;
4205
4206         if(level){
4207             if(level<0) coeff= qmul*level - qadd;
4208             else        coeff= qmul*level + qadd;
4209             run_tab[rle_index++]=run;
4210             run=0;
4211
4212             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4213         }else{
4214             run++;
4215         }
4216     }
4217 #ifdef REFINE_STATS
4218 if(last_non_zero>0){
4219 STOP_TIMER("init rem[]")
4220 }
4221 }
4222
4223 {START_TIMER
4224 #endif
         /* main loop: one level change per pass, until nothing improves */
4225     for(;;){
             /* baseline: score of leaving the block unchanged */
4226         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4227         int best_coeff=0;
4228         int best_change=0;
4229         int run2, best_unquant_change=0, analyze_gradient;
4230 #ifdef REFINE_STATS
4231 {START_TIMER
4232 #endif
4233         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4234
             /* d1 = forward DCT of the weighted residual; its signs are used
              * below to reject some new +-1 coefficients early */
4235         if(analyze_gradient){
4236 #ifdef REFINE_STATS
4237 {START_TIMER
4238 #endif
4239             for(i=0; i<64; i++){
4240                 int w= weight[i];
4241
4242                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4243             }
4244 #ifdef REFINE_STATS
4245 STOP_TIMER("rem*w*w")}
4246 {START_TIMER
4247 #endif
4248             s->fdsp.fdct(d1);
4249 #ifdef REFINE_STATS
4250 STOP_TIMER("dct")}
4251 #endif
4252         }
4253
             /* intra blocks: try moving the DC level by one step either way */
4254         if(start_i){
4255             const int level= block[0];
4256             int change, old_coeff;
4257
4258             av_assert2(s->mb_intra);
4259
4260             old_coeff= q*level;
4261
4262             for(change=-1; change<=1; change+=2){
4263                 int new_level= level + change;
4264                 int score, new_coeff;
4265
4266                 new_coeff= q*new_level;
4267                 if(new_coeff >= 2048 || new_coeff < 0)
4268                     continue;
4269
4270                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4271                                                   new_coeff - old_coeff);
4272                 if(score<best_score){
4273                     best_score= score;
4274                     best_coeff= 0;
4275                     best_change= change;
4276                     best_unquant_change= new_coeff - old_coeff;
4277                 }
4278             }
4279         }
4280
4281         run=0;
4282         rle_index=0;
4283         run2= run_tab[rle_index++];
4284         prev_level=0;
4285         prev_run=0;
4286
             /* try +-1 on every coefficient from start_i on; 'run' counts the
              * zeros since the previous non-zero level, 'run2' the zeros that
              * remain before the next one */
4287         for(i=start_i; i<64; i++){
4288             int j= perm_scantable[i];
4289             const int level= block[j];
4290             int change, old_coeff;
4291
                 /* below noise shaping level 3 only one position past the
                  * current last coefficient is considered */
4292             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4293                 break;
4294
4295             if(level){
4296                 if(level<0) old_coeff= qmul*level - qadd;
4297                 else        old_coeff= qmul*level + qadd;
4298                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4299             }else{
4300                 old_coeff=0;
4301                 run2--;
4302                 av_assert2(run2>=0 || i >= last_non_zero );
4303             }
4304
4305             for(change=-1; change<=1; change+=2){
4306                 int new_level= level + change;
4307                 int score, new_coeff, unquant_change;
4308
4309                 score=0;
                     /* below level 2, magnitudes may only shrink */
4310                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4311                    continue;
4312
4313                 if(new_level){
4314                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4315                     else            new_coeff= qmul*new_level + qadd;
4316                     if(new_coeff >= 2048 || new_coeff <= -2048)
4317                         continue;
4318                     //FIXME check for overflow
4319
4320                     if(level){
                             /* non-zero stays non-zero: bit cost changes only
                              * through the level (same run) */
4321                         if(level < 63 && level > -63){
4322                             if(i < last_non_zero)
4323                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4324                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4325                             else
4326                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4327                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4328                         }
4329                     }else{
                             /* zero becomes +-1: a new run/level pair appears */
4330                         av_assert2(FFABS(new_level)==1);
4331
                             /* skip when the gradient is non-zero and has the
                              * same sign as the new level */
4332                         if(analyze_gradient){
4333                             int g= d1[ scantable[i] ];
4334                             if(g && (g^new_level) >= 0)
4335                                 continue;
4336                         }
4337
4338                         if(i < last_non_zero){
                                 /* the run of the following coefficient is
                                  * split in two by the new one */
4339                             int next_i= i + run2 + 1;
4340                             int next_level= block[ perm_scantable[next_i] ] + 64;
4341
4342                             if(next_level&(~127))
4343                                 next_level= 0;
4344
4345                             if(next_i < last_non_zero)
4346                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4347                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4348                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4349                             else
4350                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4351                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4352                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4353                         }else{
                                 /* new coefficient becomes the last one; the
                                  * previous last is re-costed as non-last */
4354                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4355                             if(prev_level){
4356                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4357                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4358                             }
4359                         }
4360                     }
4361                 }else{
                         /* +-1 becomes zero: a run/level pair disappears */
4362                     new_coeff=0;
4363                     av_assert2(FFABS(level)==1);
4364
4365                     if(i < last_non_zero){
                             /* the following coefficient's run absorbs this one */
4366                         int next_i= i + run2 + 1;
4367                         int next_level= block[ perm_scantable[next_i] ] + 64;
4368
4369                         if(next_level&(~127))
4370                             next_level= 0;
4371
4372                         if(next_i < last_non_zero)
4373                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4374                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4375                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4376                         else
4377                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4378                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4379                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4380                     }else{
                             /* the last coefficient vanishes; the previous one
                              * is re-costed as the new last */
4381                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4382                         if(prev_level){
4383                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4384                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4385                         }
4386                     }
4387                 }
4388
                     /* convert the bit difference into score units */
4389                 score *= lambda;
4390
4391                 unquant_change= new_coeff - old_coeff;
4392                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4393
4394                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4395                                                    unquant_change);
4396                 if(score<best_score){
4397                     best_score= score;
4398                     best_coeff= i;
4399                     best_change= change;
4400                     best_unquant_change= unquant_change;
4401                 }
4402             }
                 /* remember the previous non-zero level (biased by 64, or 0 if
                  * it is outside the table range) and its run */
4403             if(level){
4404                 prev_level= level + 64;
4405                 if(prev_level&(~127))
4406                     prev_level= 0;
4407                 prev_run= run;
4408                 run=0;
4409             }else{
4410                 run++;
4411             }
4412         }
4413 #ifdef REFINE_STATS
4414 STOP_TIMER("iterative step")}
4415 #endif
4416
             /* apply the best change found in this pass, if any */
4417         if(best_change){
4418             int j= perm_scantable[ best_coeff ];
4419
4420             block[j] += best_change;
4421
4422             if(best_coeff > last_non_zero){
4423                 last_non_zero= best_coeff;
4424                 av_assert2(block[j]);
4425 #ifdef REFINE_STATS
4426 after_last++;
4427 #endif
4428             }else{
4429 #ifdef REFINE_STATS
4430 if(block[j]){
4431     if(block[j] - best_change){
4432         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4433             raise++;
4434         }else{
4435             lower++;
4436         }
4437     }else{
4438         from_zero++;
4439     }
4440 }else{
4441     to_zero++;
4442 }
4443 #endif
                     /* a level may have become zero: move last_non_zero back */
4444                 for(; last_non_zero>=start_i; last_non_zero--){
4445                     if(block[perm_scantable[last_non_zero]])
4446                         break;
4447                 }
4448             }
4449 #ifdef REFINE_STATS
4450 count++;
4451 if(256*256*256*64 % count == 0){
4452     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4453 }
4454 #endif
                 /* rebuild the run table for the modified block */
4455             run=0;
4456             rle_index=0;
4457             for(i=start_i; i<=last_non_zero; i++){
4458                 int j= perm_scantable[i];
4459                 const int level= block[j];
4460
4461                  if(level){
4462                      run_tab[rle_index++]=run;
4463                      run=0;
4464                  }else{
4465                      run++;
4466                  }
4467             }
4468
                 /* j is the outer j again here (position of the changed level) */
4469             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4470         }else{
4471             break;
4472         }
4473     }
4474 #ifdef REFINE_STATS
4475 if(last_non_zero>0){
4476 STOP_TIMER("iterative search")
4477 }
4478 }
4479 #endif
4480
4481     return last_non_zero;
4482 }
4483
4484 int ff_dct_quantize_c(MpegEncContext *s,
4485                         int16_t *block, int n,
4486                         int qscale, int *overflow)
4487 {
4488     int i, j, level, last_non_zero, q, start_i;
4489     const int *qmat;
4490     const uint8_t *scantable= s->intra_scantable.scantable;
4491     int bias;
4492     int max=0;
4493     unsigned int threshold1, threshold2;
4494
4495     s->fdsp.fdct(block);
4496
4497     if(s->dct_error_sum)
4498         s->denoise_dct(s, block);
4499
4500     if (s->mb_intra) {
4501         if (!s->h263_aic) {
4502             if (n < 4)
4503                 q = s->y_dc_scale;
4504             else
4505                 q = s->c_dc_scale;
4506             q = q << 3;
4507         } else
4508             /* For AIC we skip quant/dequant of INTRADC */
4509             q = 1 << 3;
4510
4511         /* note: block[0] is assumed to be positive */
4512         block[0] = (block[0] + (q >> 1)) / q;
4513         start_i = 1;
4514         last_non_zero = 0;
4515         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4516         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4517     } else {
4518         start_i = 0;
4519         last_non_zero = -1;
4520         qmat = s->q_inter_matrix[qscale];
4521         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4522     }
4523     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4524     threshold2= (threshold1<<1);
4525     for(i=63;i>=start_i;i--) {
4526         j = scantable[i];
4527         level = block[j] * qmat[j];
4528
4529         if(((unsigned)(level+threshold1))>threshold2){
4530             last_non_zero = i;
4531             break;
4532         }else{
4533             block[j]=0;
4534         }
4535     }
4536     for(i=start_i; i<=last_non_zero; i++) {
4537         j = scantable[i];
4538         level = block[j] * qmat[j];
4539
4540 //        if(   bias+level >= (1<<QMAT_SHIFT)
4541 //           || bias-level >= (1<<QMAT_SHIFT)){
4542         if(((unsigned)(level+threshold1))>threshold2){
4543             if(level>0){
4544                 level= (bias + level)>>QMAT_SHIFT;
4545                 block[j]= level;
4546             }else{
4547                 level= (bias - level)>>QMAT_SHIFT;
4548                 block[j]= -level;
4549             }
4550             max |=level;
4551         }else{
4552             block[j]=0;
4553         }
4554     }
4555     *overflow= s->max_qcoeff < max; //overflow might have happened
4556
4557     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4558     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4559         ff_block_permute(block, s->idsp.idct_permutation,
4560                          scantable, last_non_zero);
4561
4562     return last_non_zero;
4563 }
4564
/* Byte offset of member x inside MpegEncContext, for the AVOption tables below. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags used by the tables below: video parameter + encoding parameter.
 * The expansion is parenthesized so it stays a single operand in any
 * expression it is pasted into (e.g. VE & mask). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4567 static const AVOption h263_options[] = {
4568     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4569     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4570     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4571     FF_MPV_COMMON_OPTS
4572     { NULL },
4573 };
4574
4575 static const AVClass h263_class = {
4576     .class_name = "H.263 encoder",
4577     .item_name  = av_default_item_name,
4578     .option     = h263_options,
4579     .version    = LIBAVUTIL_VERSION_INT,
4580 };
4581
4582 AVCodec ff_h263_encoder = {
4583     .name           = "h263",
4584     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4585     .type           = AVMEDIA_TYPE_VIDEO,
4586     .id             = AV_CODEC_ID_H263,
4587     .priv_data_size = sizeof(MpegEncContext),
4588     .init           = ff_mpv_encode_init,
4589     .encode2        = ff_mpv_encode_picture,
4590     .close          = ff_mpv_encode_end,
4591     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4592     .priv_class     = &h263_class,
4593 };
4594
4595 static const AVOption h263p_options[] = {
4596     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4597     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4598     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4599     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4600     FF_MPV_COMMON_OPTS
4601     { NULL },
4602 };
4603 static const AVClass h263p_class = {
4604     .class_name = "H.263p encoder",
4605     .item_name  = av_default_item_name,
4606     .option     = h263p_options,
4607     .version    = LIBAVUTIL_VERSION_INT,
4608 };
4609
4610 AVCodec ff_h263p_encoder = {
4611     .name           = "h263p",
4612     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4613     .type           = AVMEDIA_TYPE_VIDEO,
4614     .id             = AV_CODEC_ID_H263P,
4615     .priv_data_size = sizeof(MpegEncContext),
4616     .init           = ff_mpv_encode_init,
4617     .encode2        = ff_mpv_encode_picture,
4618     .close          = ff_mpv_encode_end,
4619     .capabilities   = CODEC_CAP_SLICE_THREADS,
4620     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4621     .priv_class     = &h263p_class,
4622 };
4623
4624 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4625
4626 AVCodec ff_msmpeg4v2_encoder = {
4627     .name           = "msmpeg4v2",
4628     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4629     .type           = AVMEDIA_TYPE_VIDEO,
4630     .id             = AV_CODEC_ID_MSMPEG4V2,
4631     .priv_data_size = sizeof(MpegEncContext),
4632     .init           = ff_mpv_encode_init,
4633     .encode2        = ff_mpv_encode_picture,
4634     .close          = ff_mpv_encode_end,
4635     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4636     .priv_class     = &msmpeg4v2_class,
4637 };
4638
4639 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4640
4641 AVCodec ff_msmpeg4v3_encoder = {
4642     .name           = "msmpeg4",
4643     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4644     .type           = AVMEDIA_TYPE_VIDEO,
4645     .id             = AV_CODEC_ID_MSMPEG4V3,
4646     .priv_data_size = sizeof(MpegEncContext),
4647     .init           = ff_mpv_encode_init,
4648     .encode2        = ff_mpv_encode_picture,
4649     .close          = ff_mpv_encode_end,
4650     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4651     .priv_class     = &msmpeg4v3_class,
4652 };
4653
4654 FF_MPV_GENERIC_CLASS(wmv1)
4655
4656 AVCodec ff_wmv1_encoder = {
4657     .name           = "wmv1",
4658     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4659     .type           = AVMEDIA_TYPE_VIDEO,
4660     .id             = AV_CODEC_ID_WMV1,
4661     .priv_data_size = sizeof(MpegEncContext),
4662     .init           = ff_mpv_encode_init,
4663     .encode2        = ff_mpv_encode_picture,
4664     .close          = ff_mpv_encode_end,
4665     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4666     .priv_class     = &wmv1_class,
4667 };