git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "dsputil.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mathops.h"
  46 #include "mpegutils.h"
  47 #include "mjpegenc.h"
  48 #include "msmpeg4.h"
  49 #include "faandct.h"
  50 #include "thread.h"
  51 #include "aandcttab.h"
  52 #include "flv.h"
  53 #include "mpeg4video.h"
  54 #include "internal.h"
  55 #include "bytestream.h"
  56 #include <limits.h>
  57 #include "sp5x.h"
  58
/* Prototypes for static helpers that are referenced before their definitions. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Shared default tables; MPV_encode_defaults() installs them as
 * s->me.mv_penalty and s->fcode_tab respectively. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table made of the common mpegvideo options only, closed by a
 * NULL sentinel entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
  72
  73 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  74                        uint16_t (*qmat16)[2][64],
  75                        const uint16_t *quant_matrix,
  76                        int bias, int qmin, int qmax, int intra)
  77 {
  78     int qscale;
  79     int shift = 0;
  80
  81     for (qscale = qmin; qscale <= qmax; qscale++) {
  82         int i;
  83         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  84             dsp->fdct == ff_jpeg_fdct_islow_10 ||
  85             dsp->fdct == ff_faandct) {
  86             for (i = 0; i < 64; i++) {
  87                 const int j = dsp->idct_permutation[i];
  88                 /* 16 <= qscale * quant_matrix[i] <= 7905
  89                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  90                  *             19952 <=              x  <= 249205026
  91                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  92                  *           3444240 >= (1 << 36) / (x) >= 275 */
  93
  94                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  95                                         (qscale * quant_matrix[j]));
  96             }
  97         } else if (dsp->fdct == ff_fdct_ifast) {
  98             for (i = 0; i < 64; i++) {
  99                 const int j = dsp->idct_permutation[i];
 100                 /* 16 <= qscale * quant_matrix[i] <= 7905
 101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 102                  *             19952 <=              x  <= 249205026
 103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 104                  *           3444240 >= (1 << 36) / (x) >= 275 */
 105
 106                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 107                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
 108             }
 109         } else {
 110             for (i = 0; i < 64; i++) {
 111                 const int j = dsp->idct_permutation[i];
 112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 113                  * Assume x = qscale * quant_matrix[i]
 114                  * So             16 <=              x  <= 7905
 115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 116                  * so          32768 >= (1 << 19) / (x) >= 67 */
 117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 118                                         (qscale * quant_matrix[j]));
 119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 120                 //                    (qscale * quant_matrix[i]);
 121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 122                                        (qscale * quant_matrix[j]);
 123
 124                 if (qmat16[qscale][0][i] == 0 ||
 125                     qmat16[qscale][0][i] == 128 * 256)
 126                     qmat16[qscale][0][i] = 128 * 256 - 1;
 127                 qmat16[qscale][1][i] =
 128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 129                                 qmat16[qscale][0][i]);
 130             }
 131         }
 132
 133         for (i = intra; i < 64; i++) {
 134             int64_t max = 8191;
 135             if (dsp->fdct == ff_fdct_ifast) {
 136                 max = (8191LL * ff_aanscales[i]) >> 14;
 137             }
 138             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 139                 shift++;
 140             }
 141         }
 142     }
 143     if (shift) {
 144         av_log(NULL, AV_LOG_INFO,
 145                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 146                QMAT_SHIFT - shift);
 147     }
 148 }
 149
 150 static inline void update_qscale(MpegEncContext *s)
 151 {
 152     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 153                 (FF_LAMBDA_SHIFT + 7);
 154     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 155
 156     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 157                  FF_LAMBDA_SHIFT;
 158 }
 159
 160 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 161 {
 162     int i;
 163
 164     if (matrix) {
 165         put_bits(pb, 1, 1);
 166         for (i = 0; i < 64; i++) {
 167             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 168         }
 169     } else
 170         put_bits(pb, 1, 0);
 171 }
 172
 173 /**
 174  * init s->current_picture.qscale_table from s->lambda_table
 175  */
 176 void ff_init_qscale_tab(MpegEncContext *s)
 177 {
 178     int8_t * const qscale_table = s->current_picture.qscale_table;
 179     int i;
 180
 181     for (i = 0; i < s->mb_num; i++) {
 182         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 183         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 184         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 185                                                   s->avctx->qmax);
 186     }
 187 }
 188
 189 static void update_duplicate_context_after_me(MpegEncContext *dst,
 190                                               MpegEncContext *src)
 191 {
 192 #define COPY(a) dst->a= src->a
 193     COPY(pict_type);
 194     COPY(current_picture);
 195     COPY(f_code);
 196     COPY(b_code);
 197     COPY(qscale);
 198     COPY(lambda);
 199     COPY(lambda2);
 200     COPY(picture_in_gop_number);
 201     COPY(gop_picture_number);
 202     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 203     COPY(progressive_frame);    // FIXME don't set in encode_header
 204     COPY(partitioned_frame);    // FIXME don't set in encode_header
 205 #undef COPY
 206 }
 207
 208 /**
 209  * Set the given MpegEncContext to defaults for encoding.
 210  * the changed fields will not depend upon the prior state of the MpegEncContext.
 211  */
 212 static void MPV_encode_defaults(MpegEncContext *s)
 213 {
 214     int i;
 215     ff_MPV_common_defaults(s);
 216
 217     for (i = -16; i < 16; i++) {
 218         default_fcode_tab[i + MAX_MV] = 1;
 219     }
 220     s->me.mv_penalty = default_mv_penalty;
 221     s->fcode_tab     = default_fcode_tab;
 222
 223     s->input_picture_number  = 0;
 224     s->picture_in_gop_number = 0;
 225 }
 226
 227 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 228     if (ARCH_X86)
 229         ff_dct_encode_init_x86(s);
 230
 231     if (CONFIG_H263_ENCODER)
 232         ff_h263dsp_init(&s->h263dsp);
 233     if (!s->dct_quantize)
 234         s->dct_quantize = ff_dct_quantize_c;
 235     if (!s->denoise_dct)
 236         s->denoise_dct  = denoise_dct_c;
 237     s->fast_dct_quantize = s->dct_quantize;
 238     if (s->avctx->trellis)
 239         s->dct_quantize  = dct_quantize_trellis_c;
 240
 241     return 0;
 242 }
 243
 244 /* init video encoder */
 245 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 246 {
 247     MpegEncContext *s = avctx->priv_data;
 248     int i, ret;
 249
 250     MPV_encode_defaults(s);
 251
 252     switch (avctx->codec_id) {
 253     case AV_CODEC_ID_MPEG2VIDEO:
 254         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 255             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 256             av_log(avctx, AV_LOG_ERROR,
 257                    "only YUV420 and YUV422 are supported\n");
 258             return -1;
 259         }
 260         break;
 261     case AV_CODEC_ID_MJPEG:
 262     case AV_CODEC_ID_AMV:
 263         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
 264             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
 265             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
 266             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 267               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
 268               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
 269              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 270             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 271             return -1;
 272         }
 273         break;
 274     default:
 275         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 276             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 277             return -1;
 278         }
 279     }
 280
 281     switch (avctx->pix_fmt) {
 282     case AV_PIX_FMT_YUVJ444P:
 283     case AV_PIX_FMT_YUV444P:
 284         s->chroma_format = CHROMA_444;
 285         break;
 286     case AV_PIX_FMT_YUVJ422P:
 287     case AV_PIX_FMT_YUV422P:
 288         s->chroma_format = CHROMA_422;
 289         break;
 290     case AV_PIX_FMT_YUVJ420P:
 291     case AV_PIX_FMT_YUV420P:
 292     default:
 293         s->chroma_format = CHROMA_420;
 294         break;
 295     }
 296
 297     s->bit_rate = avctx->bit_rate;
 298     s->width    = avctx->width;
 299     s->height   = avctx->height;
 300     if (avctx->gop_size > 600 &&
 301         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 302         av_log(avctx, AV_LOG_WARNING,
 303                "keyframe interval too large!, reducing it from %d to %d\n",
 304                avctx->gop_size, 600);
 305         avctx->gop_size = 600;
 306     }
 307     s->gop_size     = avctx->gop_size;
 308     s->avctx        = avctx;
 309     s->flags        = avctx->flags;
 310     s->flags2       = avctx->flags2;
 311     if (avctx->max_b_frames > MAX_B_FRAMES) {
 312         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 313                "is %d.\n", MAX_B_FRAMES);
 314         avctx->max_b_frames = MAX_B_FRAMES;
 315     }
 316     s->max_b_frames = avctx->max_b_frames;
 317     s->codec_id     = avctx->codec->id;
 318     s->strict_std_compliance = avctx->strict_std_compliance;
 319     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 320     s->mpeg_quant         = avctx->mpeg_quant;
 321     s->rtp_mode           = !!avctx->rtp_payload_size;
 322     s->intra_dc_precision = avctx->intra_dc_precision;
 323     s->user_specified_pts = AV_NOPTS_VALUE;
 324
 325     if (s->gop_size <= 1) {
 326         s->intra_only = 1;
 327         s->gop_size   = 12;
 328     } else {
 329         s->intra_only = 0;
 330     }
 331
 332     s->me_method = avctx->me_method;
 333
 334     /* Fixed QSCALE */
 335     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 336
 337     s->adaptive_quant = (s->avctx->lumi_masking ||
 338                          s->avctx->dark_masking ||
 339                          s->avctx->temporal_cplx_masking ||
 340                          s->avctx->spatial_cplx_masking  ||
 341                          s->avctx->p_masking      ||
 342                          s->avctx->border_masking ||
 343                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 344                         !s->fixed_qscale;
 345
 346     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 347
 348     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 349         switch(avctx->codec_id) {
 350         case AV_CODEC_ID_MPEG1VIDEO:
 351         case AV_CODEC_ID_MPEG2VIDEO:
 352             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
 353             break;
 354         case AV_CODEC_ID_MPEG4:
 355         case AV_CODEC_ID_MSMPEG4V1:
 356         case AV_CODEC_ID_MSMPEG4V2:
 357         case AV_CODEC_ID_MSMPEG4V3:
 358             if       (avctx->rc_max_rate >= 15000000) {
 359                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
 360             } else if(avctx->rc_max_rate >=  2000000) {
 361                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
 362             } else if(avctx->rc_max_rate >=   384000) {
 363                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
 364             } else
 365                 avctx->rc_buffer_size = 40;
 366             avctx->rc_buffer_size *= 16384;
 367             break;
 368         }
 369         if (avctx->rc_buffer_size) {
 370             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 371         }
 372     }
 373
 374     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 375         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 376         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
 377             return -1;
 378     }
 379
 380     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 381         av_log(avctx, AV_LOG_INFO,
 382                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 383     }
 384
 385     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 386         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 387         return -1;
 388     }
 389
 390     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 391         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 392         return -1;
 393     }
 394
 395     if (avctx->rc_max_rate &&
 396         avctx->rc_max_rate == avctx->bit_rate &&
 397         avctx->rc_max_rate != avctx->rc_min_rate) {
 398         av_log(avctx, AV_LOG_INFO,
 399                "impossible bitrate constraints, this will fail\n");
 400     }
 401
 402     if (avctx->rc_buffer_size &&
 403         avctx->bit_rate * (int64_t)avctx->time_base.num >
 404             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 405         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 406         return -1;
 407     }
 408
 409     if (!s->fixed_qscale &&
 410         avctx->bit_rate * av_q2d(avctx->time_base) >
 411             avctx->bit_rate_tolerance) {
 412         av_log(avctx, AV_LOG_WARNING,
 413                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 414         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
 415     }
 416
 417     if (s->avctx->rc_max_rate &&
 418         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 419         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 420          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 421         90000LL * (avctx->rc_buffer_size - 1) >
 422             s->avctx->rc_max_rate * 0xFFFFLL) {
 423         av_log(avctx, AV_LOG_INFO,
 424                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 425                "specified vbv buffer is too large for the given bitrate!\n");
 426     }
 427
 428     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 429         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 430         s->codec_id != AV_CODEC_ID_FLV1) {
 431         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 432         return -1;
 433     }
 434
 435     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 436         av_log(avctx, AV_LOG_ERROR,
 437                "OBMC is only supported with simple mb decision\n");
 438         return -1;
 439     }
 440
 441     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 442         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 443         return -1;
 444     }
 445
 446     if (s->max_b_frames                    &&
 447         s->codec_id != AV_CODEC_ID_MPEG4      &&
 448         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 449         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 450         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 451         return -1;
 452     }
 453     if (s->max_b_frames < 0) {
 454         av_log(avctx, AV_LOG_ERROR,
 455                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 456         return -1;
 457     }
 458
 459     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 460          s->codec_id == AV_CODEC_ID_H263  ||
 461          s->codec_id == AV_CODEC_ID_H263P) &&
 462         (avctx->sample_aspect_ratio.num > 255 ||
 463          avctx->sample_aspect_ratio.den > 255)) {
 464         av_log(avctx, AV_LOG_WARNING,
 465                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 466                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 467         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 468                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 469     }
 470
 471     if ((s->codec_id == AV_CODEC_ID_H263  ||
 472          s->codec_id == AV_CODEC_ID_H263P) &&
 473         (avctx->width  > 2048 ||
 474          avctx->height > 1152 )) {
 475         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 476         return -1;
 477     }
 478     if ((s->codec_id == AV_CODEC_ID_H263  ||
 479          s->codec_id == AV_CODEC_ID_H263P) &&
 480         ((avctx->width &3) ||
 481          (avctx->height&3) )) {
 482         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 483         return -1;
 484     }
 485
 486     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 487         (avctx->width  > 4095 ||
 488          avctx->height > 4095 )) {
 489         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 490         return -1;
 491     }
 492
 493     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 494         (avctx->width  > 16383 ||
 495          avctx->height > 16383 )) {
 496         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 497         return -1;
 498     }
 499
 500     if (s->codec_id == AV_CODEC_ID_RV10 &&
 501         (avctx->width &15 ||
 502          avctx->height&15 )) {
 503         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 504         return AVERROR(EINVAL);
 505     }
 506
 507     if (s->codec_id == AV_CODEC_ID_RV20 &&
 508         (avctx->width &3 ||
 509          avctx->height&3 )) {
 510         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 511         return AVERROR(EINVAL);
 512     }
 513
 514     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 515          s->codec_id == AV_CODEC_ID_WMV2) &&
 516          avctx->width & 1) {
 517          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 518          return -1;
 519     }
 520
 521     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 522         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 523         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 524         return -1;
 525     }
 526
 527     // FIXME mpeg2 uses that too
 528     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 529                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 530         av_log(avctx, AV_LOG_ERROR,
 531                "mpeg2 style quantization not supported by codec\n");
 532         return -1;
 533     }
 534
 535     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 536         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 537         return -1;
 538     }
 539
 540     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 541         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 542         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 543         return -1;
 544     }
 545
 546     if (s->avctx->scenechange_threshold < 1000000000 &&
 547         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 548         av_log(avctx, AV_LOG_ERROR,
 549                "closed gop with scene change detection are not supported yet, "
 550                "set threshold to 1000000000\n");
 551         return -1;
 552     }
 553
 554     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 555         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 556             av_log(avctx, AV_LOG_ERROR,
 557                   "low delay forcing is only available for mpeg2\n");
 558             return -1;
 559         }
 560         if (s->max_b_frames != 0) {
 561             av_log(avctx, AV_LOG_ERROR,
 562                    "b frames cannot be used with low delay\n");
 563             return -1;
 564         }
 565     }
 566
 567     if (s->q_scale_type == 1) {
 568         if (avctx->qmax > 12) {
 569             av_log(avctx, AV_LOG_ERROR,
 570                    "non linear quant only supports qmax <= 12 currently\n");
 571             return -1;
 572         }
 573     }
 574
 575     if (s->avctx->thread_count > 1         &&
 576         s->codec_id != AV_CODEC_ID_MPEG4      &&
 577         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 578         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 579         s->codec_id != AV_CODEC_ID_MJPEG      &&
 580         (s->codec_id != AV_CODEC_ID_H263P)) {
 581         av_log(avctx, AV_LOG_ERROR,
 582                "multi threaded encoding not supported by codec\n");
 583         return -1;
 584     }
 585
 586     if (s->avctx->thread_count < 1) {
 587         av_log(avctx, AV_LOG_ERROR,
 588                "automatic thread number detection not supported by codec, "
 589                "patch welcome\n");
 590         return -1;
 591     }
 592
 593     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 594         s->rtp_mode = 1;
 595
 596     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 597         s->h263_slice_structured = 1;
 598
 599     if (!avctx->time_base.den || !avctx->time_base.num) {
 600         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 601         return -1;
 602     }
 603
 604     i = (INT_MAX / 2 + 128) >> 8;
 605     if (avctx->mb_threshold >= i) {
 606         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 607                i - 1);
 608         return -1;
 609     }
 610
 611     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 612         av_log(avctx, AV_LOG_INFO,
 613                "notice: b_frame_strategy only affects the first pass\n");
 614         avctx->b_frame_strategy = 0;
 615     }
 616
 617     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 618     if (i > 1) {
 619         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 620         avctx->time_base.den /= i;
 621         avctx->time_base.num /= i;
 622         //return -1;
 623     }
 624
 625     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 626         // (a + x * 3 / 8) / x
 627         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 628         s->inter_quant_bias = 0;
 629     } else {
 630         s->intra_quant_bias = 0;
 631         // (a - x / 4) / x
 632         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 633     }
 634
 635     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 636         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 637         return AVERROR(EINVAL);
 638     }
 639
 640     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 641         s->intra_quant_bias = avctx->intra_quant_bias;
 642     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 643         s->inter_quant_bias = avctx->inter_quant_bias;
 644
 645     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 646
 647     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 648         s->avctx->time_base.den > (1 << 16) - 1) {
 649         av_log(avctx, AV_LOG_ERROR,
 650                "timebase %d/%d not supported by MPEG 4 standard, "
 651                "the maximum admitted value for the timebase denominator "
 652                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 653                (1 << 16) - 1);
 654         return -1;
 655     }
 656     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 657
 658     switch (avctx->codec->id) {
 659     case AV_CODEC_ID_MPEG1VIDEO:
 660         s->out_format = FMT_MPEG1;
 661         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 662         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 663         break;
 664     case AV_CODEC_ID_MPEG2VIDEO:
 665         s->out_format = FMT_MPEG1;
 666         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 667         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 668         s->rtp_mode   = 1;
 669         break;
 670     case AV_CODEC_ID_MJPEG:
 671     case AV_CODEC_ID_AMV:
 672         s->out_format = FMT_MJPEG;
 673         s->intra_only = 1; /* force intra only for jpeg */
 674         if (!CONFIG_MJPEG_ENCODER ||
 675             ff_mjpeg_encode_init(s) < 0)
 676             return -1;
 677         avctx->delay = 0;
 678         s->low_delay = 1;
 679         break;
 680     case AV_CODEC_ID_H261:
 681         if (!CONFIG_H261_ENCODER)
 682             return -1;
 683         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 684             av_log(avctx, AV_LOG_ERROR,
 685                    "The specified picture size of %dx%d is not valid for the "
 686                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 687                     s->width, s->height);
 688             return -1;
 689         }
 690         s->out_format = FMT_H261;
 691         avctx->delay  = 0;
 692         s->low_delay  = 1;
 693         break;
 694     case AV_CODEC_ID_H263:
 695         if (!CONFIG_H263_ENCODER)
 696             return -1;
 697         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 698                              s->width, s->height) == 8) {
 699             av_log(avctx, AV_LOG_ERROR,
 700                    "The specified picture size of %dx%d is not valid for "
 701                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 702                    "352x288, 704x576, and 1408x1152. "
 703                    "Try H.263+.\n", s->width, s->height);
 704             return -1;
 705         }
 706         s->out_format = FMT_H263;
 707         avctx->delay  = 0;
 708         s->low_delay  = 1;
 709         break;
 710     case AV_CODEC_ID_H263P:
 711         s->out_format = FMT_H263;
 712         s->h263_plus  = 1;
 713         /* Fx */
 714         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 715         s->modified_quant  = s->h263_aic;
 716         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 717         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 718
 719         /* /Fx */
 720         /* These are just to be sure */
 721         avctx->delay = 0;
 722         s->low_delay = 1;
 723         break;
 724     case AV_CODEC_ID_FLV1:
 725         s->out_format      = FMT_H263;
 726         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 727         s->unrestricted_mv = 1;
 728         s->rtp_mode  = 0; /* don't allow GOB */
 729         avctx->delay = 0;
 730         s->low_delay = 1;
 731         break;
 732     case AV_CODEC_ID_RV10:
 733         s->out_format = FMT_H263;
 734         avctx->delay  = 0;
 735         s->low_delay  = 1;
 736         break;
 737     case AV_CODEC_ID_RV20:
 738         s->out_format      = FMT_H263;
 739         avctx->delay       = 0;
 740         s->low_delay       = 1;
 741         s->modified_quant  = 1;
 742         s->h263_aic        = 1;
 743         s->h263_plus       = 1;
 744         s->loop_filter     = 1;
 745         s->unrestricted_mv = 0;
 746         break;
 747     case AV_CODEC_ID_MPEG4:
 748         s->out_format      = FMT_H263;
 749         s->h263_pred       = 1;
 750         s->unrestricted_mv = 1;
 751         s->low_delay       = s->max_b_frames ? 0 : 1;
 752         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 753         break;
 754     case AV_CODEC_ID_MSMPEG4V2:
 755         s->out_format      = FMT_H263;
 756         s->h263_pred       = 1;
 757         s->unrestricted_mv = 1;
 758         s->msmpeg4_version = 2;
 759         avctx->delay       = 0;
 760         s->low_delay       = 1;
 761         break;
 762     case AV_CODEC_ID_MSMPEG4V3:
 763         s->out_format        = FMT_H263;
 764         s->h263_pred         = 1;
 765         s->unrestricted_mv   = 1;
 766         s->msmpeg4_version   = 3;
 767         s->flipflop_rounding = 1;
 768         avctx->delay         = 0;
 769         s->low_delay         = 1;
 770         break;
 771     case AV_CODEC_ID_WMV1:
 772         s->out_format        = FMT_H263;
 773         s->h263_pred         = 1;
 774         s->unrestricted_mv   = 1;
 775         s->msmpeg4_version   = 4;
 776         s->flipflop_rounding = 1;
 777         avctx->delay         = 0;
 778         s->low_delay         = 1;
 779         break;
 780     case AV_CODEC_ID_WMV2:
 781         s->out_format        = FMT_H263;
 782         s->h263_pred         = 1;
 783         s->unrestricted_mv   = 1;
 784         s->msmpeg4_version   = 5;
 785         s->flipflop_rounding = 1;
 786         avctx->delay         = 0;
 787         s->low_delay         = 1;
 788         break;
 789     default:
 790         return -1;
 791     }
 792
 793     avctx->has_b_frames = !s->low_delay;
 794
 795     s->encoding = 1;
 796
 797     s->progressive_frame    =
 798     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 799                                                 CODEC_FLAG_INTERLACED_ME) ||
 800                                 s->alternate_scan);
 801
 802     /* init */
 803     if (ff_MPV_common_init(s) < 0)
 804         return -1;
 805
 806     s->avctx->coded_frame = s->current_picture.f;
 807
 808     if (s->msmpeg4_version) {
 809         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 810                           2 * 2 * (MAX_LEVEL + 1) *
 811                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 812     }
 813     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 814
 815     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 816     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 817     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 818     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 819     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 820     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 821     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 822                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 823     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 824                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 825
 826     if (s->avctx->noise_reduction) {
 827         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 828                           2 * 64 * sizeof(uint16_t), fail);
 829     }
 830
 831     ff_dct_encode_init(s);
 832
 833     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 834         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 835
 836     s->quant_precision = 5;
 837
 838     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 839     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 840
 841     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 842         ff_h261_encode_init(s);
 843     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 844         ff_h263_encode_init(s);
 845     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 846         ff_msmpeg4_encode_init(s);
 847     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 848         && s->out_format == FMT_MPEG1)
 849         ff_mpeg1_encode_init(s);
 850
 851     /* init q matrix */
 852     for (i = 0; i < 64; i++) {
 853         int j = s->dsp.idct_permutation[i];
 854         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 855             s->mpeg_quant) {
 856             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 857             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 858         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 859             s->intra_matrix[j] =
 860             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 861         } else {
 862             /* mpeg1/2 */
 863             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 864             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 865         }
 866         if (s->avctx->intra_matrix)
 867             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 868         if (s->avctx->inter_matrix)
 869             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 870     }
 871
 872     /* precompute matrix */
 873     /* for mjpeg, we do include qscale in the matrix */
 874     if (s->out_format != FMT_MJPEG) {
 875         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 876                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 877                           31, 1);
 878         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 879                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 880                           31, 0);
 881     }
 882
 883     if (ff_rate_control_init(s) < 0)
 884         return -1;
 885
 886 #if FF_API_ERROR_RATE
 887     FF_DISABLE_DEPRECATION_WARNINGS
 888     if (avctx->error_rate)
 889         s->error_rate = avctx->error_rate;
 890     FF_ENABLE_DEPRECATION_WARNINGS;
 891 #endif
 892
 893 #if FF_API_NORMALIZE_AQP
 894     FF_DISABLE_DEPRECATION_WARNINGS
 895     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
 896         s->mpv_flags |= FF_MPV_FLAG_NAQ;
 897     FF_ENABLE_DEPRECATION_WARNINGS;
 898 #endif
 899
 900 #if FF_API_MV0
 901     FF_DISABLE_DEPRECATION_WARNINGS
 902     if (avctx->flags & CODEC_FLAG_MV0)
 903         s->mpv_flags |= FF_MPV_FLAG_MV0;
 904     FF_ENABLE_DEPRECATION_WARNINGS
 905 #endif
 906
 907     if (avctx->b_frame_strategy == 2) {
 908         for (i = 0; i < s->max_b_frames + 2; i++) {
 909             s->tmp_frames[i] = av_frame_alloc();
 910             if (!s->tmp_frames[i])
 911                 return AVERROR(ENOMEM);
 912
 913             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 914             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 915             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 916
 917             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 918             if (ret < 0)
 919                 return ret;
 920         }
 921     }
 922
 923     return 0;
 924 fail:
 925     ff_MPV_encode_end(avctx);
 926     return AVERROR_UNKNOWN;
 927 }
 928
 929 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 930 {
 931     MpegEncContext *s = avctx->priv_data;
 932     int i;
 933
 934     ff_rate_control_uninit(s);
 935
 936     ff_MPV_common_end(s);
 937     if (CONFIG_MJPEG_ENCODER &&
 938         s->out_format == FMT_MJPEG)
 939         ff_mjpeg_encode_close(s);
 940
 941     av_freep(&avctx->extradata);
 942
 943     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 944         av_frame_free(&s->tmp_frames[i]);
 945
 946     ff_free_picture_tables(&s->new_picture);
 947     ff_mpeg_unref_picture(s, &s->new_picture);
 948
 949     av_freep(&s->avctx->stats_out);
 950     av_freep(&s->ac_stats);
 951
 952     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
 953     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
 954     s->q_chroma_intra_matrix=   NULL;
 955     s->q_chroma_intra_matrix16= NULL;
 956     av_freep(&s->q_intra_matrix);
 957     av_freep(&s->q_inter_matrix);
 958     av_freep(&s->q_intra_matrix16);
 959     av_freep(&s->q_inter_matrix16);
 960     av_freep(&s->input_picture);
 961     av_freep(&s->reordered_input_picture);
 962     av_freep(&s->dct_offset);
 963
 964     return 0;
 965 }
 966
 967 static int get_sae(uint8_t *src, int ref, int stride)
 968 {
 969     int x,y;
 970     int acc = 0;
 971
 972     for (y = 0; y < 16; y++) {
 973         for (x = 0; x < 16; x++) {
 974             acc += FFABS(src[x + y * stride] - ref);
 975         }
 976     }
 977
 978     return acc;
 979 }
 980
 981 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 982                            uint8_t *ref, int stride)
 983 {
 984     int x, y, w, h;
 985     int acc = 0;
 986
 987     w = s->width  & ~15;
 988     h = s->height & ~15;
 989
 990     for (y = 0; y < h; y += 16) {
 991         for (x = 0; x < w; x += 16) {
 992             int offset = x + y * stride;
 993             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 994                                      16);
 995             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 996             int sae  = get_sae(src + offset, mean, stride);
 997
 998             acc += sae + 500 < sad;
 999         }
1000     }
1001     return acc;
1002 }
1003
1004
1005 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1006 {
1007     Picture *pic = NULL;
1008     int64_t pts;
1009     int i, display_picture_number = 0, ret;
1010     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1011                                                  (s->low_delay ? 0 : 1);
1012     int direct = 1;
1013
1014     if (pic_arg) {
1015         pts = pic_arg->pts;
1016         display_picture_number = s->input_picture_number++;
1017
1018         if (pts != AV_NOPTS_VALUE) {
1019             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1020                 int64_t last = s->user_specified_pts;
1021
1022                 if (pts <= last) {
1023                     av_log(s->avctx, AV_LOG_ERROR,
1024                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1025                            pts, last);
1026                     return AVERROR(EINVAL);
1027                 }
1028
1029                 if (!s->low_delay && display_picture_number == 1)
1030                     s->dts_delta = pts - last;
1031             }
1032             s->user_specified_pts = pts;
1033         } else {
1034             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1035                 s->user_specified_pts =
1036                 pts = s->user_specified_pts + 1;
1037                 av_log(s->avctx, AV_LOG_INFO,
1038                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1039                        pts);
1040             } else {
1041                 pts = display_picture_number;
1042             }
1043         }
1044     }
1045
1046     if (pic_arg) {
1047         if (!pic_arg->buf[0])
1048             direct = 0;
1049         if (pic_arg->linesize[0] != s->linesize)
1050             direct = 0;
1051         if (pic_arg->linesize[1] != s->uvlinesize)
1052             direct = 0;
1053         if (pic_arg->linesize[2] != s->uvlinesize)
1054             direct = 0;
1055         if ((s->width & 15) || (s->height & 15))
1056             direct = 0;
1057         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1058             direct = 0;
1059         if (s->linesize & (STRIDE_ALIGN-1))
1060             direct = 0;
1061
1062         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1063                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1064
1065         if (direct) {
1066             i = ff_find_unused_picture(s, 1);
1067             if (i < 0)
1068                 return i;
1069
1070             pic = &s->picture[i];
1071             pic->reference = 3;
1072
1073             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1074                 return ret;
1075             if (ff_alloc_picture(s, pic, 1) < 0) {
1076                 return -1;
1077             }
1078         } else {
1079             i = ff_find_unused_picture(s, 0);
1080             if (i < 0)
1081                 return i;
1082
1083             pic = &s->picture[i];
1084             pic->reference = 3;
1085
1086             if (ff_alloc_picture(s, pic, 0) < 0) {
1087                 return -1;
1088             }
1089
1090             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1091                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1092                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1093                 // empty
1094             } else {
1095                 int h_chroma_shift, v_chroma_shift;
1096                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1097                                                  &h_chroma_shift,
1098                                                  &v_chroma_shift);
1099
1100                 for (i = 0; i < 3; i++) {
1101                     int src_stride = pic_arg->linesize[i];
1102                     int dst_stride = i ? s->uvlinesize : s->linesize;
1103                     int h_shift = i ? h_chroma_shift : 0;
1104                     int v_shift = i ? v_chroma_shift : 0;
1105                     int w = s->width  >> h_shift;
1106                     int h = s->height >> v_shift;
1107                     uint8_t *src = pic_arg->data[i];
1108                     uint8_t *dst = pic->f->data[i];
1109
1110                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1111                         h = ((s->height + 15)/16*16) >> v_shift;
1112                     }
1113
1114                     if (!s->avctx->rc_buffer_size)
1115                         dst += INPLACE_OFFSET;
1116
1117                     if (src_stride == dst_stride)
1118                         memcpy(dst, src, src_stride * h);
1119                     else {
1120                         int h2 = h;
1121                         uint8_t *dst2 = dst;
1122                         while (h2--) {
1123                             memcpy(dst2, src, w);
1124                             dst2 += dst_stride;
1125                             src += src_stride;
1126                         }
1127                     }
1128                     if ((s->width & 15) || (s->height & 15)) {
1129                         s->dsp.draw_edges(dst, dst_stride,
1130                                           w, h,
1131                                           16>>h_shift,
1132                                           16>>v_shift,
1133                                           EDGE_BOTTOM);
1134                     }
1135                 }
1136             }
1137         }
1138         ret = av_frame_copy_props(pic->f, pic_arg);
1139         if (ret < 0)
1140             return ret;
1141
1142         pic->f->display_picture_number = display_picture_number;
1143         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1144     }
1145
1146     /* shift buffer entries */
1147     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1148         s->input_picture[i - 1] = s->input_picture[i];
1149
1150     s->input_picture[encoding_delay] = (Picture*) pic;
1151
1152     return 0;
1153 }
1154
1155 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1156 {
1157     int x, y, plane;
1158     int score = 0;
1159     int64_t score64 = 0;
1160
1161     for (plane = 0; plane < 3; plane++) {
1162         const int stride = p->f->linesize[plane];
1163         const int bw = plane ? 1 : 2;
1164         for (y = 0; y < s->mb_height * bw; y++) {
1165             for (x = 0; x < s->mb_width * bw; x++) {
1166                 int off = p->shared ? 0 : 16;
1167                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1168                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1169                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1170
1171                 switch (FFABS(s->avctx->frame_skip_exp)) {
1172                 case 0: score    =  FFMAX(score, v);          break;
1173                 case 1: score   += FFABS(v);                  break;
1174                 case 2: score64 += v * (int64_t)v;                       break;
1175                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1176                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1177                 }
1178             }
1179         }
1180     }
1181     emms_c();
1182
1183     if (score)
1184         score64 = score;
1185     if (s->avctx->frame_skip_exp < 0)
1186         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1187                       -1.0/s->avctx->frame_skip_exp);
1188
1189     if (score64 < s->avctx->frame_skip_threshold)
1190         return 1;
1191     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1192         return 1;
1193     return 0;
1194 }
1195
1196 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1197 {
1198     AVPacket pkt = { 0 };
1199     int ret, got_output;
1200
1201     av_init_packet(&pkt);
1202     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1203     if (ret < 0)
1204         return ret;
1205
1206     ret = pkt.size;
1207     av_free_packet(&pkt);
1208     return ret;
1209 }
1210
1211 static int estimate_best_b_count(MpegEncContext *s)
1212 {
1213     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1214     AVCodecContext *c = avcodec_alloc_context3(NULL);
1215     const int scale = s->avctx->brd_scale;
1216     int i, j, out_size, p_lambda, b_lambda, lambda2;
1217     int64_t best_rd  = INT64_MAX;
1218     int best_b_count = -1;
1219
1220     av_assert0(scale >= 0 && scale <= 3);
1221
1222     //emms_c();
1223     //s->next_picture_ptr->quality;
1224     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1225     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1226     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1227     if (!b_lambda) // FIXME we should do this somewhere else
1228         b_lambda = p_lambda;
1229     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1230                FF_LAMBDA_SHIFT;
1231
1232     c->width        = s->width  >> scale;
1233     c->height       = s->height >> scale;
1234     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1235     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1236     c->mb_decision  = s->avctx->mb_decision;
1237     c->me_cmp       = s->avctx->me_cmp;
1238     c->mb_cmp       = s->avctx->mb_cmp;
1239     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1240     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1241     c->time_base    = s->avctx->time_base;
1242     c->max_b_frames = s->max_b_frames;
1243
1244     if (avcodec_open2(c, codec, NULL) < 0)
1245         return -1;
1246
1247     for (i = 0; i < s->max_b_frames + 2; i++) {
1248         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1249                                                 s->next_picture_ptr;
1250
1251         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1252             pre_input = *pre_input_ptr;
1253
1254             if (!pre_input.shared && i) {
1255                 pre_input.f->data[0] += INPLACE_OFFSET;
1256                 pre_input.f->data[1] += INPLACE_OFFSET;
1257                 pre_input.f->data[2] += INPLACE_OFFSET;
1258             }
1259
1260             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1261                                  pre_input.f->data[0], pre_input.f->linesize[0],
1262                                  c->width,      c->height);
1263             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1264                                  pre_input.f->data[1], pre_input.f->linesize[1],
1265                                  c->width >> 1, c->height >> 1);
1266             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1267                                  pre_input.f->data[2], pre_input.f->linesize[2],
1268                                  c->width >> 1, c->height >> 1);
1269         }
1270     }
1271
1272     for (j = 0; j < s->max_b_frames + 1; j++) {
1273         int64_t rd = 0;
1274
1275         if (!s->input_picture[j])
1276             break;
1277
1278         c->error[0] = c->error[1] = c->error[2] = 0;
1279
1280         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1281         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1282
1283         out_size = encode_frame(c, s->tmp_frames[0]);
1284
1285         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1286
1287         for (i = 0; i < s->max_b_frames + 1; i++) {
1288             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1289
1290             s->tmp_frames[i + 1]->pict_type = is_p ?
1291                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1292             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1293
1294             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1295
1296             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1297         }
1298
1299         /* get the delayed frames */
1300         while (out_size) {
1301             out_size = encode_frame(c, NULL);
1302             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1303         }
1304
1305         rd += c->error[0] + c->error[1] + c->error[2];
1306
1307         if (rd < best_rd) {
1308             best_rd = rd;
1309             best_b_count = j;
1310         }
1311     }
1312
1313     avcodec_close(c);
1314     av_freep(&c);
1315
1316     return best_b_count;
1317 }
1318
1319 static int select_input_picture(MpegEncContext *s)
1320 {
1321     int i, ret;
1322
1323     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1324         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1325     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1326
1327     /* set next picture type & ordering */
1328     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1329         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1330             if (s->picture_in_gop_number < s->gop_size &&
1331                 s->next_picture_ptr &&
1332                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1333                 // FIXME check that te gop check above is +-1 correct
1334                 av_frame_unref(s->input_picture[0]->f);
1335
1336                 ff_vbv_update(s, 0);
1337
1338                 goto no_output_pic;
1339             }
1340         }
1341
1342         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1343             s->next_picture_ptr == NULL || s->intra_only) {
1344             s->reordered_input_picture[0] = s->input_picture[0];
1345             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1346             s->reordered_input_picture[0]->f->coded_picture_number =
1347                 s->coded_picture_number++;
1348         } else {
1349             int b_frames;
1350
1351             if (s->flags & CODEC_FLAG_PASS2) {
1352                 for (i = 0; i < s->max_b_frames + 1; i++) {
1353                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1354
1355                     if (pict_num >= s->rc_context.num_entries)
1356                         break;
1357                     if (!s->input_picture[i]) {
1358                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1359                         break;
1360                     }
1361
1362                     s->input_picture[i]->f->pict_type =
1363                         s->rc_context.entry[pict_num].new_pict_type;
1364                 }
1365             }
1366
1367             if (s->avctx->b_frame_strategy == 0) {
1368                 b_frames = s->max_b_frames;
1369                 while (b_frames && !s->input_picture[b_frames])
1370                     b_frames--;
1371             } else if (s->avctx->b_frame_strategy == 1) {
1372                 for (i = 1; i < s->max_b_frames + 1; i++) {
1373                     if (s->input_picture[i] &&
1374                         s->input_picture[i]->b_frame_score == 0) {
1375                         s->input_picture[i]->b_frame_score =
1376                             get_intra_count(s,
1377                                             s->input_picture[i    ]->f->data[0],
1378                                             s->input_picture[i - 1]->f->data[0],
1379                                             s->linesize) + 1;
1380                     }
1381                 }
1382                 for (i = 0; i < s->max_b_frames + 1; i++) {
1383                     if (s->input_picture[i] == NULL ||
1384                         s->input_picture[i]->b_frame_score - 1 >
1385                             s->mb_num / s->avctx->b_sensitivity)
1386                         break;
1387                 }
1388
1389                 b_frames = FFMAX(0, i - 1);
1390
1391                 /* reset scores */
1392                 for (i = 0; i < b_frames + 1; i++) {
1393                     s->input_picture[i]->b_frame_score = 0;
1394                 }
1395             } else if (s->avctx->b_frame_strategy == 2) {
1396                 b_frames = estimate_best_b_count(s);
1397             } else {
1398                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1399                 b_frames = 0;
1400             }
1401
1402             emms_c();
1403
1404             for (i = b_frames - 1; i >= 0; i--) {
1405                 int type = s->input_picture[i]->f->pict_type;
1406                 if (type && type != AV_PICTURE_TYPE_B)
1407                     b_frames = i;
1408             }
1409             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1410                 b_frames == s->max_b_frames) {
1411                 av_log(s->avctx, AV_LOG_ERROR,
1412                        "warning, too many b frames in a row\n");
1413             }
1414
1415             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1416                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1417                     s->gop_size > s->picture_in_gop_number) {
1418                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1419                 } else {
1420                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1421                         b_frames = 0;
1422                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1423                 }
1424             }
1425
1426             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1427                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1428                 b_frames--;
1429
1430             s->reordered_input_picture[0] = s->input_picture[b_frames];
1431             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1432                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1433             s->reordered_input_picture[0]->f->coded_picture_number =
1434                 s->coded_picture_number++;
1435             for (i = 0; i < b_frames; i++) {
1436                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1437                 s->reordered_input_picture[i + 1]->f->pict_type =
1438                     AV_PICTURE_TYPE_B;
1439                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1440                     s->coded_picture_number++;
1441             }
1442         }
1443     }
1444 no_output_pic:
1445     if (s->reordered_input_picture[0]) {
1446         s->reordered_input_picture[0]->reference =
1447            s->reordered_input_picture[0]->f->pict_type !=
1448                AV_PICTURE_TYPE_B ? 3 : 0;
1449
1450         ff_mpeg_unref_picture(s, &s->new_picture);
1451         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1452             return ret;
1453
1454         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1455             // input is a shared pix, so we can't modifiy it -> alloc a new
1456             // one & ensure that the shared one is reuseable
1457
1458             Picture *pic;
1459             int i = ff_find_unused_picture(s, 0);
1460             if (i < 0)
1461                 return i;
1462             pic = &s->picture[i];
1463
1464             pic->reference = s->reordered_input_picture[0]->reference;
1465             if (ff_alloc_picture(s, pic, 0) < 0) {
1466                 return -1;
1467             }
1468
1469             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1470             if (ret < 0)
1471                 return ret;
1472
1473             /* mark us unused / free shared pic */
1474             av_frame_unref(s->reordered_input_picture[0]->f);
1475             s->reordered_input_picture[0]->shared = 0;
1476
1477             s->current_picture_ptr = pic;
1478         } else {
1479             // input is not a shared pix -> reuse buffer for current_pix
1480             s->current_picture_ptr = s->reordered_input_picture[0];
1481             for (i = 0; i < 4; i++) {
1482                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1483             }
1484         }
1485         ff_mpeg_unref_picture(s, &s->current_picture);
1486         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1487                                        s->current_picture_ptr)) < 0)
1488             return ret;
1489
1490         s->picture_number = s->new_picture.f->display_picture_number;
1491     } else {
1492         ff_mpeg_unref_picture(s, &s->new_picture);
1493     }
1494     return 0;
1495 }
1496
1497 static void frame_end(MpegEncContext *s)
1498 {
1499     if (s->unrestricted_mv &&
1500         s->current_picture.reference &&
1501         !s->intra_only) {
1502         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1503         int hshift = desc->log2_chroma_w;
1504         int vshift = desc->log2_chroma_h;
1505         s->dsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0],
1506                           s->h_edge_pos, s->v_edge_pos,
1507                           EDGE_WIDTH, EDGE_WIDTH,
1508                           EDGE_TOP | EDGE_BOTTOM);
1509         s->dsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1],
1510                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1511                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1512                           EDGE_TOP | EDGE_BOTTOM);
1513         s->dsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2],
1514                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1515                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1516                           EDGE_TOP | EDGE_BOTTOM);
1517     }
1518
1519     emms_c();
1520
1521     s->last_pict_type                 = s->pict_type;
1522     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1523     if (s->pict_type!= AV_PICTURE_TYPE_B)
1524         s->last_non_b_pict_type = s->pict_type;
1525
1526     s->avctx->coded_frame = s->current_picture_ptr->f;
1527
1528 }
1529
1530 static void update_noise_reduction(MpegEncContext *s)
1531 {
1532     int intra, i;
1533
1534     for (intra = 0; intra < 2; intra++) {
1535         if (s->dct_count[intra] > (1 << 16)) {
1536             for (i = 0; i < 64; i++) {
1537                 s->dct_error_sum[intra][i] >>= 1;
1538             }
1539             s->dct_count[intra] >>= 1;
1540         }
1541
1542         for (i = 0; i < 64; i++) {
1543             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1544                                        s->dct_count[intra] +
1545                                        s->dct_error_sum[intra][i] / 2) /
1546                                       (s->dct_error_sum[intra][i] + 1);
1547         }
1548     }
1549 }
1550
1551 static int frame_start(MpegEncContext *s)
1552 {
1553     int ret;
1554
1555     /* mark & release old frames */
1556     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1557         s->last_picture_ptr != s->next_picture_ptr &&
1558         s->last_picture_ptr->f->buf[0]) {
1559         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1560     }
1561
1562     s->current_picture_ptr->f->pict_type = s->pict_type;
1563     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1564
1565     ff_mpeg_unref_picture(s, &s->current_picture);
1566     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1567                                    s->current_picture_ptr)) < 0)
1568         return ret;
1569
1570     if (s->pict_type != AV_PICTURE_TYPE_B) {
1571         s->last_picture_ptr = s->next_picture_ptr;
1572         if (!s->droppable)
1573             s->next_picture_ptr = s->current_picture_ptr;
1574     }
1575
1576     if (s->last_picture_ptr) {
1577         ff_mpeg_unref_picture(s, &s->last_picture);
1578         if (s->last_picture_ptr->f->buf[0] &&
1579             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1580                                        s->last_picture_ptr)) < 0)
1581             return ret;
1582     }
1583     if (s->next_picture_ptr) {
1584         ff_mpeg_unref_picture(s, &s->next_picture);
1585         if (s->next_picture_ptr->f->buf[0] &&
1586             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1587                                        s->next_picture_ptr)) < 0)
1588             return ret;
1589     }
1590
1591     if (s->picture_structure!= PICT_FRAME) {
1592         int i;
1593         for (i = 0; i < 4; i++) {
1594             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1595                 s->current_picture.f->data[i] +=
1596                     s->current_picture.f->linesize[i];
1597             }
1598             s->current_picture.f->linesize[i] *= 2;
1599             s->last_picture.f->linesize[i]    *= 2;
1600             s->next_picture.f->linesize[i]    *= 2;
1601         }
1602     }
1603
1604     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1605         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1606         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1607     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1608         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1609         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1610     } else {
1611         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1612         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1613     }
1614
1615     if (s->dct_error_sum) {
1616         av_assert2(s->avctx->noise_reduction && s->encoding);
1617         update_noise_reduction(s);
1618     }
1619
1620     return 0;
1621 }
1622
1623 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1624                           const AVFrame *pic_arg, int *got_packet)
1625 {
1626     MpegEncContext *s = avctx->priv_data;
1627     int i, stuffing_count, ret;
1628     int context_count = s->slice_context_count;
1629
1630     s->picture_in_gop_number++;
1631
1632     if (load_input_picture(s, pic_arg) < 0)
1633         return -1;
1634
1635     if (select_input_picture(s) < 0) {
1636         return -1;
1637     }
1638
1639     /* output? */
1640     if (s->new_picture.f->data[0]) {
1641         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1642             return ret;
1643         if (s->mb_info) {
1644             s->mb_info_ptr = av_packet_new_side_data(pkt,
1645                                  AV_PKT_DATA_H263_MB_INFO,
1646                                  s->mb_width*s->mb_height*12);
1647             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1648         }
1649
1650         for (i = 0; i < context_count; i++) {
1651             int start_y = s->thread_context[i]->start_mb_y;
1652             int   end_y = s->thread_context[i]->  end_mb_y;
1653             int h       = s->mb_height;
1654             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1655             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1656
1657             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1658         }
1659
1660         s->pict_type = s->new_picture.f->pict_type;
1661         //emms_c();
1662         ret = frame_start(s);
1663         if (ret < 0)
1664             return ret;
1665 vbv_retry:
1666         if (encode_picture(s, s->picture_number) < 0)
1667             return -1;
1668
1669         avctx->header_bits = s->header_bits;
1670         avctx->mv_bits     = s->mv_bits;
1671         avctx->misc_bits   = s->misc_bits;
1672         avctx->i_tex_bits  = s->i_tex_bits;
1673         avctx->p_tex_bits  = s->p_tex_bits;
1674         avctx->i_count     = s->i_count;
1675         // FIXME f/b_count in avctx
1676         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1677         avctx->skip_count  = s->skip_count;
1678
1679         frame_end(s);
1680
1681         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1682             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1683
1684         if (avctx->rc_buffer_size) {
1685             RateControlContext *rcc = &s->rc_context;
1686             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1687
1688             if (put_bits_count(&s->pb) > max_size &&
1689                 s->lambda < s->avctx->lmax) {
1690                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1691                                        (s->qscale + 1) / s->qscale);
1692                 if (s->adaptive_quant) {
1693                     int i;
1694                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1695                         s->lambda_table[i] =
1696                             FFMAX(s->lambda_table[i] + 1,
1697                                   s->lambda_table[i] * (s->qscale + 1) /
1698                                   s->qscale);
1699                 }
1700                 s->mb_skipped = 0;        // done in frame_start()
1701                 // done in encode_picture() so we must undo it
1702                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1703                     if (s->flipflop_rounding          ||
1704                         s->codec_id == AV_CODEC_ID_H263P ||
1705                         s->codec_id == AV_CODEC_ID_MPEG4)
1706                         s->no_rounding ^= 1;
1707                 }
1708                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1709                     s->time_base       = s->last_time_base;
1710                     s->last_non_b_time = s->time - s->pp_time;
1711                 }
1712                 for (i = 0; i < context_count; i++) {
1713                     PutBitContext *pb = &s->thread_context[i]->pb;
1714                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1715                 }
1716                 goto vbv_retry;
1717             }
1718
1719             av_assert0(s->avctx->rc_max_rate);
1720         }
1721
1722         if (s->flags & CODEC_FLAG_PASS1)
1723             ff_write_pass1_stats(s);
1724
1725         for (i = 0; i < 4; i++) {
1726             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1727             avctx->error[i] += s->current_picture_ptr->f->error[i];
1728         }
1729
1730         if (s->flags & CODEC_FLAG_PASS1)
1731             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1732                    avctx->i_tex_bits + avctx->p_tex_bits ==
1733                        put_bits_count(&s->pb));
1734         flush_put_bits(&s->pb);
1735         s->frame_bits  = put_bits_count(&s->pb);
1736
1737         stuffing_count = ff_vbv_update(s, s->frame_bits);
1738         s->stuffing_bits = 8*stuffing_count;
1739         if (stuffing_count) {
1740             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1741                     stuffing_count + 50) {
1742                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1743                 return -1;
1744             }
1745
1746             switch (s->codec_id) {
1747             case AV_CODEC_ID_MPEG1VIDEO:
1748             case AV_CODEC_ID_MPEG2VIDEO:
1749                 while (stuffing_count--) {
1750                     put_bits(&s->pb, 8, 0);
1751                 }
1752             break;
1753             case AV_CODEC_ID_MPEG4:
1754                 put_bits(&s->pb, 16, 0);
1755                 put_bits(&s->pb, 16, 0x1C3);
1756                 stuffing_count -= 4;
1757                 while (stuffing_count--) {
1758                     put_bits(&s->pb, 8, 0xFF);
1759                 }
1760             break;
1761             default:
1762                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1763             }
1764             flush_put_bits(&s->pb);
1765             s->frame_bits  = put_bits_count(&s->pb);
1766         }
1767
1768         /* update mpeg1/2 vbv_delay for CBR */
1769         if (s->avctx->rc_max_rate                          &&
1770             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1771             s->out_format == FMT_MPEG1                     &&
1772             90000LL * (avctx->rc_buffer_size - 1) <=
1773                 s->avctx->rc_max_rate * 0xFFFFLL) {
1774             int vbv_delay, min_delay;
1775             double inbits  = s->avctx->rc_max_rate *
1776                              av_q2d(s->avctx->time_base);
1777             int    minbits = s->frame_bits - 8 *
1778                              (s->vbv_delay_ptr - s->pb.buf - 1);
1779             double bits    = s->rc_context.buffer_index + minbits - inbits;
1780
1781             if (bits < 0)
1782                 av_log(s->avctx, AV_LOG_ERROR,
1783                        "Internal error, negative bits\n");
1784
1785             assert(s->repeat_first_field == 0);
1786
1787             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1788             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1789                         s->avctx->rc_max_rate;
1790
1791             vbv_delay = FFMAX(vbv_delay, min_delay);
1792
1793             av_assert0(vbv_delay < 0xFFFF);
1794
1795             s->vbv_delay_ptr[0] &= 0xF8;
1796             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1797             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1798             s->vbv_delay_ptr[2] &= 0x07;
1799             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1800             avctx->vbv_delay     = vbv_delay * 300;
1801         }
1802         s->total_bits     += s->frame_bits;
1803         avctx->frame_bits  = s->frame_bits;
1804
1805         pkt->pts = s->current_picture.f->pts;
1806         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1807             if (!s->current_picture.f->coded_picture_number)
1808                 pkt->dts = pkt->pts - s->dts_delta;
1809             else
1810                 pkt->dts = s->reordered_pts;
1811             s->reordered_pts = pkt->pts;
1812         } else
1813             pkt->dts = pkt->pts;
1814         if (s->current_picture.f->key_frame)
1815             pkt->flags |= AV_PKT_FLAG_KEY;
1816         if (s->mb_info)
1817             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1818     } else {
1819         s->frame_bits = 0;
1820     }
1821
1822     /* release non-reference frames */
1823     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1824         if (!s->picture[i].reference)
1825             ff_mpeg_unref_picture(s, &s->picture[i]);
1826     }
1827
1828     av_assert1((s->frame_bits & 7) == 0);
1829
1830     pkt->size = s->frame_bits / 8;
1831     *got_packet = !!pkt->size;
1832     return 0;
1833 }
1834
1835 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1836                                                 int n, int threshold)
1837 {
1838     static const char tab[64] = {
1839         3, 2, 2, 1, 1, 1, 1, 1,
1840         1, 1, 1, 1, 1, 1, 1, 1,
1841         1, 1, 1, 1, 1, 1, 1, 1,
1842         0, 0, 0, 0, 0, 0, 0, 0,
1843         0, 0, 0, 0, 0, 0, 0, 0,
1844         0, 0, 0, 0, 0, 0, 0, 0,
1845         0, 0, 0, 0, 0, 0, 0, 0,
1846         0, 0, 0, 0, 0, 0, 0, 0
1847     };
1848     int score = 0;
1849     int run = 0;
1850     int i;
1851     int16_t *block = s->block[n];
1852     const int last_index = s->block_last_index[n];
1853     int skip_dc;
1854
1855     if (threshold < 0) {
1856         skip_dc = 0;
1857         threshold = -threshold;
1858     } else
1859         skip_dc = 1;
1860
1861     /* Are all we could set to zero already zero? */
1862     if (last_index <= skip_dc - 1)
1863         return;
1864
1865     for (i = 0; i <= last_index; i++) {
1866         const int j = s->intra_scantable.permutated[i];
1867         const int level = FFABS(block[j]);
1868         if (level == 1) {
1869             if (skip_dc && i == 0)
1870                 continue;
1871             score += tab[run];
1872             run = 0;
1873         } else if (level > 1) {
1874             return;
1875         } else {
1876             run++;
1877         }
1878     }
1879     if (score >= threshold)
1880         return;
1881     for (i = skip_dc; i <= last_index; i++) {
1882         const int j = s->intra_scantable.permutated[i];
1883         block[j] = 0;
1884     }
1885     if (block[0])
1886         s->block_last_index[n] = 0;
1887     else
1888         s->block_last_index[n] = -1;
1889 }
1890
1891 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1892                                int last_index)
1893 {
1894     int i;
1895     const int maxlevel = s->max_qcoeff;
1896     const int minlevel = s->min_qcoeff;
1897     int overflow = 0;
1898
1899     if (s->mb_intra) {
1900         i = 1; // skip clipping of intra dc
1901     } else
1902         i = 0;
1903
1904     for (; i <= last_index; i++) {
1905         const int j = s->intra_scantable.permutated[i];
1906         int level = block[j];
1907
1908         if (level > maxlevel) {
1909             level = maxlevel;
1910             overflow++;
1911         } else if (level < minlevel) {
1912             level = minlevel;
1913             overflow++;
1914         }
1915
1916         block[j] = level;
1917     }
1918
1919     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1920         av_log(s->avctx, AV_LOG_INFO,
1921                "warning, clipping %d dct coefficients to %d..%d\n",
1922                overflow, minlevel, maxlevel);
1923 }
1924
/**
 * Compute a weight for every pixel of an 8x8 block from its neighbourhood.
 *
 * For each pixel the samples of the surrounding 3x3 window, clipped to the
 * 8x8 block, are accumulated; the weight is
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output array of 64 weights, stored row by row
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between vertically adjacent samples in ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of samples inside the clipped window */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1948
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order:
 *  - update the quantizer when adaptive quantization or QP_RD is in use,
 *  - locate the source pixels in s->new_picture (going through
 *    s->edge_emu_buffer when the macroblock crosses the picture border),
 *  - intra: read the pixels into s->block; inter: run motion compensation
 *    into s->dest and store the difference to the source in s->block,
 *  - DCT and quantize every block that is not skipped, with optional
 *    noise-shaping refinement and single-coefficient elimination,
 *  - hand the blocks to the codec-specific macroblock writer.
 *
 * @param s               encoder context
 * @param motion_x        passed through to the codec-specific writer
 * @param motion_y        passed through to the codec-specific writer
 * @param mb_block_height chroma rows per macroblock, used to address the
 *                        chroma planes
 * @param mb_block_width  chroma columns per macroblock, used to address the
 *                        chroma planes
 * @param mb_block_count  number of 8x8 blocks processed per macroblock
 */
static av_always_inline void encode_mb_internal(MpegEncContext *s,
                                                int motion_x, int motion_y,
                                                int mb_block_height,
                                                int mb_block_width,
                                                int mb_block_count)
{
    int16_t weight[12][64];
    int16_t orig[12][64];
    const int mb_x = s->mb_x;
    const int mb_y = s->mb_y;
    int i;
    int skip_dct[12];
    /* offsets from the upper to the lower row of 8x8 blocks; replaced by a
     * single line when interlaced DCT is chosen below */
    int dct_offset = s->linesize * 8; // default for progressive frames
    int uv_dct_offset = s->uvlinesize * 8;
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    ptrdiff_t wrap_y, wrap_c;

    for (i = 0; i < mb_block_count; i++)
        skip_dct[i] = s->skipdct;

    /* Quantizer update: derive qscale from the per-macroblock lambda and
     * restrict dquant to what the output format allows. */
    if (s->adaptive_quant) {
        const int last_qp = s->qscale;
        const int mb_xy = mb_x + mb_y * s->mb_stride;

        s->lambda = s->lambda_table[mb_xy];
        update_qscale(s);

        if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
            s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
            s->dquant = s->qscale - last_qp;

            if (s->out_format == FMT_H263) {
                s->dquant = av_clip(s->dquant, -2, 2);

                if (s->codec_id == AV_CODEC_ID_MPEG4) {
                    if (!s->mb_intra) {
                        if (s->pict_type == AV_PICTURE_TYPE_B) {
                            if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
                                s->dquant = 0;
                        }
                        if (s->mv_type == MV_TYPE_8X8)
                            s->dquant = 0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* Source pixel pointers for this macroblock in the input picture. */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y  = s->new_picture.f->data[0] +
             (mb_y * 16 * wrap_y)              + mb_x * 16;
    ptr_cb = s->new_picture.f->data[1] +
             (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
    ptr_cr = s->new_picture.f->data[2] +
             (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;

    /* Macroblock reaches past the picture width or height (and the codec is
     * not AMV): build the three planes in the edge emulation buffer and read
     * from there instead. */
    if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
        uint8_t *ebuf = s->edge_emu_buffer + 32;
        int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
        int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
        s->vdsp.emulated_edge_mc(ebuf, ptr_y,
                                 wrap_y, wrap_y,
                                 16, 16, mb_x * 16, mb_y * 16,
                                 s->width, s->height);
        ptr_y = ebuf;
        s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
                                 wrap_c, wrap_c,
                                 mb_block_width, mb_block_height,
                                 mb_x * mb_block_width, mb_y * mb_block_height,
                                 cw, ch);
        ptr_cb = ebuf + 18 * wrap_y;
        s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
                                 wrap_c, wrap_c,
                                 mb_block_width, mb_block_height,
                                 mb_x * mb_block_width, mb_y * mb_block_height,
                                 cw, ch);
        ptr_cr = ebuf + 18 * wrap_y + 16;
    }

    if (s->mb_intra) {
        /* Intra: optionally choose interlaced DCT, then load source pixels. */
        if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
            int progressive_score, interlaced_score;

            s->interlaced_dct = 0;
            progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
                                                    NULL, wrap_y, 8) +
                                s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
                                                    NULL, wrap_y, 8) - 400;

            if (progressive_score > 0) {
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
                                                       NULL, wrap_y * 2, 8) +
                                   s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
                                                       NULL, wrap_y * 2, 8);
                if (progressive_score > interlaced_score) {
                    s->interlaced_dct = 1;

                    /* lower blocks now start one line down and every block
                     * reads every second line */
                    dct_offset = wrap_y;
                    uv_dct_offset = wrap_c;
                    wrap_y <<= 1;
                    if (s->chroma_format == CHROMA_422 ||
                        s->chroma_format == CHROMA_444)
                        wrap_c <<= 1;
                }
            }
        }

        s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);

        if (s->flags & CODEC_FLAG_GRAY) {
            skip_dct[4] = 1;
            skip_dct[5] = 1;
        } else {
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
            if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
                s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
                s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
            } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
                s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
                s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
                s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
                s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
                s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
                s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
            }
        }
    } else {
        /* Inter: build the prediction in s->dest, then code the difference
         * between the source and that prediction. */
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
            op_pix  = s->hdsp.put_pixels_tab;
            op_qpix = s->dsp.put_qpel_pixels_tab;
        } else {
            op_pix  = s->hdsp.put_no_rnd_pixels_tab;
            op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        /* After a forward prediction the function tables are switched to the
         * averaging variants, so a following backward prediction is averaged
         * into dest rather than overwriting it. */
        if (s->mv_dir & MV_DIR_FORWARD) {
            ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
                          s->last_picture.f->data,
                          op_pix, op_qpix);
            op_pix  = s->hdsp.avg_pixels_tab;
            op_qpix = s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
                          s->next_picture.f->data,
                          op_pix, op_qpix);
        }

        if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
            int progressive_score, interlaced_score;

            s->interlaced_dct = 0;
            progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
                                                    ptr_y,              wrap_y,
                                                    8) +
                                s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
                                                    ptr_y + wrap_y * 8, wrap_y,
                                                    8) - 400;

            if (s->avctx->ildct_cmp == FF_CMP_VSSE)
                progressive_score -= 400;

            if (progressive_score > 0) {
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
                                                       ptr_y,
                                                       wrap_y * 2, 8) +
                                   s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
                                                       ptr_y + wrap_y,
                                                       wrap_y * 2, 8);

                if (progressive_score > interlaced_score) {
                    s->interlaced_dct = 1;

                    dct_offset = wrap_y;
                    uv_dct_offset = wrap_c;
                    wrap_y <<= 1;
                    if (s->chroma_format == CHROMA_422)
                        wrap_c <<= 1;
                }
            }
        }

        s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
                           dest_y + dct_offset, wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
                           dest_y + dct_offset + 8, wrap_y);

        if (s->flags & CODEC_FLAG_GRAY) {
            skip_dct[4] = 1;
            skip_dct[5] = 1;
        } else {
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
            if (!s->chroma_y_shift) { /* 422 */
                s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
                                   dest_cb + uv_dct_offset, wrap_c);
                s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
                                   dest_cr + uv_dct_offset, wrap_c);
            }
        }
        /* pre quantization */
        /* When the stored mc_mb_var of this macroblock is below
         * 2 * qscale^2, each block whose SAD against the prediction is below
         * 20 * qscale is marked to skip the DCT. */
        if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
                2 * s->qscale * s->qscale) {
            // FIXME optimize
            if (s->dsp.sad[1](NULL, ptr_y , dest_y,
                              wrap_y, 8) < 20 * s->qscale)
                skip_dct[0] = 1;
            if (s->dsp.sad[1](NULL, ptr_y + 8,
                              dest_y + 8, wrap_y, 8) < 20 * s->qscale)
                skip_dct[1] = 1;
            if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
                              dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
                skip_dct[2] = 1;
            if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
                              dest_y + dct_offset + 8,
                              wrap_y, 8) < 20 * s->qscale)
                skip_dct[3] = 1;
            if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
                              wrap_c, 8) < 20 * s->qscale)
                skip_dct[4] = 1;
            if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
                              wrap_c, 8) < 20 * s->qscale)
                skip_dct[5] = 1;
            if (!s->chroma_y_shift) { /* 422 */
                if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
                                  dest_cb + uv_dct_offset,
                                  wrap_c, 8) < 20 * s->qscale)
                    skip_dct[6] = 1;
                if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
                                  dest_cr + uv_dct_offset,
                                  wrap_c, 8) < 20 * s->qscale)
                    skip_dct[7] = 1;
            }
        }
    }

    /* Noise shaping: compute per-pixel weights from the source pixels and
     * keep a copy of the unquantized blocks for dct_quantize_refine(). */
    if (s->quantizer_noise_shaping) {
        if (!skip_dct[0])
            get_visual_weight(weight[0], ptr_y                 , wrap_y);
        if (!skip_dct[1])
            get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
        if (!skip_dct[2])
            get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if (!skip_dct[3])
            get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if (!skip_dct[4])
            get_visual_weight(weight[4], ptr_cb                , wrap_c);
        if (!skip_dct[5])
            get_visual_weight(weight[5], ptr_cr                , wrap_c);
        if (!s->chroma_y_shift) { /* 422 */
            if (!skip_dct[6])
                get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
                                  wrap_c);
            if (!skip_dct[7])
                get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
                                  wrap_c);
        }
        memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
    }

    /* DCT & quantize */
    av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
    {
        for (i = 0; i < mb_block_count; i++) {
            if (!skip_dct[i]) {
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
                // FIXME we could decide to change to quantizer instead of
                // clipping
                // JS: I don't think that would be a good idea it could lower
                //     quality instead of improve it. Just INTRADC clipping
                //     deserves changes in quantizer
                if (overflow)
                    clip_coeffs(s, s->block[i], s->block_last_index[i]);
            } else
                s->block_last_index[i] = -1;
        }
        if (s->quantizer_noise_shaping) {
            for (i = 0; i < mb_block_count; i++) {
                if (!skip_dct[i]) {
                    s->block_last_index[i] =
                        dct_quantize_refine(s, s->block[i], weight[i],
                                            orig[i], i, s->qscale);
                }
            }
        }

        /* Single-coefficient elimination is applied to inter macroblocks
         * only: blocks 0..3 use the luma threshold, the rest the chroma one. */
        if (s->luma_elim_threshold && !s->mb_intra)
            for (i = 0; i < 4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if (s->chroma_elim_threshold && !s->mb_intra)
            for (i = 4; i < mb_block_count; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
            for (i = 0; i < mb_block_count; i++) {
                if (s->block_last_index[i] == -1)
                    s->coded_score[i] = INT_MAX / 256;
            }
        }
    }

    /* Gray intra macroblocks: the chroma blocks carry only a DC coefficient,
     * set to (1024 + c_dc_scale / 2) / c_dc_scale. */
    if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
        s->block_last_index[4] =
        s->block_last_index[5] = 0;
        s->block[4][0] =
        s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
        if (!s->chroma_y_shift) { /* 422 / 444 */
            for (i=6; i<12; i++) {
                s->block_last_index[i] = 0;
                s->block[i][0] = s->block[4][0];
            }
        }
    }

    // non c quantize code returns incorrect block_last_index FIXME
    /* Recompute the last index by scanning backwards for the last nonzero
     * coefficient in intra_scantable order. */
    if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
        for (i = 0; i < mb_block_count; i++) {
            int j;
            if (s->block_last_index[i] > 0) {
                for (j = 63; j > 0; j--) {
                    if (s->block[i][s->intra_scantable.permutated[j]])
                        break;
                }
                s->block_last_index[i] = j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case AV_CODEC_ID_MPEG1VIDEO:
    case AV_CODEC_ID_MPEG2VIDEO:
        if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
            ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_MPEG4:
        if (CONFIG_MPEG4_ENCODER)
            ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_MSMPEG4V2:
    case AV_CODEC_ID_MSMPEG4V3:
    case AV_CODEC_ID_WMV1:
        if (CONFIG_MSMPEG4_ENCODER)
            ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_WMV2:
        if (CONFIG_WMV2_ENCODER)
            ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_H261:
        if (CONFIG_H261_ENCODER)
            ff_h261_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_H263:
    case AV_CODEC_ID_H263P:
    case AV_CODEC_ID_FLV1:
    case AV_CODEC_ID_RV10:
    case AV_CODEC_ID_RV20:
        if (CONFIG_H263_ENCODER)
            ff_h263_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_MJPEG:
    case AV_CODEC_ID_AMV:
        if (CONFIG_MJPEG_ENCODER)
            ff_mjpeg_encode_mb(s, s->block);
        break;
    default:
        av_assert1(0);
    }
}
2337
2338 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2339 {
2340     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2341     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2342     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2343 }
2344
2345 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2346     int i;
2347
2348     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2349
2350     /* mpeg1 */
2351     d->mb_skip_run= s->mb_skip_run;
2352     for(i=0; i<3; i++)
2353         d->last_dc[i] = s->last_dc[i];
2354
2355     /* statistics */
2356     d->mv_bits= s->mv_bits;
2357     d->i_tex_bits= s->i_tex_bits;
2358     d->p_tex_bits= s->p_tex_bits;
2359     d->i_count= s->i_count;
2360     d->f_count= s->f_count;
2361     d->b_count= s->b_count;
2362     d->skip_count= s->skip_count;
2363     d->misc_bits= s->misc_bits;
2364     d->last_bits= 0;
2365
2366     d->mb_skipped= 0;
2367     d->qscale= s->qscale;
2368     d->dquant= s->dquant;
2369
2370     d->esc3_level_length= s->esc3_level_length;
2371 }
2372
2373 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2374     int i;
2375
2376     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2377     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2378
2379     /* mpeg1 */
2380     d->mb_skip_run= s->mb_skip_run;
2381     for(i=0; i<3; i++)
2382         d->last_dc[i] = s->last_dc[i];
2383
2384     /* statistics */
2385     d->mv_bits= s->mv_bits;
2386     d->i_tex_bits= s->i_tex_bits;
2387     d->p_tex_bits= s->p_tex_bits;
2388     d->i_count= s->i_count;
2389     d->f_count= s->f_count;
2390     d->b_count= s->b_count;
2391     d->skip_count= s->skip_count;
2392     d->misc_bits= s->misc_bits;
2393
2394     d->mb_intra= s->mb_intra;
2395     d->mb_skipped= s->mb_skipped;
2396     d->mv_type= s->mv_type;
2397     d->mv_dir= s->mv_dir;
2398     d->pb= s->pb;
2399     if(s->data_partitioning){
2400         d->pb2= s->pb2;
2401         d->tex_pb= s->tex_pb;
2402     }
2403     d->block= s->block;
2404     for(i=0; i<8; i++)
2405         d->block_last_index[i]= s->block_last_index[i];
2406     d->interlaced_dct= s->interlaced_dct;
2407     d->qscale= s->qscale;
2408
2409     d->esc3_level_length= s->esc3_level_length;
2410 }
2411
2412 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2413                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2414                            int *dmin, int *next_block, int motion_x, int motion_y)
2415 {
2416     int score;
2417     uint8_t *dest_backup[3];
2418
2419     copy_context_before_encode(s, backup, type);
2420
2421     s->block= s->blocks[*next_block];
2422     s->pb= pb[*next_block];
2423     if(s->data_partitioning){
2424         s->pb2   = pb2   [*next_block];
2425         s->tex_pb= tex_pb[*next_block];
2426     }
2427
2428     if(*next_block){
2429         memcpy(dest_backup, s->dest, sizeof(s->dest));
2430         s->dest[0] = s->rd_scratchpad;
2431         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2432         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2433         av_assert0(s->linesize >= 32); //FIXME
2434     }
2435
2436     encode_mb(s, motion_x, motion_y);
2437
2438     score= put_bits_count(&s->pb);
2439     if(s->data_partitioning){
2440         score+= put_bits_count(&s->pb2);
2441         score+= put_bits_count(&s->tex_pb);
2442     }
2443
2444     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2445         ff_MPV_decode_mb(s, s->block);
2446
2447         score *= s->lambda2;
2448         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2449     }
2450
2451     if(*next_block){
2452         memcpy(s->dest, dest_backup, sizeof(s->dest));
2453     }
2454
2455     if(score<*dmin){
2456         *dmin= score;
2457         *next_block^=1;
2458
2459         copy_context_after_encode(best, s, type);
2460     }
2461 }
2462
2463 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2464     uint32_t *sq = ff_square_tab + 256;
2465     int acc=0;
2466     int x,y;
2467
2468     if(w==16 && h==16)
2469         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2470     else if(w==8 && h==8)
2471         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2472
2473     for(y=0; y<h; y++){
2474         for(x=0; x<w; x++){
2475             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2476         }
2477     }
2478
2479     av_assert2(acc>=0);
2480
2481     return acc;
2482 }
2483
2484 static int sse_mb(MpegEncContext *s){
2485     int w= 16;
2486     int h= 16;
2487
2488     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2489     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2490
2491     if(w==16 && h==16)
2492       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2493         return  s->dsp.nsse[0](s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2494                +s->dsp.nsse[1](s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2495                +s->dsp.nsse[1](s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2496       }else{
2497         return  s->dsp.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2498                +s->dsp.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2499                +s->dsp.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2500       }
2501     else
2502         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2503                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2504                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2505 }
2506
2507 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2508     MpegEncContext *s= *(void**)arg;
2509
2510
2511     s->me.pre_pass=1;
2512     s->me.dia_size= s->avctx->pre_dia_size;
2513     s->first_slice_line=1;
2514     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2515         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2516             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2517         }
2518         s->first_slice_line=0;
2519     }
2520
2521     s->me.pre_pass=0;
2522
2523     return 0;
2524 }
2525
2526 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2527     MpegEncContext *s= *(void**)arg;
2528
2529     ff_check_alignment();
2530
2531     s->me.dia_size= s->avctx->dia_size;
2532     s->first_slice_line=1;
2533     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2534         s->mb_x=0; //for block init below
2535         ff_init_block_index(s);
2536         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2537             s->block_index[0]+=2;
2538             s->block_index[1]+=2;
2539             s->block_index[2]+=2;
2540             s->block_index[3]+=2;
2541
2542             /* compute motion vector & mb_type and store in context */
2543             if(s->pict_type==AV_PICTURE_TYPE_B)
2544                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2545             else
2546                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2547         }
2548         s->first_slice_line=0;
2549     }
2550     return 0;
2551 }
2552
2553 static int mb_var_thread(AVCodecContext *c, void *arg){
2554     MpegEncContext *s= *(void**)arg;
2555     int mb_x, mb_y;
2556
2557     ff_check_alignment();
2558
2559     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2560         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2561             int xx = mb_x * 16;
2562             int yy = mb_y * 16;
2563             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2564             int varc;
2565             int sum = s->dsp.pix_sum(pix, s->linesize);
2566
2567             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2568
2569             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2570             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2571             s->me.mb_var_sum_temp    += varc;
2572         }
2573     }
2574     return 0;
2575 }
2576
2577 static void write_slice_end(MpegEncContext *s){
2578     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2579         if(s->partitioned_frame){
2580             ff_mpeg4_merge_partitions(s);
2581         }
2582
2583         ff_mpeg4_stuffing(&s->pb);
2584     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2585         ff_mjpeg_encode_stuffing(s);
2586     }
2587
2588     avpriv_align_put_bits(&s->pb);
2589     flush_put_bits(&s->pb);
2590
2591     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2592         s->misc_bits+= get_bits_diff(s);
2593 }
2594
2595 static void write_mb_info(MpegEncContext *s)
2596 {
2597     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2598     int offset = put_bits_count(&s->pb);
2599     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2600     int gobn = s->mb_y / s->gob_index;
2601     int pred_x, pred_y;
2602     if (CONFIG_H263_ENCODER)
2603         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2604     bytestream_put_le32(&ptr, offset);
2605     bytestream_put_byte(&ptr, s->qscale);
2606     bytestream_put_byte(&ptr, gobn);
2607     bytestream_put_le16(&ptr, mba);
2608     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2609     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2610     /* 4MV not implemented */
2611     bytestream_put_byte(&ptr, 0); /* hmv2 */
2612     bytestream_put_byte(&ptr, 0); /* vmv2 */
2613 }
2614
2615 static void update_mb_info(MpegEncContext *s, int startcode)
2616 {
2617     if (!s->mb_info)
2618         return;
2619     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2620         s->mb_info_size += 12;
2621         s->prev_mb_info = s->last_mb_info;
2622     }
2623     if (startcode) {
2624         s->prev_mb_info = put_bits_count(&s->pb)/8;
2625         /* This might have incremented mb_info_size above, and we return without
2626          * actually writing any info into that slot yet. But in that case,
2627          * this will be called again at the start of the after writing the
2628          * start code, actually writing the mb info. */
2629         return;
2630     }
2631
2632     s->last_mb_info = put_bits_count(&s->pb)/8;
2633     if (!s->mb_info_size)
2634         s->mb_info_size += 12;
2635     write_mb_info(s);
2636 }
2637
2638 static int encode_thread(AVCodecContext *c, void *arg){
2639     MpegEncContext *s= *(void**)arg;
2640     int mb_x, mb_y, pdif = 0;
2641     int chr_h= 16>>s->chroma_y_shift;
2642     int i, j;
2643     MpegEncContext best_s, backup_s;
2644     uint8_t bit_buf[2][MAX_MB_BYTES];
2645     uint8_t bit_buf2[2][MAX_MB_BYTES];
2646     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2647     PutBitContext pb[2], pb2[2], tex_pb[2];
2648
2649     ff_check_alignment();
2650
2651     for(i=0; i<2; i++){
2652         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2653         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2654         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2655     }
2656
2657     s->last_bits= put_bits_count(&s->pb);
2658     s->mv_bits=0;
2659     s->misc_bits=0;
2660     s->i_tex_bits=0;
2661     s->p_tex_bits=0;
2662     s->i_count=0;
2663     s->f_count=0;
2664     s->b_count=0;
2665     s->skip_count=0;
2666
2667     for(i=0; i<3; i++){
2668         /* init last dc values */
2669         /* note: quant matrix value (8) is implied here */
2670         s->last_dc[i] = 128 << s->intra_dc_precision;
2671
2672         s->current_picture.f->error[i] = 0;
2673     }
2674     if(s->codec_id==AV_CODEC_ID_AMV){
2675         s->last_dc[0] = 128*8/13;
2676         s->last_dc[1] = 128*8/14;
2677         s->last_dc[2] = 128*8/14;
2678     }
2679     s->mb_skip_run = 0;
2680     memset(s->last_mv, 0, sizeof(s->last_mv));
2681
2682     s->last_mv_dir = 0;
2683
2684     switch(s->codec_id){
2685     case AV_CODEC_ID_H263:
2686     case AV_CODEC_ID_H263P:
2687     case AV_CODEC_ID_FLV1:
2688         if (CONFIG_H263_ENCODER)
2689             s->gob_index = ff_h263_get_gob_height(s);
2690         break;
2691     case AV_CODEC_ID_MPEG4:
2692         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2693             ff_mpeg4_init_partitions(s);
2694         break;
2695     }
2696
2697     s->resync_mb_x=0;
2698     s->resync_mb_y=0;
2699     s->first_slice_line = 1;
2700     s->ptr_lastgob = s->pb.buf;
2701     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2702         s->mb_x=0;
2703         s->mb_y= mb_y;
2704
2705         ff_set_qscale(s, s->qscale);
2706         ff_init_block_index(s);
2707
2708         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2709             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2710             int mb_type= s->mb_type[xy];
2711 //            int d;
2712             int dmin= INT_MAX;
2713             int dir;
2714
2715             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2716                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2717                 return -1;
2718             }
2719             if(s->data_partitioning){
2720                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2721                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2722                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2723                     return -1;
2724                 }
2725             }
2726
2727             s->mb_x = mb_x;
2728             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2729             ff_update_block_index(s);
2730
2731             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2732                 ff_h261_reorder_mb_index(s);
2733                 xy= s->mb_y*s->mb_stride + s->mb_x;
2734                 mb_type= s->mb_type[xy];
2735             }
2736
2737             /* write gob / video packet header  */
2738             if(s->rtp_mode){
2739                 int current_packet_size, is_gob_start;
2740
2741                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2742
2743                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2744
2745                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2746
2747                 switch(s->codec_id){
2748                 case AV_CODEC_ID_H263:
2749                 case AV_CODEC_ID_H263P:
2750                     if(!s->h263_slice_structured)
2751                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2752                     break;
2753                 case AV_CODEC_ID_MPEG2VIDEO:
2754                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2755                 case AV_CODEC_ID_MPEG1VIDEO:
2756                     if(s->mb_skip_run) is_gob_start=0;
2757                     break;
2758                 case AV_CODEC_ID_MJPEG:
2759                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2760                     break;
2761                 }
2762
2763                 if(is_gob_start){
2764                     if(s->start_mb_y != mb_y || mb_x!=0){
2765                         write_slice_end(s);
2766
2767                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2768                             ff_mpeg4_init_partitions(s);
2769                         }
2770                     }
2771
2772                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2773                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2774
2775                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2776                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2777                         int d = 100 / s->error_rate;
2778                         if(r % d == 0){
2779                             current_packet_size=0;
2780                             s->pb.buf_ptr= s->ptr_lastgob;
2781                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2782                         }
2783                     }
2784
2785                     if (s->avctx->rtp_callback){
2786                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2787                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2788                     }
2789                     update_mb_info(s, 1);
2790
2791                     switch(s->codec_id){
2792                     case AV_CODEC_ID_MPEG4:
2793                         if (CONFIG_MPEG4_ENCODER) {
2794                             ff_mpeg4_encode_video_packet_header(s);
2795                             ff_mpeg4_clean_buffers(s);
2796                         }
2797                     break;
2798                     case AV_CODEC_ID_MPEG1VIDEO:
2799                     case AV_CODEC_ID_MPEG2VIDEO:
2800                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2801                             ff_mpeg1_encode_slice_header(s);
2802                             ff_mpeg1_clean_buffers(s);
2803                         }
2804                     break;
2805                     case AV_CODEC_ID_H263:
2806                     case AV_CODEC_ID_H263P:
2807                         if (CONFIG_H263_ENCODER)
2808                             ff_h263_encode_gob_header(s, mb_y);
2809                     break;
2810                     }
2811
2812                     if(s->flags&CODEC_FLAG_PASS1){
2813                         int bits= put_bits_count(&s->pb);
2814                         s->misc_bits+= bits - s->last_bits;
2815                         s->last_bits= bits;
2816                     }
2817
2818                     s->ptr_lastgob += current_packet_size;
2819                     s->first_slice_line=1;
2820                     s->resync_mb_x=mb_x;
2821                     s->resync_mb_y=mb_y;
2822                 }
2823             }
2824
2825             if(  (s->resync_mb_x   == s->mb_x)
2826                && s->resync_mb_y+1 == s->mb_y){
2827                 s->first_slice_line=0;
2828             }
2829
2830             s->mb_skipped=0;
2831             s->dquant=0; //only for QP_RD
2832
2833             update_mb_info(s, 0);
2834
2835             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2836                 int next_block=0;
2837                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2838
2839                 copy_context_before_encode(&backup_s, s, -1);
2840                 backup_s.pb= s->pb;
2841                 best_s.data_partitioning= s->data_partitioning;
2842                 best_s.partitioned_frame= s->partitioned_frame;
2843                 if(s->data_partitioning){
2844                     backup_s.pb2= s->pb2;
2845                     backup_s.tex_pb= s->tex_pb;
2846                 }
2847
2848                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2849                     s->mv_dir = MV_DIR_FORWARD;
2850                     s->mv_type = MV_TYPE_16X16;
2851                     s->mb_intra= 0;
2852                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2853                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2854                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2855                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2856                 }
2857                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2858                     s->mv_dir = MV_DIR_FORWARD;
2859                     s->mv_type = MV_TYPE_FIELD;
2860                     s->mb_intra= 0;
2861                     for(i=0; i<2; i++){
2862                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2863                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2864                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2865                     }
2866                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2867                                  &dmin, &next_block, 0, 0);
2868                 }
2869                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2870                     s->mv_dir = MV_DIR_FORWARD;
2871                     s->mv_type = MV_TYPE_16X16;
2872                     s->mb_intra= 0;
2873                     s->mv[0][0][0] = 0;
2874                     s->mv[0][0][1] = 0;
2875                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2876                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2877                 }
2878                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2879                     s->mv_dir = MV_DIR_FORWARD;
2880                     s->mv_type = MV_TYPE_8X8;
2881                     s->mb_intra= 0;
2882                     for(i=0; i<4; i++){
2883                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2884                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2885                     }
2886                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2887                                  &dmin, &next_block, 0, 0);
2888                 }
2889                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2890                     s->mv_dir = MV_DIR_FORWARD;
2891                     s->mv_type = MV_TYPE_16X16;
2892                     s->mb_intra= 0;
2893                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2894                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2895                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2896                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2897                 }
2898                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2899                     s->mv_dir = MV_DIR_BACKWARD;
2900                     s->mv_type = MV_TYPE_16X16;
2901                     s->mb_intra= 0;
2902                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2903                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2904                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2905                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2906                 }
2907                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2908                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2909                     s->mv_type = MV_TYPE_16X16;
2910                     s->mb_intra= 0;
2911                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2912                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2913                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2914                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2915                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2916                                  &dmin, &next_block, 0, 0);
2917                 }
2918                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2919                     s->mv_dir = MV_DIR_FORWARD;
2920                     s->mv_type = MV_TYPE_FIELD;
2921                     s->mb_intra= 0;
2922                     for(i=0; i<2; i++){
2923                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2924                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2925                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2926                     }
2927                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2928                                  &dmin, &next_block, 0, 0);
2929                 }
2930                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2931                     s->mv_dir = MV_DIR_BACKWARD;
2932                     s->mv_type = MV_TYPE_FIELD;
2933                     s->mb_intra= 0;
2934                     for(i=0; i<2; i++){
2935                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2936                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2937                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2938                     }
2939                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2940                                  &dmin, &next_block, 0, 0);
2941                 }
2942                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2943                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2944                     s->mv_type = MV_TYPE_FIELD;
2945                     s->mb_intra= 0;
2946                     for(dir=0; dir<2; dir++){
2947                         for(i=0; i<2; i++){
2948                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2949                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2950                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2951                         }
2952                     }
2953                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2954                                  &dmin, &next_block, 0, 0);
2955                 }
2956                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2957                     s->mv_dir = 0;
2958                     s->mv_type = MV_TYPE_16X16;
2959                     s->mb_intra= 1;
2960                     s->mv[0][0][0] = 0;
2961                     s->mv[0][0][1] = 0;
2962                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2963                                  &dmin, &next_block, 0, 0);
2964                     if(s->h263_pred || s->h263_aic){
2965                         if(best_s.mb_intra)
2966                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2967                         else
2968                             ff_clean_intra_table_entries(s); //old mode?
2969                     }
2970                 }
2971
2972                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2973                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2974                         const int last_qp= backup_s.qscale;
2975                         int qpi, qp, dc[6];
2976                         int16_t ac[6][16];
2977                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2978                         static const int dquant_tab[4]={-1,1,-2,2};
2979                         int storecoefs = s->mb_intra && s->dc_val[0];
2980
2981                         av_assert2(backup_s.dquant == 0);
2982
2983                         //FIXME intra
2984                         s->mv_dir= best_s.mv_dir;
2985                         s->mv_type = MV_TYPE_16X16;
2986                         s->mb_intra= best_s.mb_intra;
2987                         s->mv[0][0][0] = best_s.mv[0][0][0];
2988                         s->mv[0][0][1] = best_s.mv[0][0][1];
2989                         s->mv[1][0][0] = best_s.mv[1][0][0];
2990                         s->mv[1][0][1] = best_s.mv[1][0][1];
2991
2992                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2993                         for(; qpi<4; qpi++){
2994                             int dquant= dquant_tab[qpi];
2995                             qp= last_qp + dquant;
2996                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2997                                 continue;
2998                             backup_s.dquant= dquant;
2999                             if(storecoefs){
3000                                 for(i=0; i<6; i++){
3001                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3002                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3003                                 }
3004                             }
3005
3006                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3007                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3008                             if(best_s.qscale != qp){
3009                                 if(storecoefs){
3010                                     for(i=0; i<6; i++){
3011                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3012                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3013                                     }
3014                                 }
3015                             }
3016                         }
3017                     }
3018                 }
3019                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3020                     int mx= s->b_direct_mv_table[xy][0];
3021                     int my= s->b_direct_mv_table[xy][1];
3022
3023                     backup_s.dquant = 0;
3024                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3025                     s->mb_intra= 0;
3026                     ff_mpeg4_set_direct_mv(s, mx, my);
3027                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3028                                  &dmin, &next_block, mx, my);
3029                 }
3030                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3031                     backup_s.dquant = 0;
3032                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3033                     s->mb_intra= 0;
3034                     ff_mpeg4_set_direct_mv(s, 0, 0);
3035                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3036                                  &dmin, &next_block, 0, 0);
3037                 }
3038                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3039                     int coded=0;
3040                     for(i=0; i<6; i++)
3041                         coded |= s->block_last_index[i];
3042                     if(coded){
3043                         int mx,my;
3044                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3045                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3046                             mx=my=0; //FIXME find the one we actually used
3047                             ff_mpeg4_set_direct_mv(s, mx, my);
3048                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3049                             mx= s->mv[1][0][0];
3050                             my= s->mv[1][0][1];
3051                         }else{
3052                             mx= s->mv[0][0][0];
3053                             my= s->mv[0][0][1];
3054                         }
3055
3056                         s->mv_dir= best_s.mv_dir;
3057                         s->mv_type = best_s.mv_type;
3058                         s->mb_intra= 0;
3059 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3060                         s->mv[0][0][1] = best_s.mv[0][0][1];
3061                         s->mv[1][0][0] = best_s.mv[1][0][0];
3062                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3063                         backup_s.dquant= 0;
3064                         s->skipdct=1;
3065                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3066                                         &dmin, &next_block, mx, my);
3067                         s->skipdct=0;
3068                     }
3069                 }
3070
3071                 s->current_picture.qscale_table[xy] = best_s.qscale;
3072
3073                 copy_context_after_encode(s, &best_s, -1);
3074
3075                 pb_bits_count= put_bits_count(&s->pb);
3076                 flush_put_bits(&s->pb);
3077                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3078                 s->pb= backup_s.pb;
3079
3080                 if(s->data_partitioning){
3081                     pb2_bits_count= put_bits_count(&s->pb2);
3082                     flush_put_bits(&s->pb2);
3083                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3084                     s->pb2= backup_s.pb2;
3085
3086                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3087                     flush_put_bits(&s->tex_pb);
3088                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3089                     s->tex_pb= backup_s.tex_pb;
3090                 }
3091                 s->last_bits= put_bits_count(&s->pb);
3092
3093                 if (CONFIG_H263_ENCODER &&
3094                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3095                     ff_h263_update_motion_val(s);
3096
3097                 if(next_block==0){ //FIXME 16 vs linesize16
3098                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3099                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3100                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3101                 }
3102
3103                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3104                     ff_MPV_decode_mb(s, s->block);
3105             } else {
3106                 int motion_x = 0, motion_y = 0;
3107                 s->mv_type=MV_TYPE_16X16;
3108                 // only one MB-Type possible
3109
3110                 switch(mb_type){
3111                 case CANDIDATE_MB_TYPE_INTRA:
3112                     s->mv_dir = 0;
3113                     s->mb_intra= 1;
3114                     motion_x= s->mv[0][0][0] = 0;
3115                     motion_y= s->mv[0][0][1] = 0;
3116                     break;
3117                 case CANDIDATE_MB_TYPE_INTER:
3118                     s->mv_dir = MV_DIR_FORWARD;
3119                     s->mb_intra= 0;
3120                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3121                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3122                     break;
3123                 case CANDIDATE_MB_TYPE_INTER_I:
3124                     s->mv_dir = MV_DIR_FORWARD;
3125                     s->mv_type = MV_TYPE_FIELD;
3126                     s->mb_intra= 0;
3127                     for(i=0; i<2; i++){
3128                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3129                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3130                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3131                     }
3132                     break;
3133                 case CANDIDATE_MB_TYPE_INTER4V:
3134                     s->mv_dir = MV_DIR_FORWARD;
3135                     s->mv_type = MV_TYPE_8X8;
3136                     s->mb_intra= 0;
3137                     for(i=0; i<4; i++){
3138                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3139                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3140                     }
3141                     break;
3142                 case CANDIDATE_MB_TYPE_DIRECT:
3143                     if (CONFIG_MPEG4_ENCODER) {
3144                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3145                         s->mb_intra= 0;
3146                         motion_x=s->b_direct_mv_table[xy][0];
3147                         motion_y=s->b_direct_mv_table[xy][1];
3148                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3149                     }
3150                     break;
3151                 case CANDIDATE_MB_TYPE_DIRECT0:
3152                     if (CONFIG_MPEG4_ENCODER) {
3153                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3154                         s->mb_intra= 0;
3155                         ff_mpeg4_set_direct_mv(s, 0, 0);
3156                     }
3157                     break;
3158                 case CANDIDATE_MB_TYPE_BIDIR:
3159                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3160                     s->mb_intra= 0;
3161                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3162                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3163                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3164                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3165                     break;
3166                 case CANDIDATE_MB_TYPE_BACKWARD:
3167                     s->mv_dir = MV_DIR_BACKWARD;
3168                     s->mb_intra= 0;
3169                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3170                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3171                     break;
3172                 case CANDIDATE_MB_TYPE_FORWARD:
3173                     s->mv_dir = MV_DIR_FORWARD;
3174                     s->mb_intra= 0;
3175                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3176                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3177                     break;
3178                 case CANDIDATE_MB_TYPE_FORWARD_I:
3179                     s->mv_dir = MV_DIR_FORWARD;
3180                     s->mv_type = MV_TYPE_FIELD;
3181                     s->mb_intra= 0;
3182                     for(i=0; i<2; i++){
3183                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3184                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3185                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3186                     }
3187                     break;
3188                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3189                     s->mv_dir = MV_DIR_BACKWARD;
3190                     s->mv_type = MV_TYPE_FIELD;
3191                     s->mb_intra= 0;
3192                     for(i=0; i<2; i++){
3193                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3194                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3195                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3196                     }
3197                     break;
3198                 case CANDIDATE_MB_TYPE_BIDIR_I:
3199                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3200                     s->mv_type = MV_TYPE_FIELD;
3201                     s->mb_intra= 0;
3202                     for(dir=0; dir<2; dir++){
3203                         for(i=0; i<2; i++){
3204                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3205                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3206                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3207                         }
3208                     }
3209                     break;
3210                 default:
3211                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3212                 }
3213
3214                 encode_mb(s, motion_x, motion_y);
3215
3216                 // RAL: Update last macroblock type
3217                 s->last_mv_dir = s->mv_dir;
3218
3219                 if (CONFIG_H263_ENCODER &&
3220                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3221                     ff_h263_update_motion_val(s);
3222
3223                 ff_MPV_decode_mb(s, s->block);
3224             }
3225
3226             /* clean the MV table in IPS frames for direct mode in B frames */
3227             if(s->mb_intra /* && I,P,S_TYPE */){
3228                 s->p_mv_table[xy][0]=0;
3229                 s->p_mv_table[xy][1]=0;
3230             }
3231
3232             if(s->flags&CODEC_FLAG_PSNR){
3233                 int w= 16;
3234                 int h= 16;
3235
3236                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3237                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3238
3239                 s->current_picture.f->error[0] += sse(
3240                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3241                     s->dest[0], w, h, s->linesize);
3242                 s->current_picture.f->error[1] += sse(
3243                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3244                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3245                 s->current_picture.f->error[2] += sse(
3246                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3247                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3248             }
3249             if(s->loop_filter){
3250                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3251                     ff_h263_loop_filter(s);
3252             }
3253             av_dlog(s->avctx, "MB %d %d bits\n",
3254                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3255         }
3256     }
3257
3258     //not beautiful here but we must write it before flushing so it has to be here
3259     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3260         ff_msmpeg4_encode_ext_header(s);
3261
3262     write_slice_end(s);
3263
3264     /* Send the last GOB if RTP */
3265     if (s->avctx->rtp_callback) {
3266         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3267         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3268         /* Call the RTP callback to send the last GOB */
3269         emms_c();
3270         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3271     }
3272
3273     return 0;
3274 }
3275
/**
 * Add src->field onto dst->field and reset src->field to 0.
 *
 * Pointers named dst and src must be in scope where the macro is used.
 * The body is wrapped in do { } while (0) so that both assignments always
 * behave as one statement, e.g. as the body of an unbraced if/else or loop.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3277 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3278     MERGE(me.scene_change_score);
3279     MERGE(me.mc_mb_var_sum_temp);
3280     MERGE(me.mb_var_sum_temp);
3281 }
3282
3283 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3284     int i;
3285
3286     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3287     MERGE(dct_count[1]);
3288     MERGE(mv_bits);
3289     MERGE(i_tex_bits);
3290     MERGE(p_tex_bits);
3291     MERGE(i_count);
3292     MERGE(f_count);
3293     MERGE(b_count);
3294     MERGE(skip_count);
3295     MERGE(misc_bits);
3296     MERGE(er.error_count);
3297     MERGE(padding_bug_score);
3298     MERGE(current_picture.f->error[0]);
3299     MERGE(current_picture.f->error[1]);
3300     MERGE(current_picture.f->error[2]);
3301
3302     if(dst->avctx->noise_reduction){
3303         for(i=0; i<64; i++){
3304             MERGE(dct_error_sum[0][i]);
3305             MERGE(dct_error_sum[1][i]);
3306         }
3307     }
3308
3309     assert(put_bits_count(&src->pb) % 8 ==0);
3310     assert(put_bits_count(&dst->pb) % 8 ==0);
3311     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3312     flush_put_bits(&dst->pb);
3313 }
3314
3315 static int estimate_qp(MpegEncContext *s, int dry_run){
3316     if (s->next_lambda){
3317         s->current_picture_ptr->f->quality =
3318         s->current_picture.f->quality = s->next_lambda;
3319         if(!dry_run) s->next_lambda= 0;
3320     } else if (!s->fixed_qscale) {
3321         s->current_picture_ptr->f->quality =
3322         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3323         if (s->current_picture.f->quality < 0)
3324             return -1;
3325     }
3326
3327     if(s->adaptive_quant){
3328         switch(s->codec_id){
3329         case AV_CODEC_ID_MPEG4:
3330             if (CONFIG_MPEG4_ENCODER)
3331                 ff_clean_mpeg4_qscales(s);
3332             break;
3333         case AV_CODEC_ID_H263:
3334         case AV_CODEC_ID_H263P:
3335         case AV_CODEC_ID_FLV1:
3336             if (CONFIG_H263_ENCODER)
3337                 ff_clean_h263_qscales(s);
3338             break;
3339         default:
3340             ff_init_qscale_tab(s);
3341         }
3342
3343         s->lambda= s->lambda_table[0];
3344         //FIXME broken
3345     }else
3346         s->lambda = s->current_picture.f->quality;
3347     update_qscale(s);
3348     return 0;
3349 }
3350
3351 /* must be called before writing the header */
3352 static void set_frame_distances(MpegEncContext * s){
3353     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3354     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3355
3356     if(s->pict_type==AV_PICTURE_TYPE_B){
3357         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3358         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3359     }else{
3360         s->pp_time= s->time - s->last_non_b_time;
3361         s->last_non_b_time= s->time;
3362         assert(s->picture_number==0 || s->pp_time > 0);
3363     }
3364 }
3365
3366 static int encode_picture(MpegEncContext *s, int picture_number)
3367 {
3368     int i, ret;
3369     int bits;
3370     int context_count = s->slice_context_count;
3371
3372     s->picture_number = picture_number;
3373
3374     /* Reset the average MB variance */
3375     s->me.mb_var_sum_temp    =
3376     s->me.mc_mb_var_sum_temp = 0;
3377
3378     /* we need to initialize some time vars before we can encode b-frames */
3379     // RAL: Condition added for MPEG1VIDEO
3380     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3381         set_frame_distances(s);
3382     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3383         ff_set_mpeg4_time(s);
3384
3385     s->me.scene_change_score=0;
3386
3387 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3388
3389     if(s->pict_type==AV_PICTURE_TYPE_I){
3390         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3391         else                        s->no_rounding=0;
3392     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3393         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3394             s->no_rounding ^= 1;
3395     }
3396
3397     if(s->flags & CODEC_FLAG_PASS2){
3398         if (estimate_qp(s,1) < 0)
3399             return -1;
3400         ff_get_2pass_fcode(s);
3401     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3402         if(s->pict_type==AV_PICTURE_TYPE_B)
3403             s->lambda= s->last_lambda_for[s->pict_type];
3404         else
3405             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3406         update_qscale(s);
3407     }
3408
3409     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3410         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3411         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3412         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3413         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3414     }
3415
3416     s->mb_intra=0; //for the rate distortion & bit compare functions
3417     for(i=1; i<context_count; i++){
3418         ret = ff_update_duplicate_context(s->thread_context[i], s);
3419         if (ret < 0)
3420             return ret;
3421     }
3422
3423     if(ff_init_me(s)<0)
3424         return -1;
3425
3426     /* Estimate motion for every MB */
3427     if(s->pict_type != AV_PICTURE_TYPE_I){
3428         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3429         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3430         if (s->pict_type != AV_PICTURE_TYPE_B) {
3431             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3432                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3433             }
3434         }
3435
3436         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3437     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3438         /* I-Frame */
3439         for(i=0; i<s->mb_stride*s->mb_height; i++)
3440             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3441
3442         if(!s->fixed_qscale){
3443             /* finding spatial complexity for I-frame rate control */
3444             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3445         }
3446     }
3447     for(i=1; i<context_count; i++){
3448         merge_context_after_me(s, s->thread_context[i]);
3449     }
3450     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3451     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3452     emms_c();
3453
3454     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3455         s->pict_type= AV_PICTURE_TYPE_I;
3456         for(i=0; i<s->mb_stride*s->mb_height; i++)
3457             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3458         if(s->msmpeg4_version >= 3)
3459             s->no_rounding=1;
3460         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3461                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3462     }
3463
3464     if(!s->umvplus){
3465         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3466             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3467
3468             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3469                 int a,b;
3470                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3471                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3472                 s->f_code= FFMAX3(s->f_code, a, b);
3473             }
3474
3475             ff_fix_long_p_mvs(s);
3476             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3477             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3478                 int j;
3479                 for(i=0; i<2; i++){
3480                     for(j=0; j<2; j++)
3481                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3482                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3483                 }
3484             }
3485         }
3486
3487         if(s->pict_type==AV_PICTURE_TYPE_B){
3488             int a, b;
3489
3490             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3491             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3492             s->f_code = FFMAX(a, b);
3493
3494             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3495             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3496             s->b_code = FFMAX(a, b);
3497
3498             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3499             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3500             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3501             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3502             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3503                 int dir, j;
3504                 for(dir=0; dir<2; dir++){
3505                     for(i=0; i<2; i++){
3506                         for(j=0; j<2; j++){
3507                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3508                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3509                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3510                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3511                         }
3512                     }
3513                 }
3514             }
3515         }
3516     }
3517
3518     if (estimate_qp(s, 0) < 0)
3519         return -1;
3520
3521     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3522         s->qscale= 3; //reduce clipping problems
3523
3524     if (s->out_format == FMT_MJPEG) {
3525         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3526         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3527
3528         if (s->avctx->intra_matrix) {
3529             chroma_matrix =
3530             luma_matrix = s->avctx->intra_matrix;
3531         }
3532         if (s->avctx->chroma_intra_matrix)
3533             chroma_matrix = s->avctx->chroma_intra_matrix;
3534
3535         /* for mjpeg, we do include qscale in the matrix */
3536         for(i=1;i<64;i++){
3537             int j= s->dsp.idct_permutation[i];
3538
3539             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3540             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3541         }
3542         s->y_dc_scale_table=
3543         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3544         s->chroma_intra_matrix[0] =
3545         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3546         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3547                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3548         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3549                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3550         s->qscale= 8;
3551     }
3552     if(s->codec_id == AV_CODEC_ID_AMV){
3553         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3554         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3555         for(i=1;i<64;i++){
3556             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3557
3558             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3559             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3560         }
3561         s->y_dc_scale_table= y;
3562         s->c_dc_scale_table= c;
3563         s->intra_matrix[0] = 13;
3564         s->chroma_intra_matrix[0] = 14;
3565         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3566                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3567         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3568                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3569         s->qscale= 8;
3570     }
3571
3572     //FIXME var duplication
3573     s->current_picture_ptr->f->key_frame =
3574     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3575     s->current_picture_ptr->f->pict_type =
3576     s->current_picture.f->pict_type = s->pict_type;
3577
3578     if (s->current_picture.f->key_frame)
3579         s->picture_in_gop_number=0;
3580
3581     s->mb_x = s->mb_y = 0;
3582     s->last_bits= put_bits_count(&s->pb);
3583     switch(s->out_format) {
3584     case FMT_MJPEG:
3585         if (CONFIG_MJPEG_ENCODER)
3586             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3587                                            s->intra_matrix, s->chroma_intra_matrix);
3588         break;
3589     case FMT_H261:
3590         if (CONFIG_H261_ENCODER)
3591             ff_h261_encode_picture_header(s, picture_number);
3592         break;
3593     case FMT_H263:
3594         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3595             ff_wmv2_encode_picture_header(s, picture_number);
3596         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3597             ff_msmpeg4_encode_picture_header(s, picture_number);
3598         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3599             ff_mpeg4_encode_picture_header(s, picture_number);
3600         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3601             ff_rv10_encode_picture_header(s, picture_number);
3602         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3603             ff_rv20_encode_picture_header(s, picture_number);
3604         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3605             ff_flv_encode_picture_header(s, picture_number);
3606         else if (CONFIG_H263_ENCODER)
3607             ff_h263_encode_picture_header(s, picture_number);
3608         break;
3609     case FMT_MPEG1:
3610         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3611             ff_mpeg1_encode_picture_header(s, picture_number);
3612         break;
3613     default:
3614         av_assert0(0);
3615     }
3616     bits= put_bits_count(&s->pb);
3617     s->header_bits= bits - s->last_bits;
3618
3619     for(i=1; i<context_count; i++){
3620         update_duplicate_context_after_me(s->thread_context[i], s);
3621     }
3622     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3623     for(i=1; i<context_count; i++){
3624         merge_context_after_encode(s, s->thread_context[i]);
3625     }
3626     emms_c();
3627     return 0;
3628 }
3629
3630 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3631     const int intra= s->mb_intra;
3632     int i;
3633
3634     s->dct_count[intra]++;
3635
3636     for(i=0; i<64; i++){
3637         int level= block[i];
3638
3639         if(level){
3640             if(level>0){
3641                 s->dct_error_sum[intra][i] += level;
3642                 level -= s->dct_offset[intra][i];
3643                 if(level<0) level=0;
3644             }else{
3645                 s->dct_error_sum[intra][i] -= level;
3646                 level += s->dct_offset[intra][i];
3647                 if(level>0) level=0;
3648             }
3649             block[i]= level;
3650         }
3651     }
3652 }
3653
3654 static int dct_quantize_trellis_c(MpegEncContext *s,
3655                                   int16_t *block, int n,
3656                                   int qscale, int *overflow){
3657     const int *qmat;
3658     const uint8_t *scantable= s->intra_scantable.scantable;
3659     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3660     int max=0;
3661     unsigned int threshold1, threshold2;
3662     int bias=0;
3663     int run_tab[65];
3664     int level_tab[65];
3665     int score_tab[65];
3666     int survivor[65];
3667     int survivor_count;
3668     int last_run=0;
3669     int last_level=0;
3670     int last_score= 0;
3671     int last_i;
3672     int coeff[2][64];
3673     int coeff_count[64];
3674     int qmul, qadd, start_i, last_non_zero, i, dc;
3675     const int esc_length= s->ac_esc_length;
3676     uint8_t * length;
3677     uint8_t * last_length;
3678     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3679
3680     s->dsp.fdct (block);
3681
3682     if(s->dct_error_sum)
3683         s->denoise_dct(s, block);
3684     qmul= qscale*16;
3685     qadd= ((qscale-1)|1)*8;
3686
3687     if (s->mb_intra) {
3688         int q;
3689         if (!s->h263_aic) {
3690             if (n < 4)
3691                 q = s->y_dc_scale;
3692             else
3693                 q = s->c_dc_scale;
3694             q = q << 3;
3695         } else{
3696             /* For AIC we skip quant/dequant of INTRADC */
3697             q = 1 << 3;
3698             qadd=0;
3699         }
3700
3701         /* note: block[0] is assumed to be positive */
3702         block[0] = (block[0] + (q >> 1)) / q;
3703         start_i = 1;
3704         last_non_zero = 0;
3705         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3706         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3707             bias= 1<<(QMAT_SHIFT-1);
3708         length     = s->intra_ac_vlc_length;
3709         last_length= s->intra_ac_vlc_last_length;
3710     } else {
3711         start_i = 0;
3712         last_non_zero = -1;
3713         qmat = s->q_inter_matrix[qscale];
3714         length     = s->inter_ac_vlc_length;
3715         last_length= s->inter_ac_vlc_last_length;
3716     }
3717     last_i= start_i;
3718
3719     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3720     threshold2= (threshold1<<1);
3721
3722     for(i=63; i>=start_i; i--) {
3723         const int j = scantable[i];
3724         int level = block[j] * qmat[j];
3725
3726         if(((unsigned)(level+threshold1))>threshold2){
3727             last_non_zero = i;
3728             break;
3729         }
3730     }
3731
3732     for(i=start_i; i<=last_non_zero; i++) {
3733         const int j = scantable[i];
3734         int level = block[j] * qmat[j];
3735
3736 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3737 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3738         if(((unsigned)(level+threshold1))>threshold2){
3739             if(level>0){
3740                 level= (bias + level)>>QMAT_SHIFT;
3741                 coeff[0][i]= level;
3742                 coeff[1][i]= level-1;
3743 //                coeff[2][k]= level-2;
3744             }else{
3745                 level= (bias - level)>>QMAT_SHIFT;
3746                 coeff[0][i]= -level;
3747                 coeff[1][i]= -level+1;
3748 //                coeff[2][k]= -level+2;
3749             }
3750             coeff_count[i]= FFMIN(level, 2);
3751             av_assert2(coeff_count[i]);
3752             max |=level;
3753         }else{
3754             coeff[0][i]= (level>>31)|1;
3755             coeff_count[i]= 1;
3756         }
3757     }
3758
3759     *overflow= s->max_qcoeff < max; //overflow might have happened
3760
3761     if(last_non_zero < start_i){
3762         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3763         return last_non_zero;
3764     }
3765
3766     score_tab[start_i]= 0;
3767     survivor[0]= start_i;
3768     survivor_count= 1;
3769
3770     for(i=start_i; i<=last_non_zero; i++){
3771         int level_index, j, zero_distortion;
3772         int dct_coeff= FFABS(block[ scantable[i] ]);
3773         int best_score=256*256*256*120;
3774
3775         if (s->dsp.fdct == ff_fdct_ifast)
3776             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3777         zero_distortion= dct_coeff*dct_coeff;
3778
3779         for(level_index=0; level_index < coeff_count[i]; level_index++){
3780             int distortion;
3781             int level= coeff[level_index][i];
3782             const int alevel= FFABS(level);
3783             int unquant_coeff;
3784
3785             av_assert2(level);
3786
3787             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3788                 unquant_coeff= alevel*qmul + qadd;
3789             }else{ //MPEG1
3790                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3791                 if(s->mb_intra){
3792                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3793                         unquant_coeff =   (unquant_coeff - 1) | 1;
3794                 }else{
3795                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3796                         unquant_coeff =   (unquant_coeff - 1) | 1;
3797                 }
3798                 unquant_coeff<<= 3;
3799             }
3800
3801             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3802             level+=64;
3803             if((level&(~127)) == 0){
3804                 for(j=survivor_count-1; j>=0; j--){
3805                     int run= i - survivor[j];
3806                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3807                     score += score_tab[i-run];
3808
3809                     if(score < best_score){
3810                         best_score= score;
3811                         run_tab[i+1]= run;
3812                         level_tab[i+1]= level-64;
3813                     }
3814                 }
3815
3816                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3817                     for(j=survivor_count-1; j>=0; j--){
3818                         int run= i - survivor[j];
3819                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3820                         score += score_tab[i-run];
3821                         if(score < last_score){
3822                             last_score= score;
3823                             last_run= run;
3824                             last_level= level-64;
3825                             last_i= i+1;
3826                         }
3827                     }
3828                 }
3829             }else{
3830                 distortion += esc_length*lambda;
3831                 for(j=survivor_count-1; j>=0; j--){
3832                     int run= i - survivor[j];
3833                     int score= distortion + score_tab[i-run];
3834
3835                     if(score < best_score){
3836                         best_score= score;
3837                         run_tab[i+1]= run;
3838                         level_tab[i+1]= level-64;
3839                     }
3840                 }
3841
3842                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3843                   for(j=survivor_count-1; j>=0; j--){
3844                         int run= i - survivor[j];
3845                         int score= distortion + score_tab[i-run];
3846                         if(score < last_score){
3847                             last_score= score;
3848                             last_run= run;
3849                             last_level= level-64;
3850                             last_i= i+1;
3851                         }
3852                     }
3853                 }
3854             }
3855         }
3856
3857         score_tab[i+1]= best_score;
3858
3859         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3860         if(last_non_zero <= 27){
3861             for(; survivor_count; survivor_count--){
3862                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3863                     break;
3864             }
3865         }else{
3866             for(; survivor_count; survivor_count--){
3867                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3868                     break;
3869             }
3870         }
3871
3872         survivor[ survivor_count++ ]= i+1;
3873     }
3874
3875     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3876         last_score= 256*256*256*120;
3877         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3878             int score= score_tab[i];
3879             if(i) score += lambda*2; //FIXME exacter?
3880
3881             if(score < last_score){
3882                 last_score= score;
3883                 last_i= i;
3884                 last_level= level_tab[i];
3885                 last_run= run_tab[i];
3886             }
3887         }
3888     }
3889
3890     s->coded_score[n] = last_score;
3891
3892     dc= FFABS(block[0]);
3893     last_non_zero= last_i - 1;
3894     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3895
3896     if(last_non_zero < start_i)
3897         return last_non_zero;
3898
3899     if(last_non_zero == 0 && start_i == 0){
3900         int best_level= 0;
3901         int best_score= dc * dc;
3902
3903         for(i=0; i<coeff_count[0]; i++){
3904             int level= coeff[i][0];
3905             int alevel= FFABS(level);
3906             int unquant_coeff, score, distortion;
3907
3908             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3909                     unquant_coeff= (alevel*qmul + qadd)>>3;
3910             }else{ //MPEG1
3911                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3912                     unquant_coeff =   (unquant_coeff - 1) | 1;
3913             }
3914             unquant_coeff = (unquant_coeff + 4) >> 3;
3915             unquant_coeff<<= 3 + 3;
3916
3917             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3918             level+=64;
3919             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3920             else                    score= distortion + esc_length*lambda;
3921
3922             if(score < best_score){
3923                 best_score= score;
3924                 best_level= level - 64;
3925             }
3926         }
3927         block[0]= best_level;
3928         s->coded_score[n] = best_score - dc*dc;
3929         if(best_level == 0) return -1;
3930         else                return last_non_zero;
3931     }
3932
3933     i= last_i;
3934     av_assert2(last_level);
3935
3936     block[ perm_scantable[last_non_zero] ]= last_level;
3937     i -= last_run + 1;
3938
3939     for(; i>start_i; i -= run_tab[i] + 1){
3940         block[ perm_scantable[i-1] ]= level_tab[i];
3941     }
3942
3943     return last_non_zero;
3944 }
3945
3946 //#define REFINE_STATS 1
3947 static int16_t basis[64][64];
3948
3949 static void build_basis(uint8_t *perm){
3950     int i, j, x, y;
3951     emms_c();
3952     for(i=0; i<8; i++){
3953         for(j=0; j<8; j++){
3954             for(y=0; y<8; y++){
3955                 for(x=0; x<8; x++){
3956                     double s= 0.25*(1<<BASIS_SHIFT);
3957                     int index= 8*i + j;
3958                     int perm_index= perm[index];
3959                     if(i==0) s*= sqrt(0.5);
3960                     if(j==0) s*= sqrt(0.5);
3961                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3962                 }
3963             }
3964         }
3965     }
3966 }
3967
3968 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3969                         int16_t *block, int16_t *weight, int16_t *orig,
3970                         int n, int qscale){
3971     int16_t rem[64];
3972     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3973     const uint8_t *scantable= s->intra_scantable.scantable;
3974     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3975 //    unsigned int threshold1, threshold2;
3976 //    int bias=0;
3977     int run_tab[65];
3978     int prev_run=0;
3979     int prev_level=0;
3980     int qmul, qadd, start_i, last_non_zero, i, dc;
3981     uint8_t * length;
3982     uint8_t * last_length;
3983     int lambda;
3984     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3985 #ifdef REFINE_STATS
3986 static int count=0;
3987 static int after_last=0;
3988 static int to_zero=0;
3989 static int from_zero=0;
3990 static int raise=0;
3991 static int lower=0;
3992 static int messed_sign=0;
3993 #endif
3994
3995     if(basis[0][0] == 0)
3996         build_basis(s->dsp.idct_permutation);
3997
3998     qmul= qscale*2;
3999     qadd= (qscale-1)|1;
4000     if (s->mb_intra) {
4001         if (!s->h263_aic) {
4002             if (n < 4)
4003                 q = s->y_dc_scale;
4004             else
4005                 q = s->c_dc_scale;
4006         } else{
4007             /* For AIC we skip quant/dequant of INTRADC */
4008             q = 1;
4009             qadd=0;
4010         }
4011         q <<= RECON_SHIFT-3;
4012         /* note: block[0] is assumed to be positive */
4013         dc= block[0]*q;
4014 //        block[0] = (block[0] + (q >> 1)) / q;
4015         start_i = 1;
4016 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4017 //            bias= 1<<(QMAT_SHIFT-1);
4018         length     = s->intra_ac_vlc_length;
4019         last_length= s->intra_ac_vlc_last_length;
4020     } else {
4021         dc= 0;
4022         start_i = 0;
4023         length     = s->inter_ac_vlc_length;
4024         last_length= s->inter_ac_vlc_last_length;
4025     }
4026     last_non_zero = s->block_last_index[n];
4027
4028 #ifdef REFINE_STATS
4029 {START_TIMER
4030 #endif
4031     dc += (1<<(RECON_SHIFT-1));
4032     for(i=0; i<64; i++){
4033         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4034     }
4035 #ifdef REFINE_STATS
4036 STOP_TIMER("memset rem[]")}
4037 #endif
4038     sum=0;
4039     for(i=0; i<64; i++){
4040         int one= 36;
4041         int qns=4;
4042         int w;
4043
4044         w= FFABS(weight[i]) + qns*one;
4045         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4046
4047         weight[i] = w;
4048 //        w=weight[i] = (63*qns + (w/2)) / w;
4049
4050         av_assert2(w>0);
4051         av_assert2(w<(1<<6));
4052         sum += w*w;
4053     }
4054     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4055 #ifdef REFINE_STATS
4056 {START_TIMER
4057 #endif
4058     run=0;
4059     rle_index=0;
4060     for(i=start_i; i<=last_non_zero; i++){
4061         int j= perm_scantable[i];
4062         const int level= block[j];
4063         int coeff;
4064
4065         if(level){
4066             if(level<0) coeff= qmul*level - qadd;
4067             else        coeff= qmul*level + qadd;
4068             run_tab[rle_index++]=run;
4069             run=0;
4070
4071             s->dsp.add_8x8basis(rem, basis[j], coeff);
4072         }else{
4073             run++;
4074         }
4075     }
4076 #ifdef REFINE_STATS
4077 if(last_non_zero>0){
4078 STOP_TIMER("init rem[]")
4079 }
4080 }
4081
4082 {START_TIMER
4083 #endif
4084     for(;;){
4085         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4086         int best_coeff=0;
4087         int best_change=0;
4088         int run2, best_unquant_change=0, analyze_gradient;
4089 #ifdef REFINE_STATS
4090 {START_TIMER
4091 #endif
4092         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4093
4094         if(analyze_gradient){
4095 #ifdef REFINE_STATS
4096 {START_TIMER
4097 #endif
4098             for(i=0; i<64; i++){
4099                 int w= weight[i];
4100
4101                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4102             }
4103 #ifdef REFINE_STATS
4104 STOP_TIMER("rem*w*w")}
4105 {START_TIMER
4106 #endif
4107             s->dsp.fdct(d1);
4108 #ifdef REFINE_STATS
4109 STOP_TIMER("dct")}
4110 #endif
4111         }
4112
4113         if(start_i){
4114             const int level= block[0];
4115             int change, old_coeff;
4116
4117             av_assert2(s->mb_intra);
4118
4119             old_coeff= q*level;
4120
4121             for(change=-1; change<=1; change+=2){
4122                 int new_level= level + change;
4123                 int score, new_coeff;
4124
4125                 new_coeff= q*new_level;
4126                 if(new_coeff >= 2048 || new_coeff < 0)
4127                     continue;
4128
4129                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4130                 if(score<best_score){
4131                     best_score= score;
4132                     best_coeff= 0;
4133                     best_change= change;
4134                     best_unquant_change= new_coeff - old_coeff;
4135                 }
4136             }
4137         }
4138
4139         run=0;
4140         rle_index=0;
4141         run2= run_tab[rle_index++];
4142         prev_level=0;
4143         prev_run=0;
4144
4145         for(i=start_i; i<64; i++){
4146             int j= perm_scantable[i];
4147             const int level= block[j];
4148             int change, old_coeff;
4149
4150             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4151                 break;
4152
4153             if(level){
4154                 if(level<0) old_coeff= qmul*level - qadd;
4155                 else        old_coeff= qmul*level + qadd;
4156                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4157             }else{
4158                 old_coeff=0;
4159                 run2--;
4160                 av_assert2(run2>=0 || i >= last_non_zero );
4161             }
4162
4163             for(change=-1; change<=1; change+=2){
4164                 int new_level= level + change;
4165                 int score, new_coeff, unquant_change;
4166
4167                 score=0;
4168                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4169                    continue;
4170
4171                 if(new_level){
4172                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4173                     else            new_coeff= qmul*new_level + qadd;
4174                     if(new_coeff >= 2048 || new_coeff <= -2048)
4175                         continue;
4176                     //FIXME check for overflow
4177
4178                     if(level){
4179                         if(level < 63 && level > -63){
4180                             if(i < last_non_zero)
4181                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4182                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4183                             else
4184                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4185                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4186                         }
4187                     }else{
4188                         av_assert2(FFABS(new_level)==1);
4189
4190                         if(analyze_gradient){
4191                             int g= d1[ scantable[i] ];
4192                             if(g && (g^new_level) >= 0)
4193                                 continue;
4194                         }
4195
4196                         if(i < last_non_zero){
4197                             int next_i= i + run2 + 1;
4198                             int next_level= block[ perm_scantable[next_i] ] + 64;
4199
4200                             if(next_level&(~127))
4201                                 next_level= 0;
4202
4203                             if(next_i < last_non_zero)
4204                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4205                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4206                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4207                             else
4208                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4209                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4210                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4211                         }else{
4212                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4213                             if(prev_level){
4214                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4215                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4216                             }
4217                         }
4218                     }
4219                 }else{
4220                     new_coeff=0;
4221                     av_assert2(FFABS(level)==1);
4222
4223                     if(i < last_non_zero){
4224                         int next_i= i + run2 + 1;
4225                         int next_level= block[ perm_scantable[next_i] ] + 64;
4226
4227                         if(next_level&(~127))
4228                             next_level= 0;
4229
4230                         if(next_i < last_non_zero)
4231                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4232                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4233                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4234                         else
4235                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4236                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4237                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4238                     }else{
4239                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4240                         if(prev_level){
4241                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4242                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4243                         }
4244                     }
4245                 }
4246
4247                 score *= lambda;
4248
4249                 unquant_change= new_coeff - old_coeff;
4250                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4251
4252                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4253                 if(score<best_score){
4254                     best_score= score;
4255                     best_coeff= i;
4256                     best_change= change;
4257                     best_unquant_change= unquant_change;
4258                 }
4259             }
4260             if(level){
4261                 prev_level= level + 64;
4262                 if(prev_level&(~127))
4263                     prev_level= 0;
4264                 prev_run= run;
4265                 run=0;
4266             }else{
4267                 run++;
4268             }
4269         }
4270 #ifdef REFINE_STATS
4271 STOP_TIMER("iterative step")}
4272 #endif
4273
4274         if(best_change){
4275             int j= perm_scantable[ best_coeff ];
4276
4277             block[j] += best_change;
4278
4279             if(best_coeff > last_non_zero){
4280                 last_non_zero= best_coeff;
4281                 av_assert2(block[j]);
4282 #ifdef REFINE_STATS
4283 after_last++;
4284 #endif
4285             }else{
4286 #ifdef REFINE_STATS
4287 if(block[j]){
4288     if(block[j] - best_change){
4289         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4290             raise++;
4291         }else{
4292             lower++;
4293         }
4294     }else{
4295         from_zero++;
4296     }
4297 }else{
4298     to_zero++;
4299 }
4300 #endif
4301                 for(; last_non_zero>=start_i; last_non_zero--){
4302                     if(block[perm_scantable[last_non_zero]])
4303                         break;
4304                 }
4305             }
4306 #ifdef REFINE_STATS
4307 count++;
4308 if(256*256*256*64 % count == 0){
4309     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4310 }
4311 #endif
4312             run=0;
4313             rle_index=0;
4314             for(i=start_i; i<=last_non_zero; i++){
4315                 int j= perm_scantable[i];
4316                 const int level= block[j];
4317
4318                  if(level){
4319                      run_tab[rle_index++]=run;
4320                      run=0;
4321                  }else{
4322                      run++;
4323                  }
4324             }
4325
4326             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4327         }else{
4328             break;
4329         }
4330     }
4331 #ifdef REFINE_STATS
4332 if(last_non_zero>0){
4333 STOP_TIMER("iterative search")
4334 }
4335 }
4336 #endif
4337
4338     return last_non_zero;
4339 }
4340
4341 int ff_dct_quantize_c(MpegEncContext *s,
4342                         int16_t *block, int n,
4343                         int qscale, int *overflow)
4344 {
4345     int i, j, level, last_non_zero, q, start_i;
4346     const int *qmat;
4347     const uint8_t *scantable= s->intra_scantable.scantable;
4348     int bias;
4349     int max=0;
4350     unsigned int threshold1, threshold2;
4351
4352     s->dsp.fdct (block);
4353
4354     if(s->dct_error_sum)
4355         s->denoise_dct(s, block);
4356
4357     if (s->mb_intra) {
4358         if (!s->h263_aic) {
4359             if (n < 4)
4360                 q = s->y_dc_scale;
4361             else
4362                 q = s->c_dc_scale;
4363             q = q << 3;
4364         } else
4365             /* For AIC we skip quant/dequant of INTRADC */
4366             q = 1 << 3;
4367
4368         /* note: block[0] is assumed to be positive */
4369         block[0] = (block[0] + (q >> 1)) / q;
4370         start_i = 1;
4371         last_non_zero = 0;
4372         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4373         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4374     } else {
4375         start_i = 0;
4376         last_non_zero = -1;
4377         qmat = s->q_inter_matrix[qscale];
4378         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4379     }
4380     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4381     threshold2= (threshold1<<1);
4382     for(i=63;i>=start_i;i--) {
4383         j = scantable[i];
4384         level = block[j] * qmat[j];
4385
4386         if(((unsigned)(level+threshold1))>threshold2){
4387             last_non_zero = i;
4388             break;
4389         }else{
4390             block[j]=0;
4391         }
4392     }
4393     for(i=start_i; i<=last_non_zero; i++) {
4394         j = scantable[i];
4395         level = block[j] * qmat[j];
4396
4397 //        if(   bias+level >= (1<<QMAT_SHIFT)
4398 //           || bias-level >= (1<<QMAT_SHIFT)){
4399         if(((unsigned)(level+threshold1))>threshold2){
4400             if(level>0){
4401                 level= (bias + level)>>QMAT_SHIFT;
4402                 block[j]= level;
4403             }else{
4404                 level= (bias - level)>>QMAT_SHIFT;
4405                 block[j]= -level;
4406             }
4407             max |=level;
4408         }else{
4409             block[j]=0;
4410         }
4411     }
4412     *overflow= s->max_qcoeff < max; //overflow might have happened
4413
4414     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4415     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4416         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4417
4418     return last_non_zero;
4419 }
4420
/* Byte offset of field x inside MpegEncContext, for AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by all video encoding options below; parenthesised so the
 * expansion stays a single operand in any surrounding expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4423 static const AVOption h263_options[] = {
4424     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4425     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4426     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4427     FF_MPV_COMMON_OPTS
4428     { NULL },
4429 };
4430
4431 static const AVClass h263_class = {
4432     .class_name = "H.263 encoder",
4433     .item_name  = av_default_item_name,
4434     .option     = h263_options,
4435     .version    = LIBAVUTIL_VERSION_INT,
4436 };
4437
4438 AVCodec ff_h263_encoder = {
4439     .name           = "h263",
4440     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4441     .type           = AVMEDIA_TYPE_VIDEO,
4442     .id             = AV_CODEC_ID_H263,
4443     .priv_data_size = sizeof(MpegEncContext),
4444     .init           = ff_MPV_encode_init,
4445     .encode2        = ff_MPV_encode_picture,
4446     .close          = ff_MPV_encode_end,
4447     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4448     .priv_class     = &h263_class,
4449 };
4450
4451 static const AVOption h263p_options[] = {
4452     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4453     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4454     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4455     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4456     FF_MPV_COMMON_OPTS
4457     { NULL },
4458 };
4459 static const AVClass h263p_class = {
4460     .class_name = "H.263p encoder",
4461     .item_name  = av_default_item_name,
4462     .option     = h263p_options,
4463     .version    = LIBAVUTIL_VERSION_INT,
4464 };
4465
4466 AVCodec ff_h263p_encoder = {
4467     .name           = "h263p",
4468     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4469     .type           = AVMEDIA_TYPE_VIDEO,
4470     .id             = AV_CODEC_ID_H263P,
4471     .priv_data_size = sizeof(MpegEncContext),
4472     .init           = ff_MPV_encode_init,
4473     .encode2        = ff_MPV_encode_picture,
4474     .close          = ff_MPV_encode_end,
4475     .capabilities   = CODEC_CAP_SLICE_THREADS,
4476     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4477     .priv_class     = &h263p_class,
4478 };
4479
4480 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4481
4482 AVCodec ff_msmpeg4v2_encoder = {
4483     .name           = "msmpeg4v2",
4484     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4485     .type           = AVMEDIA_TYPE_VIDEO,
4486     .id             = AV_CODEC_ID_MSMPEG4V2,
4487     .priv_data_size = sizeof(MpegEncContext),
4488     .init           = ff_MPV_encode_init,
4489     .encode2        = ff_MPV_encode_picture,
4490     .close          = ff_MPV_encode_end,
4491     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4492     .priv_class     = &msmpeg4v2_class,
4493 };
4494
4495 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4496
4497 AVCodec ff_msmpeg4v3_encoder = {
4498     .name           = "msmpeg4",
4499     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4500     .type           = AVMEDIA_TYPE_VIDEO,
4501     .id             = AV_CODEC_ID_MSMPEG4V3,
4502     .priv_data_size = sizeof(MpegEncContext),
4503     .init           = ff_MPV_encode_init,
4504     .encode2        = ff_MPV_encode_picture,
4505     .close          = ff_MPV_encode_end,
4506     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4507     .priv_class     = &msmpeg4v3_class,
4508 };
4509
4510 FF_MPV_GENERIC_CLASS(wmv1)
4511
4512 AVCodec ff_wmv1_encoder = {
4513     .name           = "wmv1",
4514     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4515     .type           = AVMEDIA_TYPE_VIDEO,
4516     .id             = AV_CODEC_ID_WMV1,
4517     .priv_data_size = sizeof(MpegEncContext),
4518     .init           = ff_MPV_encode_init,
4519     .encode2        = ff_MPV_encode_picture,
4520     .close          = ff_MPV_encode_end,
4521     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4522     .priv_class     = &wmv1_class,
4523 };