git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include <stdint.h>
  31
  32 #include "libavutil/internal.h"
  33 #include "libavutil/intmath.h"
  34 #include "libavutil/mathematics.h"
  35 #include "libavutil/pixdesc.h"
  36 #include "libavutil/opt.h"
  37 #include "libavutil/timer.h"
  38 #include "avcodec.h"
  39 #include "dct.h"
  40 #include "dsputil.h"
  41 #include "mpeg12.h"
  42 #include "mpegvideo.h"
  43 #include "h261.h"
  44 #include "h263.h"
  45 #include "mathops.h"
  46 #include "mjpegenc.h"
  47 #include "msmpeg4.h"
  48 #include "faandct.h"
  49 #include "thread.h"
  50 #include "aandcttab.h"
  51 #include "flv.h"
  52 #include "mpeg4video.h"
  53 #include "internal.h"
  54 #include "bytestream.h"
  55 #include <limits.h>
  56 #include "sp5x.h"
  57
  58 static int encode_picture(MpegEncContext *s, int picture_number);
  59 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
  60 static int sse_mb(MpegEncContext *s);
  61 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
  62 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
  63
  64 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  65 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  66
  67 const AVOption ff_mpv_generic_options[] = {
  68     FF_MPV_COMMON_OPTS
  69     { NULL },
  70 };
  71
  72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  73                        uint16_t (*qmat16)[2][64],
  74                        const uint16_t *quant_matrix,
  75                        int bias, int qmin, int qmax, int intra)
  76 {
  77     int qscale;
  78     int shift = 0;
  79
  80     for (qscale = qmin; qscale <= qmax; qscale++) {
  81         int i;
  82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  83             dsp->fdct == ff_jpeg_fdct_islow_10 ||
  84             dsp->fdct == ff_faandct) {
  85             for (i = 0; i < 64; i++) {
  86                 const int j = dsp->idct_permutation[i];
  87                 /* 16 <= qscale * quant_matrix[i] <= 7905
  88                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  89                  *             19952 <=              x  <= 249205026
  90                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  91                  *           3444240 >= (1 << 36) / (x) >= 275 */
  92
  93                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  94                                         (qscale * quant_matrix[j]));
  95             }
  96         } else if (dsp->fdct == ff_fdct_ifast) {
  97             for (i = 0; i < 64; i++) {
  98                 const int j = dsp->idct_permutation[i];
  99                 /* 16 <= qscale * quant_matrix[i] <= 7905
 100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 101                  *             19952 <=              x  <= 249205026
 102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 103                  *           3444240 >= (1 << 36) / (x) >= 275 */
 104
 105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 106                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
 107             }
 108         } else {
 109             for (i = 0; i < 64; i++) {
 110                 const int j = dsp->idct_permutation[i];
 111                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 112                  * Assume x = qscale * quant_matrix[i]
 113                  * So             16 <=              x  <= 7905
 114                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 115                  * so          32768 >= (1 << 19) / (x) >= 67 */
 116                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 117                                         (qscale * quant_matrix[j]));
 118                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 119                 //                    (qscale * quant_matrix[i]);
 120                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 121                                        (qscale * quant_matrix[j]);
 122
 123                 if (qmat16[qscale][0][i] == 0 ||
 124                     qmat16[qscale][0][i] == 128 * 256)
 125                     qmat16[qscale][0][i] = 128 * 256 - 1;
 126                 qmat16[qscale][1][i] =
 127                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 128                                 qmat16[qscale][0][i]);
 129             }
 130         }
 131
 132         for (i = intra; i < 64; i++) {
 133             int64_t max = 8191;
 134             if (dsp->fdct == ff_fdct_ifast) {
 135                 max = (8191LL * ff_aanscales[i]) >> 14;
 136             }
 137             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 138                 shift++;
 139             }
 140         }
 141     }
 142     if (shift) {
 143         av_log(NULL, AV_LOG_INFO,
 144                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 145                QMAT_SHIFT - shift);
 146     }
 147 }
 148
 149 static inline void update_qscale(MpegEncContext *s)
 150 {
 151     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 152                 (FF_LAMBDA_SHIFT + 7);
 153     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 154
 155     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 156                  FF_LAMBDA_SHIFT;
 157 }
 158
 159 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 160 {
 161     int i;
 162
 163     if (matrix) {
 164         put_bits(pb, 1, 1);
 165         for (i = 0; i < 64; i++) {
 166             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 167         }
 168     } else
 169         put_bits(pb, 1, 0);
 170 }
 171
 172 /**
 173  * init s->current_picture.qscale_table from s->lambda_table
 174  */
 175 void ff_init_qscale_tab(MpegEncContext *s)
 176 {
 177     int8_t * const qscale_table = s->current_picture.qscale_table;
 178     int i;
 179
 180     for (i = 0; i < s->mb_num; i++) {
 181         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 182         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 183         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 184                                                   s->avctx->qmax);
 185     }
 186 }
 187
 188 static void update_duplicate_context_after_me(MpegEncContext *dst,
 189                                               MpegEncContext *src)
 190 {
 191 #define COPY(a) dst->a= src->a
 192     COPY(pict_type);
 193     COPY(current_picture);
 194     COPY(f_code);
 195     COPY(b_code);
 196     COPY(qscale);
 197     COPY(lambda);
 198     COPY(lambda2);
 199     COPY(picture_in_gop_number);
 200     COPY(gop_picture_number);
 201     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 202     COPY(progressive_frame);    // FIXME don't set in encode_header
 203     COPY(partitioned_frame);    // FIXME don't set in encode_header
 204 #undef COPY
 205 }
 206
 207 /**
 208  * Set the given MpegEncContext to defaults for encoding.
 209  * the changed fields will not depend upon the prior state of the MpegEncContext.
 210  */
 211 static void MPV_encode_defaults(MpegEncContext *s)
 212 {
 213     int i;
 214     ff_MPV_common_defaults(s);
 215
 216     for (i = -16; i < 16; i++) {
 217         default_fcode_tab[i + MAX_MV] = 1;
 218     }
 219     s->me.mv_penalty = default_mv_penalty;
 220     s->fcode_tab     = default_fcode_tab;
 221
 222     s->input_picture_number  = 0;
 223     s->picture_in_gop_number = 0;
 224 }
 225
 226 av_cold int ff_dct_encode_init(MpegEncContext *s) {
 227     if (ARCH_X86)
 228         ff_dct_encode_init_x86(s);
 229
 230     if (CONFIG_H263_ENCODER)
 231         ff_h263dsp_init(&s->h263dsp);
 232     if (!s->dct_quantize)
 233         s->dct_quantize = ff_dct_quantize_c;
 234     if (!s->denoise_dct)
 235         s->denoise_dct  = denoise_dct_c;
 236     s->fast_dct_quantize = s->dct_quantize;
 237     if (s->avctx->trellis)
 238         s->dct_quantize  = dct_quantize_trellis_c;
 239
 240     return 0;
 241 }
 242
 243 /* init video encoder */
 244 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
 245 {
 246     MpegEncContext *s = avctx->priv_data;
 247     int i, ret;
 248
 249     MPV_encode_defaults(s);
 250
 251     switch (avctx->codec_id) {
 252     case AV_CODEC_ID_MPEG2VIDEO:
 253         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 254             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
 255             av_log(avctx, AV_LOG_ERROR,
 256                    "only YUV420 and YUV422 are supported\n");
 257             return -1;
 258         }
 259         break;
 260     case AV_CODEC_ID_MJPEG:
 261     case AV_CODEC_ID_AMV:
 262         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
 263             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
 264             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
 265             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
 266               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
 267               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
 268              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 269             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 270             return -1;
 271         }
 272         break;
 273     default:
 274         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
 275             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 276             return -1;
 277         }
 278     }
 279
 280     switch (avctx->pix_fmt) {
 281     case AV_PIX_FMT_YUVJ444P:
 282     case AV_PIX_FMT_YUV444P:
 283         s->chroma_format = CHROMA_444;
 284         break;
 285     case AV_PIX_FMT_YUVJ422P:
 286     case AV_PIX_FMT_YUV422P:
 287         s->chroma_format = CHROMA_422;
 288         break;
 289     case AV_PIX_FMT_YUVJ420P:
 290     case AV_PIX_FMT_YUV420P:
 291     default:
 292         s->chroma_format = CHROMA_420;
 293         break;
 294     }
 295
 296     s->bit_rate = avctx->bit_rate;
 297     s->width    = avctx->width;
 298     s->height   = avctx->height;
 299     if (avctx->gop_size > 600 &&
 300         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 301         av_log(avctx, AV_LOG_WARNING,
 302                "keyframe interval too large!, reducing it from %d to %d\n",
 303                avctx->gop_size, 600);
 304         avctx->gop_size = 600;
 305     }
 306     s->gop_size     = avctx->gop_size;
 307     s->avctx        = avctx;
 308     s->flags        = avctx->flags;
 309     s->flags2       = avctx->flags2;
 310     if (avctx->max_b_frames > MAX_B_FRAMES) {
 311         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
 312                "is %d.\n", MAX_B_FRAMES);
 313         avctx->max_b_frames = MAX_B_FRAMES;
 314     }
 315     s->max_b_frames = avctx->max_b_frames;
 316     s->codec_id     = avctx->codec->id;
 317     s->strict_std_compliance = avctx->strict_std_compliance;
 318     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 319     s->mpeg_quant         = avctx->mpeg_quant;
 320     s->rtp_mode           = !!avctx->rtp_payload_size;
 321     s->intra_dc_precision = avctx->intra_dc_precision;
 322     s->user_specified_pts = AV_NOPTS_VALUE;
 323
 324     if (s->gop_size <= 1) {
 325         s->intra_only = 1;
 326         s->gop_size   = 12;
 327     } else {
 328         s->intra_only = 0;
 329     }
 330
 331     s->me_method = avctx->me_method;
 332
 333     /* Fixed QSCALE */
 334     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 335
 336     s->adaptive_quant = (s->avctx->lumi_masking ||
 337                          s->avctx->dark_masking ||
 338                          s->avctx->temporal_cplx_masking ||
 339                          s->avctx->spatial_cplx_masking  ||
 340                          s->avctx->p_masking      ||
 341                          s->avctx->border_masking ||
 342                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
 343                         !s->fixed_qscale;
 344
 345     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 346
 347     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
 348         switch(avctx->codec_id) {
 349         case AV_CODEC_ID_MPEG1VIDEO:
 350         case AV_CODEC_ID_MPEG2VIDEO:
 351             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
 352             break;
 353         case AV_CODEC_ID_MPEG4:
 354         case AV_CODEC_ID_MSMPEG4V1:
 355         case AV_CODEC_ID_MSMPEG4V2:
 356         case AV_CODEC_ID_MSMPEG4V3:
 357             if       (avctx->rc_max_rate >= 15000000) {
 358                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
 359             } else if(avctx->rc_max_rate >=  2000000) {
 360                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
 361             } else if(avctx->rc_max_rate >=   384000) {
 362                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
 363             } else
 364                 avctx->rc_buffer_size = 40;
 365             avctx->rc_buffer_size *= 16384;
 366             break;
 367         }
 368         if (avctx->rc_buffer_size) {
 369             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
 370         }
 371     }
 372
 373     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 374         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 375         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
 376             return -1;
 377     }
 378
 379     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 380         av_log(avctx, AV_LOG_INFO,
 381                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 382     }
 383
 384     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 385         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 386         return -1;
 387     }
 388
 389     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 390         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 391         return -1;
 392     }
 393
 394     if (avctx->rc_max_rate &&
 395         avctx->rc_max_rate == avctx->bit_rate &&
 396         avctx->rc_max_rate != avctx->rc_min_rate) {
 397         av_log(avctx, AV_LOG_INFO,
 398                "impossible bitrate constraints, this will fail\n");
 399     }
 400
 401     if (avctx->rc_buffer_size &&
 402         avctx->bit_rate * (int64_t)avctx->time_base.num >
 403             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 404         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 405         return -1;
 406     }
 407
 408     if (!s->fixed_qscale &&
 409         avctx->bit_rate * av_q2d(avctx->time_base) >
 410             avctx->bit_rate_tolerance) {
 411         av_log(avctx, AV_LOG_ERROR,
 412                "bitrate tolerance %d too small for bitrate %d\n", avctx->bit_rate_tolerance, avctx->bit_rate);
 413         return -1;
 414     }
 415
 416     if (s->avctx->rc_max_rate &&
 417         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 418         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
 419          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
 420         90000LL * (avctx->rc_buffer_size - 1) >
 421             s->avctx->rc_max_rate * 0xFFFFLL) {
 422         av_log(avctx, AV_LOG_INFO,
 423                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 424                "specified vbv buffer is too large for the given bitrate!\n");
 425     }
 426
 427     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
 428         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
 429         s->codec_id != AV_CODEC_ID_FLV1) {
 430         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 431         return -1;
 432     }
 433
 434     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 435         av_log(avctx, AV_LOG_ERROR,
 436                "OBMC is only supported with simple mb decision\n");
 437         return -1;
 438     }
 439
 440     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
 441         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 442         return -1;
 443     }
 444
 445     if (s->max_b_frames                    &&
 446         s->codec_id != AV_CODEC_ID_MPEG4      &&
 447         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 448         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 449         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 450         return -1;
 451     }
 452     if (s->max_b_frames < 0) {
 453         av_log(avctx, AV_LOG_ERROR,
 454                "max b frames must be 0 or positive for mpegvideo based encoders\n");
 455         return -1;
 456     }
 457
 458     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
 459          s->codec_id == AV_CODEC_ID_H263  ||
 460          s->codec_id == AV_CODEC_ID_H263P) &&
 461         (avctx->sample_aspect_ratio.num > 255 ||
 462          avctx->sample_aspect_ratio.den > 255)) {
 463         av_log(avctx, AV_LOG_WARNING,
 464                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 465                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 466         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 467                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 468     }
 469
 470     if ((s->codec_id == AV_CODEC_ID_H263  ||
 471          s->codec_id == AV_CODEC_ID_H263P) &&
 472         (avctx->width  > 2048 ||
 473          avctx->height > 1152 )) {
 474         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
 475         return -1;
 476     }
 477     if ((s->codec_id == AV_CODEC_ID_H263  ||
 478          s->codec_id == AV_CODEC_ID_H263P) &&
 479         ((avctx->width &3) ||
 480          (avctx->height&3) )) {
 481         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
 482         return -1;
 483     }
 484
 485     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
 486         (avctx->width  > 4095 ||
 487          avctx->height > 4095 )) {
 488         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
 489         return -1;
 490     }
 491
 492     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
 493         (avctx->width  > 16383 ||
 494          avctx->height > 16383 )) {
 495         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
 496         return -1;
 497     }
 498
 499     if (s->codec_id == AV_CODEC_ID_RV10 &&
 500         (avctx->width &15 ||
 501          avctx->height&15 )) {
 502         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
 503         return AVERROR(EINVAL);
 504     }
 505
 506     if (s->codec_id == AV_CODEC_ID_RV20 &&
 507         (avctx->width &3 ||
 508          avctx->height&3 )) {
 509         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
 510         return AVERROR(EINVAL);
 511     }
 512
 513     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
 514          s->codec_id == AV_CODEC_ID_WMV2) &&
 515          avctx->width & 1) {
 516          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
 517          return -1;
 518     }
 519
 520     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 521         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 522         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 523         return -1;
 524     }
 525
 526     // FIXME mpeg2 uses that too
 527     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
 528                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
 529         av_log(avctx, AV_LOG_ERROR,
 530                "mpeg2 style quantization not supported by codec\n");
 531         return -1;
 532     }
 533
 534     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
 535         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 536         return -1;
 537     }
 538
 539     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
 540         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 541         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 542         return -1;
 543     }
 544
 545     if (s->avctx->scenechange_threshold < 1000000000 &&
 546         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 547         av_log(avctx, AV_LOG_ERROR,
 548                "closed gop with scene change detection are not supported yet, "
 549                "set threshold to 1000000000\n");
 550         return -1;
 551     }
 552
 553     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 554         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
 555             av_log(avctx, AV_LOG_ERROR,
 556                   "low delay forcing is only available for mpeg2\n");
 557             return -1;
 558         }
 559         if (s->max_b_frames != 0) {
 560             av_log(avctx, AV_LOG_ERROR,
 561                    "b frames cannot be used with low delay\n");
 562             return -1;
 563         }
 564     }
 565
 566     if (s->q_scale_type == 1) {
 567         if (avctx->qmax > 12) {
 568             av_log(avctx, AV_LOG_ERROR,
 569                    "non linear quant only supports qmax <= 12 currently\n");
 570             return -1;
 571         }
 572     }
 573
 574     if (s->avctx->thread_count > 1         &&
 575         s->codec_id != AV_CODEC_ID_MPEG4      &&
 576         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
 577         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
 578         s->codec_id != AV_CODEC_ID_MJPEG      &&
 579         (s->codec_id != AV_CODEC_ID_H263P)) {
 580         av_log(avctx, AV_LOG_ERROR,
 581                "multi threaded encoding not supported by codec\n");
 582         return -1;
 583     }
 584
 585     if (s->avctx->thread_count < 1) {
 586         av_log(avctx, AV_LOG_ERROR,
 587                "automatic thread number detection not supported by codec, "
 588                "patch welcome\n");
 589         return -1;
 590     }
 591
 592     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
 593         s->rtp_mode = 1;
 594
 595     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
 596         s->h263_slice_structured = 1;
 597
 598     if (!avctx->time_base.den || !avctx->time_base.num) {
 599         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 600         return -1;
 601     }
 602
 603     i = (INT_MAX / 2 + 128) >> 8;
 604     if (avctx->mb_threshold >= i) {
 605         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 606                i - 1);
 607         return -1;
 608     }
 609
 610     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 611         av_log(avctx, AV_LOG_INFO,
 612                "notice: b_frame_strategy only affects the first pass\n");
 613         avctx->b_frame_strategy = 0;
 614     }
 615
 616     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 617     if (i > 1) {
 618         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 619         avctx->time_base.den /= i;
 620         avctx->time_base.num /= i;
 621         //return -1;
 622     }
 623
 624     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
 625         // (a + x * 3 / 8) / x
 626         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 627         s->inter_quant_bias = 0;
 628     } else {
 629         s->intra_quant_bias = 0;
 630         // (a - x / 4) / x
 631         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 632     }
 633
 634     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
 635         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
 636         return AVERROR(EINVAL);
 637     }
 638
 639     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 640         s->intra_quant_bias = avctx->intra_quant_bias;
 641     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 642         s->inter_quant_bias = avctx->inter_quant_bias;
 643
 644     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 645
 646     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
 647         s->avctx->time_base.den > (1 << 16) - 1) {
 648         av_log(avctx, AV_LOG_ERROR,
 649                "timebase %d/%d not supported by MPEG 4 standard, "
 650                "the maximum admitted value for the timebase denominator "
 651                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 652                (1 << 16) - 1);
 653         return -1;
 654     }
 655     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 656
 657     switch (avctx->codec->id) {
 658     case AV_CODEC_ID_MPEG1VIDEO:
 659         s->out_format = FMT_MPEG1;
 660         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 661         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 662         break;
 663     case AV_CODEC_ID_MPEG2VIDEO:
 664         s->out_format = FMT_MPEG1;
 665         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 666         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 667         s->rtp_mode   = 1;
 668         break;
 669     case AV_CODEC_ID_MJPEG:
 670     case AV_CODEC_ID_AMV:
 671         s->out_format = FMT_MJPEG;
 672         s->intra_only = 1; /* force intra only for jpeg */
 673         if (!CONFIG_MJPEG_ENCODER ||
 674             ff_mjpeg_encode_init(s) < 0)
 675             return -1;
 676         avctx->delay = 0;
 677         s->low_delay = 1;
 678         break;
 679     case AV_CODEC_ID_H261:
 680         if (!CONFIG_H261_ENCODER)
 681             return -1;
 682         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 683             av_log(avctx, AV_LOG_ERROR,
 684                    "The specified picture size of %dx%d is not valid for the "
 685                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 686                     s->width, s->height);
 687             return -1;
 688         }
 689         s->out_format = FMT_H261;
 690         avctx->delay  = 0;
 691         s->low_delay  = 1;
 692         break;
 693     case AV_CODEC_ID_H263:
 694         if (!CONFIG_H263_ENCODER)
 695             return -1;
 696         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
 697                              s->width, s->height) == 8) {
 698             av_log(avctx, AV_LOG_ERROR,
 699                    "The specified picture size of %dx%d is not valid for "
 700                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 701                    "352x288, 704x576, and 1408x1152. "
 702                    "Try H.263+.\n", s->width, s->height);
 703             return -1;
 704         }
 705         s->out_format = FMT_H263;
 706         avctx->delay  = 0;
 707         s->low_delay  = 1;
 708         break;
 709     case AV_CODEC_ID_H263P:
 710         s->out_format = FMT_H263;
 711         s->h263_plus  = 1;
 712         /* Fx */
 713         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 714         s->modified_quant  = s->h263_aic;
 715         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 716         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 717
 718         /* /Fx */
 719         /* These are just to be sure */
 720         avctx->delay = 0;
 721         s->low_delay = 1;
 722         break;
 723     case AV_CODEC_ID_FLV1:
 724         s->out_format      = FMT_H263;
 725         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 726         s->unrestricted_mv = 1;
 727         s->rtp_mode  = 0; /* don't allow GOB */
 728         avctx->delay = 0;
 729         s->low_delay = 1;
 730         break;
 731     case AV_CODEC_ID_RV10:
 732         s->out_format = FMT_H263;
 733         avctx->delay  = 0;
 734         s->low_delay  = 1;
 735         break;
 736     case AV_CODEC_ID_RV20:
 737         s->out_format      = FMT_H263;
 738         avctx->delay       = 0;
 739         s->low_delay       = 1;
 740         s->modified_quant  = 1;
 741         s->h263_aic        = 1;
 742         s->h263_plus       = 1;
 743         s->loop_filter     = 1;
 744         s->unrestricted_mv = 0;
 745         break;
 746     case AV_CODEC_ID_MPEG4:
 747         s->out_format      = FMT_H263;
 748         s->h263_pred       = 1;
 749         s->unrestricted_mv = 1;
 750         s->low_delay       = s->max_b_frames ? 0 : 1;
 751         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 752         break;
 753     case AV_CODEC_ID_MSMPEG4V2:
 754         s->out_format      = FMT_H263;
 755         s->h263_pred       = 1;
 756         s->unrestricted_mv = 1;
 757         s->msmpeg4_version = 2;
 758         avctx->delay       = 0;
 759         s->low_delay       = 1;
 760         break;
 761     case AV_CODEC_ID_MSMPEG4V3:
 762         s->out_format        = FMT_H263;
 763         s->h263_pred         = 1;
 764         s->unrestricted_mv   = 1;
 765         s->msmpeg4_version   = 3;
 766         s->flipflop_rounding = 1;
 767         avctx->delay         = 0;
 768         s->low_delay         = 1;
 769         break;
 770     case AV_CODEC_ID_WMV1:
 771         s->out_format        = FMT_H263;
 772         s->h263_pred         = 1;
 773         s->unrestricted_mv   = 1;
 774         s->msmpeg4_version   = 4;
 775         s->flipflop_rounding = 1;
 776         avctx->delay         = 0;
 777         s->low_delay         = 1;
 778         break;
 779     case AV_CODEC_ID_WMV2:
 780         s->out_format        = FMT_H263;
 781         s->h263_pred         = 1;
 782         s->unrestricted_mv   = 1;
 783         s->msmpeg4_version   = 5;
 784         s->flipflop_rounding = 1;
 785         avctx->delay         = 0;
 786         s->low_delay         = 1;
 787         break;
 788     default:
 789         return -1;
 790     }
 791
 792     avctx->has_b_frames = !s->low_delay;
 793
 794     s->encoding = 1;
 795
 796     s->progressive_frame    =
 797     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 798                                                 CODEC_FLAG_INTERLACED_ME) ||
 799                                 s->alternate_scan);
 800
 801     /* init */
 802     if (ff_MPV_common_init(s) < 0)
 803         return -1;
 804
 805     s->avctx->coded_frame = &s->current_picture.f;
 806
 807     if (s->msmpeg4_version) {
 808         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
 809                           2 * 2 * (MAX_LEVEL + 1) *
 810                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
 811     }
 812     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
 813
 814     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
 815     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
 816     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
 817     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 818     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 819     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
 820     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
 821                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 822     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
 823                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
 824
 825     if (s->avctx->noise_reduction) {
 826         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
 827                           2 * 64 * sizeof(uint16_t), fail);
 828     }
 829
 830     ff_dct_encode_init(s);
 831
 832     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 833         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 834
 835     s->quant_precision = 5;
 836
 837     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 838     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 839
 840     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 841         ff_h261_encode_init(s);
 842     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 843         ff_h263_encode_init(s);
 844     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 845         ff_msmpeg4_encode_init(s);
 846     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 847         && s->out_format == FMT_MPEG1)
 848         ff_mpeg1_encode_init(s);
 849
 850     /* init q matrix */
 851     for (i = 0; i < 64; i++) {
 852         int j = s->dsp.idct_permutation[i];
 853         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
 854             s->mpeg_quant) {
 855             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 856             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 857         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 858             s->intra_matrix[j] =
 859             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 860         } else {
 861             /* mpeg1/2 */
 862             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 863             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 864         }
 865         if (s->avctx->intra_matrix)
 866             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 867         if (s->avctx->inter_matrix)
 868             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 869     }
 870
 871     /* precompute matrix */
 872     /* for mjpeg, we do include qscale in the matrix */
 873     if (s->out_format != FMT_MJPEG) {
 874         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 875                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 876                           31, 1);
 877         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 878                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 879                           31, 0);
 880     }
 881
 882     if (ff_rate_control_init(s) < 0)
 883         return -1;
 884
 885 #if FF_API_ERROR_RATE
 886     FF_DISABLE_DEPRECATION_WARNINGS
 887     if (avctx->error_rate)
 888         s->error_rate = avctx->error_rate;
 889     FF_ENABLE_DEPRECATION_WARNINGS;
 890 #endif
 891
 892     if (avctx->b_frame_strategy == 2) {
 893         for (i = 0; i < s->max_b_frames + 2; i++) {
 894             s->tmp_frames[i] = av_frame_alloc();
 895             if (!s->tmp_frames[i])
 896                 return AVERROR(ENOMEM);
 897
 898             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
 899             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
 900             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
 901
 902             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
 903             if (ret < 0)
 904                 return ret;
 905         }
 906     }
 907
 908     return 0;
 909 fail:
 910     ff_MPV_encode_end(avctx);
 911     return AVERROR_UNKNOWN;
 912 }
 913
 914 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
 915 {
 916     MpegEncContext *s = avctx->priv_data;
 917     int i;
 918
 919     ff_rate_control_uninit(s);
 920
 921     ff_MPV_common_end(s);
 922     if (CONFIG_MJPEG_ENCODER &&
 923         s->out_format == FMT_MJPEG)
 924         ff_mjpeg_encode_close(s);
 925
 926     av_freep(&avctx->extradata);
 927
 928     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
 929         av_frame_free(&s->tmp_frames[i]);
 930
 931     ff_free_picture_tables(&s->new_picture);
 932     ff_mpeg_unref_picture(s, &s->new_picture);
 933
 934     av_freep(&s->avctx->stats_out);
 935     av_freep(&s->ac_stats);
 936
 937     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
 938     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
 939     s->q_chroma_intra_matrix=   NULL;
 940     s->q_chroma_intra_matrix16= NULL;
 941     av_freep(&s->q_intra_matrix);
 942     av_freep(&s->q_inter_matrix);
 943     av_freep(&s->q_intra_matrix16);
 944     av_freep(&s->q_inter_matrix16);
 945     av_freep(&s->input_picture);
 946     av_freep(&s->reordered_input_picture);
 947     av_freep(&s->dct_offset);
 948
 949     return 0;
 950 }
 951
 952 static int get_sae(uint8_t *src, int ref, int stride)
 953 {
 954     int x,y;
 955     int acc = 0;
 956
 957     for (y = 0; y < 16; y++) {
 958         for (x = 0; x < 16; x++) {
 959             acc += FFABS(src[x + y * stride] - ref);
 960         }
 961     }
 962
 963     return acc;
 964 }
 965
 966 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 967                            uint8_t *ref, int stride)
 968 {
 969     int x, y, w, h;
 970     int acc = 0;
 971
 972     w = s->width  & ~15;
 973     h = s->height & ~15;
 974
 975     for (y = 0; y < h; y += 16) {
 976         for (x = 0; x < w; x += 16) {
 977             int offset = x + y * stride;
 978             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 979                                      16);
 980             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 981             int sae  = get_sae(src + offset, mean, stride);
 982
 983             acc += sae + 500 < sad;
 984         }
 985     }
 986     return acc;
 987 }
 988
 989
 990 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
 991 {
 992     Picture *pic = NULL;
 993     int64_t pts;
 994     int i, display_picture_number = 0, ret;
 995     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
 996                                                  (s->low_delay ? 0 : 1);
 997     int direct = 1;
 998
 999     if (pic_arg) {
1000         pts = pic_arg->pts;
1001         display_picture_number = s->input_picture_number++;
1002
1003         if (pts != AV_NOPTS_VALUE) {
1004             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1005                 int64_t last = s->user_specified_pts;
1006
1007                 if (pts <= last) {
1008                     av_log(s->avctx, AV_LOG_ERROR,
1009                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1010                            pts, last);
1011                     return AVERROR(EINVAL);
1012                 }
1013
1014                 if (!s->low_delay && display_picture_number == 1)
1015                     s->dts_delta = pts - last;
1016             }
1017             s->user_specified_pts = pts;
1018         } else {
1019             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1020                 s->user_specified_pts =
1021                 pts = s->user_specified_pts + 1;
1022                 av_log(s->avctx, AV_LOG_INFO,
1023                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1024                        pts);
1025             } else {
1026                 pts = display_picture_number;
1027             }
1028         }
1029     }
1030
1031     if (pic_arg) {
1032         if (!pic_arg->buf[0])
1033             direct = 0;
1034         if (pic_arg->linesize[0] != s->linesize)
1035             direct = 0;
1036         if (pic_arg->linesize[1] != s->uvlinesize)
1037             direct = 0;
1038         if (pic_arg->linesize[2] != s->uvlinesize)
1039             direct = 0;
1040         if ((s->width & 15) || (s->height & 15))
1041             direct = 0;
1042
1043         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1044                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1045
1046         if (direct) {
1047             i = ff_find_unused_picture(s, 1);
1048             if (i < 0)
1049                 return i;
1050
1051             pic = &s->picture[i];
1052             pic->reference = 3;
1053
1054             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
1055                 return ret;
1056             if (ff_alloc_picture(s, pic, 1) < 0) {
1057                 return -1;
1058             }
1059         } else {
1060             i = ff_find_unused_picture(s, 0);
1061             if (i < 0)
1062                 return i;
1063
1064             pic = &s->picture[i];
1065             pic->reference = 3;
1066
1067             if (ff_alloc_picture(s, pic, 0) < 0) {
1068                 return -1;
1069             }
1070
1071             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1072                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1073                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1074                 // empty
1075             } else {
1076                 int h_chroma_shift, v_chroma_shift;
1077                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1078                                                  &h_chroma_shift,
1079                                                  &v_chroma_shift);
1080
1081                 for (i = 0; i < 3; i++) {
1082                     int src_stride = pic_arg->linesize[i];
1083                     int dst_stride = i ? s->uvlinesize : s->linesize;
1084                     int h_shift = i ? h_chroma_shift : 0;
1085                     int v_shift = i ? v_chroma_shift : 0;
1086                     int w = s->width  >> h_shift;
1087                     int h = s->height >> v_shift;
1088                     uint8_t *src = pic_arg->data[i];
1089                     uint8_t *dst = pic->f.data[i];
1090
1091                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1092                         h = ((s->height + 15)/16*16) >> v_shift;
1093                     }
1094
1095                     if (!s->avctx->rc_buffer_size)
1096                         dst += INPLACE_OFFSET;
1097
1098                     if (src_stride == dst_stride)
1099                         memcpy(dst, src, src_stride * h);
1100                     else {
1101                         int h2 = h;
1102                         uint8_t *dst2 = dst;
1103                         while (h2--) {
1104                             memcpy(dst2, src, w);
1105                             dst2 += dst_stride;
1106                             src += src_stride;
1107                         }
1108                     }
1109                     if ((s->width & 15) || (s->height & 15)) {
1110                         s->dsp.draw_edges(dst, dst_stride,
1111                                           w, h,
1112                                           16>>h_shift,
1113                                           16>>v_shift,
1114                                           EDGE_BOTTOM);
1115                     }
1116                 }
1117             }
1118         }
1119         ret = av_frame_copy_props(&pic->f, pic_arg);
1120         if (ret < 0)
1121             return ret;
1122
1123         pic->f.display_picture_number = display_picture_number;
1124         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
1125     }
1126
1127     /* shift buffer entries */
1128     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1129         s->input_picture[i - 1] = s->input_picture[i];
1130
1131     s->input_picture[encoding_delay] = (Picture*) pic;
1132
1133     return 0;
1134 }
1135
1136 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1137 {
1138     int x, y, plane;
1139     int score = 0;
1140     int64_t score64 = 0;
1141
1142     for (plane = 0; plane < 3; plane++) {
1143         const int stride = p->f.linesize[plane];
1144         const int bw = plane ? 1 : 2;
1145         for (y = 0; y < s->mb_height * bw; y++) {
1146             for (x = 0; x < s->mb_width * bw; x++) {
1147                 int off = p->shared ? 0 : 16;
1148                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1149                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1150                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1151
1152                 switch (FFABS(s->avctx->frame_skip_exp)) {
1153                 case 0: score    =  FFMAX(score, v);          break;
1154                 case 1: score   += FFABS(v);                  break;
1155                 case 2: score64 += v * (int64_t)v;                       break;
1156                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1157                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1158                 }
1159             }
1160         }
1161     }
1162     emms_c();
1163
1164     if (score)
1165         score64 = score;
1166     if (s->avctx->frame_skip_exp < 0)
1167         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1168                       -1.0/s->avctx->frame_skip_exp);
1169
1170     if (score64 < s->avctx->frame_skip_threshold)
1171         return 1;
1172     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1173         return 1;
1174     return 0;
1175 }
1176
1177 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1178 {
1179     AVPacket pkt = { 0 };
1180     int ret, got_output;
1181
1182     av_init_packet(&pkt);
1183     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1184     if (ret < 0)
1185         return ret;
1186
1187     ret = pkt.size;
1188     av_free_packet(&pkt);
1189     return ret;
1190 }
1191
1192 static int estimate_best_b_count(MpegEncContext *s)
1193 {
1194     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1195     AVCodecContext *c = avcodec_alloc_context3(NULL);
1196     const int scale = s->avctx->brd_scale;
1197     int i, j, out_size, p_lambda, b_lambda, lambda2;
1198     int64_t best_rd  = INT64_MAX;
1199     int best_b_count = -1;
1200
1201     av_assert0(scale >= 0 && scale <= 3);
1202
1203     //emms_c();
1204     //s->next_picture_ptr->quality;
1205     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1206     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1207     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1208     if (!b_lambda) // FIXME we should do this somewhere else
1209         b_lambda = p_lambda;
1210     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1211                FF_LAMBDA_SHIFT;
1212
1213     c->width        = s->width  >> scale;
1214     c->height       = s->height >> scale;
1215     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1216                       CODEC_FLAG_INPUT_PRESERVED;
1217     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1218     c->mb_decision  = s->avctx->mb_decision;
1219     c->me_cmp       = s->avctx->me_cmp;
1220     c->mb_cmp       = s->avctx->mb_cmp;
1221     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1222     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1223     c->time_base    = s->avctx->time_base;
1224     c->max_b_frames = s->max_b_frames;
1225
1226     if (avcodec_open2(c, codec, NULL) < 0)
1227         return -1;
1228
1229     for (i = 0; i < s->max_b_frames + 2; i++) {
1230         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1231                                                 s->next_picture_ptr;
1232
1233         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1234             pre_input = *pre_input_ptr;
1235
1236             if (!pre_input.shared && i) {
1237                 pre_input.f.data[0] += INPLACE_OFFSET;
1238                 pre_input.f.data[1] += INPLACE_OFFSET;
1239                 pre_input.f.data[2] += INPLACE_OFFSET;
1240             }
1241
1242             s->dsp.shrink[scale](s->tmp_frames[i]->data[0], s->tmp_frames[i]->linesize[0],
1243                                  pre_input.f.data[0], pre_input.f.linesize[0],
1244                                  c->width,      c->height);
1245             s->dsp.shrink[scale](s->tmp_frames[i]->data[1], s->tmp_frames[i]->linesize[1],
1246                                  pre_input.f.data[1], pre_input.f.linesize[1],
1247                                  c->width >> 1, c->height >> 1);
1248             s->dsp.shrink[scale](s->tmp_frames[i]->data[2], s->tmp_frames[i]->linesize[2],
1249                                  pre_input.f.data[2], pre_input.f.linesize[2],
1250                                  c->width >> 1, c->height >> 1);
1251         }
1252     }
1253
1254     for (j = 0; j < s->max_b_frames + 1; j++) {
1255         int64_t rd = 0;
1256
1257         if (!s->input_picture[j])
1258             break;
1259
1260         c->error[0] = c->error[1] = c->error[2] = 0;
1261
1262         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1263         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1264
1265         out_size = encode_frame(c, s->tmp_frames[0]);
1266
1267         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1268
1269         for (i = 0; i < s->max_b_frames + 1; i++) {
1270             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1271
1272             s->tmp_frames[i + 1]->pict_type = is_p ?
1273                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1274             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1275
1276             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1277
1278             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1279         }
1280
1281         /* get the delayed frames */
1282         while (out_size) {
1283             out_size = encode_frame(c, NULL);
1284             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1285         }
1286
1287         rd += c->error[0] + c->error[1] + c->error[2];
1288
1289         if (rd < best_rd) {
1290             best_rd = rd;
1291             best_b_count = j;
1292         }
1293     }
1294
1295     avcodec_close(c);
1296     av_freep(&c);
1297
1298     return best_b_count;
1299 }
1300
1301 static int select_input_picture(MpegEncContext *s)
1302 {
1303     int i, ret;
1304
1305     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1306         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1307     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1308
1309     /* set next picture type & ordering */
1310     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1311         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1312             if (s->picture_in_gop_number < s->gop_size &&
1313                 s->next_picture_ptr &&
1314                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1315                 // FIXME check that te gop check above is +-1 correct
1316                 av_frame_unref(&s->input_picture[0]->f);
1317
1318                 ff_vbv_update(s, 0);
1319
1320                 goto no_output_pic;
1321             }
1322         }
1323
1324         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1325             s->next_picture_ptr == NULL || s->intra_only) {
1326             s->reordered_input_picture[0] = s->input_picture[0];
1327             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1328             s->reordered_input_picture[0]->f.coded_picture_number =
1329                 s->coded_picture_number++;
1330         } else {
1331             int b_frames;
1332
1333             if (s->flags & CODEC_FLAG_PASS2) {
1334                 for (i = 0; i < s->max_b_frames + 1; i++) {
1335                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1336
1337                     if (pict_num >= s->rc_context.num_entries)
1338                         break;
1339                     if (!s->input_picture[i]) {
1340                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1341                         break;
1342                     }
1343
1344                     s->input_picture[i]->f.pict_type =
1345                         s->rc_context.entry[pict_num].new_pict_type;
1346                 }
1347             }
1348
1349             if (s->avctx->b_frame_strategy == 0) {
1350                 b_frames = s->max_b_frames;
1351                 while (b_frames && !s->input_picture[b_frames])
1352                     b_frames--;
1353             } else if (s->avctx->b_frame_strategy == 1) {
1354                 for (i = 1; i < s->max_b_frames + 1; i++) {
1355                     if (s->input_picture[i] &&
1356                         s->input_picture[i]->b_frame_score == 0) {
1357                         s->input_picture[i]->b_frame_score =
1358                             get_intra_count(s,
1359                                             s->input_picture[i    ]->f.data[0],
1360                                             s->input_picture[i - 1]->f.data[0],
1361                                             s->linesize) + 1;
1362                     }
1363                 }
1364                 for (i = 0; i < s->max_b_frames + 1; i++) {
1365                     if (s->input_picture[i] == NULL ||
1366                         s->input_picture[i]->b_frame_score - 1 >
1367                             s->mb_num / s->avctx->b_sensitivity)
1368                         break;
1369                 }
1370
1371                 b_frames = FFMAX(0, i - 1);
1372
1373                 /* reset scores */
1374                 for (i = 0; i < b_frames + 1; i++) {
1375                     s->input_picture[i]->b_frame_score = 0;
1376                 }
1377             } else if (s->avctx->b_frame_strategy == 2) {
1378                 b_frames = estimate_best_b_count(s);
1379             } else {
1380                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1381                 b_frames = 0;
1382             }
1383
1384             emms_c();
1385
1386             for (i = b_frames - 1; i >= 0; i--) {
1387                 int type = s->input_picture[i]->f.pict_type;
1388                 if (type && type != AV_PICTURE_TYPE_B)
1389                     b_frames = i;
1390             }
1391             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1392                 b_frames == s->max_b_frames) {
1393                 av_log(s->avctx, AV_LOG_ERROR,
1394                        "warning, too many b frames in a row\n");
1395             }
1396
1397             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1398                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1399                     s->gop_size > s->picture_in_gop_number) {
1400                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1401                 } else {
1402                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1403                         b_frames = 0;
1404                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1405                 }
1406             }
1407
1408             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1409                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1410                 b_frames--;
1411
1412             s->reordered_input_picture[0] = s->input_picture[b_frames];
1413             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1414                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1415             s->reordered_input_picture[0]->f.coded_picture_number =
1416                 s->coded_picture_number++;
1417             for (i = 0; i < b_frames; i++) {
1418                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1419                 s->reordered_input_picture[i + 1]->f.pict_type =
1420                     AV_PICTURE_TYPE_B;
1421                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1422                     s->coded_picture_number++;
1423             }
1424         }
1425     }
1426 no_output_pic:
1427     if (s->reordered_input_picture[0]) {
1428         s->reordered_input_picture[0]->reference =
1429            s->reordered_input_picture[0]->f.pict_type !=
1430                AV_PICTURE_TYPE_B ? 3 : 0;
1431
1432         ff_mpeg_unref_picture(s, &s->new_picture);
1433         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1434             return ret;
1435
1436         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1437             // input is a shared pix, so we can't modifiy it -> alloc a new
1438             // one & ensure that the shared one is reuseable
1439
1440             Picture *pic;
1441             int i = ff_find_unused_picture(s, 0);
1442             if (i < 0)
1443                 return i;
1444             pic = &s->picture[i];
1445
1446             pic->reference = s->reordered_input_picture[0]->reference;
1447             if (ff_alloc_picture(s, pic, 0) < 0) {
1448                 return -1;
1449             }
1450
1451             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1452             if (ret < 0)
1453                 return ret;
1454
1455             /* mark us unused / free shared pic */
1456             av_frame_unref(&s->reordered_input_picture[0]->f);
1457             s->reordered_input_picture[0]->shared = 0;
1458
1459             s->current_picture_ptr = pic;
1460         } else {
1461             // input is not a shared pix -> reuse buffer for current_pix
1462             s->current_picture_ptr = s->reordered_input_picture[0];
1463             for (i = 0; i < 4; i++) {
1464                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1465             }
1466         }
1467         ff_mpeg_unref_picture(s, &s->current_picture);
1468         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1469                                        s->current_picture_ptr)) < 0)
1470             return ret;
1471
1472         s->picture_number = s->new_picture.f.display_picture_number;
1473     } else {
1474         ff_mpeg_unref_picture(s, &s->new_picture);
1475     }
1476     return 0;
1477 }
1478
1479 static void frame_end(MpegEncContext *s)
1480 {
1481     if (s->unrestricted_mv &&
1482         s->current_picture.reference &&
1483         !s->intra_only) {
1484         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1485         int hshift = desc->log2_chroma_w;
1486         int vshift = desc->log2_chroma_h;
1487         s->dsp.draw_edges(s->current_picture.f.data[0], s->current_picture.f.linesize[0],
1488                           s->h_edge_pos, s->v_edge_pos,
1489                           EDGE_WIDTH, EDGE_WIDTH,
1490                           EDGE_TOP | EDGE_BOTTOM);
1491         s->dsp.draw_edges(s->current_picture.f.data[1], s->current_picture.f.linesize[1],
1492                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1493                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1494                           EDGE_TOP | EDGE_BOTTOM);
1495         s->dsp.draw_edges(s->current_picture.f.data[2], s->current_picture.f.linesize[2],
1496                           s->h_edge_pos >> hshift, s->v_edge_pos >> vshift,
1497                           EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1498                           EDGE_TOP | EDGE_BOTTOM);
1499     }
1500
1501     emms_c();
1502
1503     s->last_pict_type                 = s->pict_type;
1504     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f.quality;
1505     if (s->pict_type!= AV_PICTURE_TYPE_B)
1506         s->last_non_b_pict_type = s->pict_type;
1507
1508     s->avctx->coded_frame = &s->current_picture_ptr->f;
1509
1510 }
1511
1512 static void update_noise_reduction(MpegEncContext *s)
1513 {
1514     int intra, i;
1515
1516     for (intra = 0; intra < 2; intra++) {
1517         if (s->dct_count[intra] > (1 << 16)) {
1518             for (i = 0; i < 64; i++) {
1519                 s->dct_error_sum[intra][i] >>= 1;
1520             }
1521             s->dct_count[intra] >>= 1;
1522         }
1523
1524         for (i = 0; i < 64; i++) {
1525             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1526                                        s->dct_count[intra] +
1527                                        s->dct_error_sum[intra][i] / 2) /
1528                                       (s->dct_error_sum[intra][i] + 1);
1529         }
1530     }
1531 }
1532
1533 static int frame_start(MpegEncContext *s)
1534 {
1535     int ret;
1536
1537     /* mark & release old frames */
1538     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1539         s->last_picture_ptr != s->next_picture_ptr &&
1540         s->last_picture_ptr->f.buf[0]) {
1541         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1542     }
1543
1544     s->current_picture_ptr->f.pict_type = s->pict_type;
1545     s->current_picture_ptr->f.key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1546
1547     ff_mpeg_unref_picture(s, &s->current_picture);
1548     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1549                                    s->current_picture_ptr)) < 0)
1550         return ret;
1551
1552     if (s->pict_type != AV_PICTURE_TYPE_B) {
1553         s->last_picture_ptr = s->next_picture_ptr;
1554         if (!s->droppable)
1555             s->next_picture_ptr = s->current_picture_ptr;
1556     }
1557
1558     if (s->last_picture_ptr) {
1559         ff_mpeg_unref_picture(s, &s->last_picture);
1560         if (s->last_picture_ptr->f.buf[0] &&
1561             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1562                                        s->last_picture_ptr)) < 0)
1563             return ret;
1564     }
1565     if (s->next_picture_ptr) {
1566         ff_mpeg_unref_picture(s, &s->next_picture);
1567         if (s->next_picture_ptr->f.buf[0] &&
1568             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1569                                        s->next_picture_ptr)) < 0)
1570             return ret;
1571     }
1572
1573     if (s->picture_structure!= PICT_FRAME) {
1574         int i;
1575         for (i = 0; i < 4; i++) {
1576             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1577                 s->current_picture.f.data[i] +=
1578                     s->current_picture.f.linesize[i];
1579             }
1580             s->current_picture.f.linesize[i] *= 2;
1581             s->last_picture.f.linesize[i]    *= 2;
1582             s->next_picture.f.linesize[i]    *= 2;
1583         }
1584     }
1585
1586     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1587         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1588         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1589     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1590         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1591         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1592     } else {
1593         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1594         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1595     }
1596
1597     if (s->dct_error_sum) {
1598         av_assert2(s->avctx->noise_reduction && s->encoding);
1599         update_noise_reduction(s);
1600     }
1601
1602     return 0;
1603 }
1604
1605 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1606                           AVFrame *pic_arg, int *got_packet)
1607 {
1608     MpegEncContext *s = avctx->priv_data;
1609     int i, stuffing_count, ret;
1610     int context_count = s->slice_context_count;
1611
1612     s->picture_in_gop_number++;
1613
1614     if (load_input_picture(s, pic_arg) < 0)
1615         return -1;
1616
1617     if (select_input_picture(s) < 0) {
1618         return -1;
1619     }
1620
1621     /* output? */
1622     if (s->new_picture.f.data[0]) {
1623         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1624             return ret;
1625         if (s->mb_info) {
1626             s->mb_info_ptr = av_packet_new_side_data(pkt,
1627                                  AV_PKT_DATA_H263_MB_INFO,
1628                                  s->mb_width*s->mb_height*12);
1629             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1630         }
1631
1632         for (i = 0; i < context_count; i++) {
1633             int start_y = s->thread_context[i]->start_mb_y;
1634             int   end_y = s->thread_context[i]->  end_mb_y;
1635             int h       = s->mb_height;
1636             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1637             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1638
1639             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1640         }
1641
1642         s->pict_type = s->new_picture.f.pict_type;
1643         //emms_c();
1644         ret = frame_start(s);
1645         if (ret < 0)
1646             return ret;
1647 vbv_retry:
1648         if (encode_picture(s, s->picture_number) < 0)
1649             return -1;
1650
1651         avctx->header_bits = s->header_bits;
1652         avctx->mv_bits     = s->mv_bits;
1653         avctx->misc_bits   = s->misc_bits;
1654         avctx->i_tex_bits  = s->i_tex_bits;
1655         avctx->p_tex_bits  = s->p_tex_bits;
1656         avctx->i_count     = s->i_count;
1657         // FIXME f/b_count in avctx
1658         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1659         avctx->skip_count  = s->skip_count;
1660
1661         frame_end(s);
1662
1663         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1664             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1665
1666         if (avctx->rc_buffer_size) {
1667             RateControlContext *rcc = &s->rc_context;
1668             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1669
1670             if (put_bits_count(&s->pb) > max_size &&
1671                 s->lambda < s->avctx->lmax) {
1672                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1673                                        (s->qscale + 1) / s->qscale);
1674                 if (s->adaptive_quant) {
1675                     int i;
1676                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1677                         s->lambda_table[i] =
1678                             FFMAX(s->lambda_table[i] + 1,
1679                                   s->lambda_table[i] * (s->qscale + 1) /
1680                                   s->qscale);
1681                 }
1682                 s->mb_skipped = 0;        // done in frame_start()
1683                 // done in encode_picture() so we must undo it
1684                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1685                     if (s->flipflop_rounding          ||
1686                         s->codec_id == AV_CODEC_ID_H263P ||
1687                         s->codec_id == AV_CODEC_ID_MPEG4)
1688                         s->no_rounding ^= 1;
1689                 }
1690                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1691                     s->time_base       = s->last_time_base;
1692                     s->last_non_b_time = s->time - s->pp_time;
1693                 }
1694                 for (i = 0; i < context_count; i++) {
1695                     PutBitContext *pb = &s->thread_context[i]->pb;
1696                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1697                 }
1698                 goto vbv_retry;
1699             }
1700
1701             assert(s->avctx->rc_max_rate);
1702         }
1703
1704         if (s->flags & CODEC_FLAG_PASS1)
1705             ff_write_pass1_stats(s);
1706
1707         for (i = 0; i < 4; i++) {
1708             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1709             avctx->error[i] += s->current_picture_ptr->f.error[i];
1710         }
1711
1712         if (s->flags & CODEC_FLAG_PASS1)
1713             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1714                    avctx->i_tex_bits + avctx->p_tex_bits ==
1715                        put_bits_count(&s->pb));
1716         flush_put_bits(&s->pb);
1717         s->frame_bits  = put_bits_count(&s->pb);
1718
1719         stuffing_count = ff_vbv_update(s, s->frame_bits);
1720         s->stuffing_bits = 8*stuffing_count;
1721         if (stuffing_count) {
1722             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1723                     stuffing_count + 50) {
1724                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1725                 return -1;
1726             }
1727
1728             switch (s->codec_id) {
1729             case AV_CODEC_ID_MPEG1VIDEO:
1730             case AV_CODEC_ID_MPEG2VIDEO:
1731                 while (stuffing_count--) {
1732                     put_bits(&s->pb, 8, 0);
1733                 }
1734             break;
1735             case AV_CODEC_ID_MPEG4:
1736                 put_bits(&s->pb, 16, 0);
1737                 put_bits(&s->pb, 16, 0x1C3);
1738                 stuffing_count -= 4;
1739                 while (stuffing_count--) {
1740                     put_bits(&s->pb, 8, 0xFF);
1741                 }
1742             break;
1743             default:
1744                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1745             }
1746             flush_put_bits(&s->pb);
1747             s->frame_bits  = put_bits_count(&s->pb);
1748         }
1749
1750         /* update mpeg1/2 vbv_delay for CBR */
1751         if (s->avctx->rc_max_rate                          &&
1752             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1753             s->out_format == FMT_MPEG1                     &&
1754             90000LL * (avctx->rc_buffer_size - 1) <=
1755                 s->avctx->rc_max_rate * 0xFFFFLL) {
1756             int vbv_delay, min_delay;
1757             double inbits  = s->avctx->rc_max_rate *
1758                              av_q2d(s->avctx->time_base);
1759             int    minbits = s->frame_bits - 8 *
1760                              (s->vbv_delay_ptr - s->pb.buf - 1);
1761             double bits    = s->rc_context.buffer_index + minbits - inbits;
1762
1763             if (bits < 0)
1764                 av_log(s->avctx, AV_LOG_ERROR,
1765                        "Internal error, negative bits\n");
1766
1767             assert(s->repeat_first_field == 0);
1768
1769             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1770             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1771                         s->avctx->rc_max_rate;
1772
1773             vbv_delay = FFMAX(vbv_delay, min_delay);
1774
1775             av_assert0(vbv_delay < 0xFFFF);
1776
1777             s->vbv_delay_ptr[0] &= 0xF8;
1778             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1779             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1780             s->vbv_delay_ptr[2] &= 0x07;
1781             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1782             avctx->vbv_delay     = vbv_delay * 300;
1783         }
1784         s->total_bits     += s->frame_bits;
1785         avctx->frame_bits  = s->frame_bits;
1786
1787         pkt->pts = s->current_picture.f.pts;
1788         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1789             if (!s->current_picture.f.coded_picture_number)
1790                 pkt->dts = pkt->pts - s->dts_delta;
1791             else
1792                 pkt->dts = s->reordered_pts;
1793             s->reordered_pts = pkt->pts;
1794         } else
1795             pkt->dts = pkt->pts;
1796         if (s->current_picture.f.key_frame)
1797             pkt->flags |= AV_PKT_FLAG_KEY;
1798         if (s->mb_info)
1799             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1800     } else {
1801         s->frame_bits = 0;
1802     }
1803
1804     /* release non-reference frames */
1805     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1806         if (!s->picture[i].reference)
1807             ff_mpeg_unref_picture(s, &s->picture[i]);
1808     }
1809
1810     assert((s->frame_bits & 7) == 0);
1811
1812     pkt->size = s->frame_bits / 8;
1813     *got_packet = !!pkt->size;
1814     return 0;
1815 }
1816
1817 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1818                                                 int n, int threshold)
1819 {
1820     static const char tab[64] = {
1821         3, 2, 2, 1, 1, 1, 1, 1,
1822         1, 1, 1, 1, 1, 1, 1, 1,
1823         1, 1, 1, 1, 1, 1, 1, 1,
1824         0, 0, 0, 0, 0, 0, 0, 0,
1825         0, 0, 0, 0, 0, 0, 0, 0,
1826         0, 0, 0, 0, 0, 0, 0, 0,
1827         0, 0, 0, 0, 0, 0, 0, 0,
1828         0, 0, 0, 0, 0, 0, 0, 0
1829     };
1830     int score = 0;
1831     int run = 0;
1832     int i;
1833     int16_t *block = s->block[n];
1834     const int last_index = s->block_last_index[n];
1835     int skip_dc;
1836
1837     if (threshold < 0) {
1838         skip_dc = 0;
1839         threshold = -threshold;
1840     } else
1841         skip_dc = 1;
1842
1843     /* Are all we could set to zero already zero? */
1844     if (last_index <= skip_dc - 1)
1845         return;
1846
1847     for (i = 0; i <= last_index; i++) {
1848         const int j = s->intra_scantable.permutated[i];
1849         const int level = FFABS(block[j]);
1850         if (level == 1) {
1851             if (skip_dc && i == 0)
1852                 continue;
1853             score += tab[run];
1854             run = 0;
1855         } else if (level > 1) {
1856             return;
1857         } else {
1858             run++;
1859         }
1860     }
1861     if (score >= threshold)
1862         return;
1863     for (i = skip_dc; i <= last_index; i++) {
1864         const int j = s->intra_scantable.permutated[i];
1865         block[j] = 0;
1866     }
1867     if (block[0])
1868         s->block_last_index[n] = 0;
1869     else
1870         s->block_last_index[n] = -1;
1871 }
1872
1873 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1874                                int last_index)
1875 {
1876     int i;
1877     const int maxlevel = s->max_qcoeff;
1878     const int minlevel = s->min_qcoeff;
1879     int overflow = 0;
1880
1881     if (s->mb_intra) {
1882         i = 1; // skip clipping of intra dc
1883     } else
1884         i = 0;
1885
1886     for (; i <= last_index; i++) {
1887         const int j = s->intra_scantable.permutated[i];
1888         int level = block[j];
1889
1890         if (level > maxlevel) {
1891             level = maxlevel;
1892             overflow++;
1893         } else if (level < minlevel) {
1894             level = minlevel;
1895             overflow++;
1896         }
1897
1898         block[j] = level;
1899     }
1900
1901     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1902         av_log(s->avctx, AV_LOG_INFO,
1903                "warning, clipping %d dct coefficients to %d..%d\n",
1904                overflow, minlevel, maxlevel);
1905 }
1906
1907 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1908 {
1909     int x, y;
1910     // FIXME optimize
1911     for (y = 0; y < 8; y++) {
1912         for (x = 0; x < 8; x++) {
1913             int x2, y2;
1914             int sum = 0;
1915             int sqr = 0;
1916             int count = 0;
1917
1918             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1919                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1920                     int v = ptr[x2 + y2 * stride];
1921                     sum += v;
1922                     sqr += v * v;
1923                     count++;
1924                 }
1925             }
1926             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1927         }
1928     }
1929 }
1930
1931 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1932                                                 int motion_x, int motion_y,
1933                                                 int mb_block_height,
1934                                                 int mb_block_width,
1935                                                 int mb_block_count)
1936 {
1937     int16_t weight[12][64];
1938     int16_t orig[12][64];
1939     const int mb_x = s->mb_x;
1940     const int mb_y = s->mb_y;
1941     int i;
1942     int skip_dct[12];
1943     int dct_offset = s->linesize * 8; // default for progressive frames
1944     int uv_dct_offset = s->uvlinesize * 8;
1945     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1946     ptrdiff_t wrap_y, wrap_c;
1947
1948     for (i = 0; i < mb_block_count; i++)
1949         skip_dct[i] = s->skipdct;
1950
1951     if (s->adaptive_quant) {
1952         const int last_qp = s->qscale;
1953         const int mb_xy = mb_x + mb_y * s->mb_stride;
1954
1955         s->lambda = s->lambda_table[mb_xy];
1956         update_qscale(s);
1957
1958         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1959             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1960             s->dquant = s->qscale - last_qp;
1961
1962             if (s->out_format == FMT_H263) {
1963                 s->dquant = av_clip(s->dquant, -2, 2);
1964
1965                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1966                     if (!s->mb_intra) {
1967                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1968                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1969                                 s->dquant = 0;
1970                         }
1971                         if (s->mv_type == MV_TYPE_8X8)
1972                             s->dquant = 0;
1973                     }
1974                 }
1975             }
1976         }
1977         ff_set_qscale(s, last_qp + s->dquant);
1978     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1979         ff_set_qscale(s, s->qscale + s->dquant);
1980
1981     wrap_y = s->linesize;
1982     wrap_c = s->uvlinesize;
1983     ptr_y  = s->new_picture.f.data[0] +
1984              (mb_y * 16 * wrap_y)              + mb_x * 16;
1985     ptr_cb = s->new_picture.f.data[1] +
1986              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1987     ptr_cr = s->new_picture.f.data[2] +
1988              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1989
1990     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1991         uint8_t *ebuf = s->edge_emu_buffer + 32;
1992         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
1993         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
1994         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1995                                  wrap_y, wrap_y,
1996                                  16, 16, mb_x * 16, mb_y * 16,
1997                                  s->width, s->height);
1998         ptr_y = ebuf;
1999         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2000                                  wrap_c, wrap_c,
2001                                  mb_block_width, mb_block_height,
2002                                  mb_x * mb_block_width, mb_y * mb_block_height,
2003                                  cw, ch);
2004         ptr_cb = ebuf + 18 * wrap_y;
2005         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr,
2006                                  wrap_c, wrap_c,
2007                                  mb_block_width, mb_block_height,
2008                                  mb_x * mb_block_width, mb_y * mb_block_height,
2009                                  cw, ch);
2010         ptr_cr = ebuf + 18 * wrap_y + 16;
2011     }
2012
2013     if (s->mb_intra) {
2014         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2015             int progressive_score, interlaced_score;
2016
2017             s->interlaced_dct = 0;
2018             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
2019                                                     NULL, wrap_y, 8) +
2020                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2021                                                     NULL, wrap_y, 8) - 400;
2022
2023             if (progressive_score > 0) {
2024                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
2025                                                        NULL, wrap_y * 2, 8) +
2026                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
2027                                                        NULL, wrap_y * 2, 8);
2028                 if (progressive_score > interlaced_score) {
2029                     s->interlaced_dct = 1;
2030
2031                     dct_offset = wrap_y;
2032                     uv_dct_offset = wrap_c;
2033                     wrap_y <<= 1;
2034                     if (s->chroma_format == CHROMA_422 ||
2035                         s->chroma_format == CHROMA_444)
2036                         wrap_c <<= 1;
2037                 }
2038             }
2039         }
2040
2041         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
2042         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
2043         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
2044         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
2045
2046         if (s->flags & CODEC_FLAG_GRAY) {
2047             skip_dct[4] = 1;
2048             skip_dct[5] = 1;
2049         } else {
2050             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2051             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2052             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2053                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2054                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2055             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2056                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
2057                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
2058                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
2059                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
2060                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2061                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2062             }
2063         }
2064     } else {
2065         op_pixels_func (*op_pix)[4];
2066         qpel_mc_func (*op_qpix)[16];
2067         uint8_t *dest_y, *dest_cb, *dest_cr;
2068
2069         dest_y  = s->dest[0];
2070         dest_cb = s->dest[1];
2071         dest_cr = s->dest[2];
2072
2073         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2074             op_pix  = s->hdsp.put_pixels_tab;
2075             op_qpix = s->dsp.put_qpel_pixels_tab;
2076         } else {
2077             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2078             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
2079         }
2080
2081         if (s->mv_dir & MV_DIR_FORWARD) {
2082             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
2083                           s->last_picture.f.data,
2084                           op_pix, op_qpix);
2085             op_pix  = s->hdsp.avg_pixels_tab;
2086             op_qpix = s->dsp.avg_qpel_pixels_tab;
2087         }
2088         if (s->mv_dir & MV_DIR_BACKWARD) {
2089             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
2090                           s->next_picture.f.data,
2091                           op_pix, op_qpix);
2092         }
2093
2094         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2095             int progressive_score, interlaced_score;
2096
2097             s->interlaced_dct = 0;
2098             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
2099                                                     ptr_y,              wrap_y,
2100                                                     8) +
2101                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
2102                                                     ptr_y + wrap_y * 8, wrap_y,
2103                                                     8) - 400;
2104
2105             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2106                 progressive_score -= 400;
2107
2108             if (progressive_score > 0) {
2109                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
2110                                                        ptr_y,
2111                                                        wrap_y * 2, 8) +
2112                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
2113                                                        ptr_y + wrap_y,
2114                                                        wrap_y * 2, 8);
2115
2116                 if (progressive_score > interlaced_score) {
2117                     s->interlaced_dct = 1;
2118
2119                     dct_offset = wrap_y;
2120                     uv_dct_offset = wrap_c;
2121                     wrap_y <<= 1;
2122                     if (s->chroma_format == CHROMA_422)
2123                         wrap_c <<= 1;
2124                 }
2125             }
2126         }
2127
2128         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2129         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2130         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2131                            dest_y + dct_offset, wrap_y);
2132         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2133                            dest_y + dct_offset + 8, wrap_y);
2134
2135         if (s->flags & CODEC_FLAG_GRAY) {
2136             skip_dct[4] = 1;
2137             skip_dct[5] = 1;
2138         } else {
2139             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2140             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2141             if (!s->chroma_y_shift) { /* 422 */
2142                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2143                                    dest_cb + uv_dct_offset, wrap_c);
2144                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2145                                    dest_cr + uv_dct_offset, wrap_c);
2146             }
2147         }
2148         /* pre quantization */
2149         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2150                 2 * s->qscale * s->qscale) {
2151             // FIXME optimize
2152             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
2153                               wrap_y, 8) < 20 * s->qscale)
2154                 skip_dct[0] = 1;
2155             if (s->dsp.sad[1](NULL, ptr_y + 8,
2156                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2157                 skip_dct[1] = 1;
2158             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2159                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2160                 skip_dct[2] = 1;
2161             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2162                               dest_y + dct_offset + 8,
2163                               wrap_y, 8) < 20 * s->qscale)
2164                 skip_dct[3] = 1;
2165             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2166                               wrap_c, 8) < 20 * s->qscale)
2167                 skip_dct[4] = 1;
2168             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2169                               wrap_c, 8) < 20 * s->qscale)
2170                 skip_dct[5] = 1;
2171             if (!s->chroma_y_shift) { /* 422 */
2172                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
2173                                   dest_cb + uv_dct_offset,
2174                                   wrap_c, 8) < 20 * s->qscale)
2175                     skip_dct[6] = 1;
2176                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2177                                   dest_cr + uv_dct_offset,
2178                                   wrap_c, 8) < 20 * s->qscale)
2179                     skip_dct[7] = 1;
2180             }
2181         }
2182     }
2183
2184     if (s->quantizer_noise_shaping) {
2185         if (!skip_dct[0])
2186             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2187         if (!skip_dct[1])
2188             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2189         if (!skip_dct[2])
2190             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2191         if (!skip_dct[3])
2192             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2193         if (!skip_dct[4])
2194             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2195         if (!skip_dct[5])
2196             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2197         if (!s->chroma_y_shift) { /* 422 */
2198             if (!skip_dct[6])
2199                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2200                                   wrap_c);
2201             if (!skip_dct[7])
2202                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2203                                   wrap_c);
2204         }
2205         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2206     }
2207
2208     /* DCT & quantize */
2209     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2210     {
2211         for (i = 0; i < mb_block_count; i++) {
2212             if (!skip_dct[i]) {
2213                 int overflow;
2214                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2215                 // FIXME we could decide to change to quantizer instead of
2216                 // clipping
2217                 // JS: I don't think that would be a good idea it could lower
2218                 //     quality instead of improve it. Just INTRADC clipping
2219                 //     deserves changes in quantizer
2220                 if (overflow)
2221                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2222             } else
2223                 s->block_last_index[i] = -1;
2224         }
2225         if (s->quantizer_noise_shaping) {
2226             for (i = 0; i < mb_block_count; i++) {
2227                 if (!skip_dct[i]) {
2228                     s->block_last_index[i] =
2229                         dct_quantize_refine(s, s->block[i], weight[i],
2230                                             orig[i], i, s->qscale);
2231                 }
2232             }
2233         }
2234
2235         if (s->luma_elim_threshold && !s->mb_intra)
2236             for (i = 0; i < 4; i++)
2237                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2238         if (s->chroma_elim_threshold && !s->mb_intra)
2239             for (i = 4; i < mb_block_count; i++)
2240                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2241
2242         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2243             for (i = 0; i < mb_block_count; i++) {
2244                 if (s->block_last_index[i] == -1)
2245                     s->coded_score[i] = INT_MAX / 256;
2246             }
2247         }
2248     }
2249
2250     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2251         s->block_last_index[4] =
2252         s->block_last_index[5] = 0;
2253         s->block[4][0] =
2254         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2255         if (!s->chroma_y_shift) { /* 422 / 444 */
2256             for (i=6; i<12; i++) {
2257                 s->block_last_index[i] = 0;
2258                 s->block[i][0] = s->block[4][0];
2259             }
2260         }
2261     }
2262
2263     // non c quantize code returns incorrect block_last_index FIXME
2264     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2265         for (i = 0; i < mb_block_count; i++) {
2266             int j;
2267             if (s->block_last_index[i] > 0) {
2268                 for (j = 63; j > 0; j--) {
2269                     if (s->block[i][s->intra_scantable.permutated[j]])
2270                         break;
2271                 }
2272                 s->block_last_index[i] = j;
2273             }
2274         }
2275     }
2276
2277     /* huffman encode */
2278     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2279     case AV_CODEC_ID_MPEG1VIDEO:
2280     case AV_CODEC_ID_MPEG2VIDEO:
2281         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2282             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2283         break;
2284     case AV_CODEC_ID_MPEG4:
2285         if (CONFIG_MPEG4_ENCODER)
2286             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2287         break;
2288     case AV_CODEC_ID_MSMPEG4V2:
2289     case AV_CODEC_ID_MSMPEG4V3:
2290     case AV_CODEC_ID_WMV1:
2291         if (CONFIG_MSMPEG4_ENCODER)
2292             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2293         break;
2294     case AV_CODEC_ID_WMV2:
2295         if (CONFIG_WMV2_ENCODER)
2296             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2297         break;
2298     case AV_CODEC_ID_H261:
2299         if (CONFIG_H261_ENCODER)
2300             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2301         break;
2302     case AV_CODEC_ID_H263:
2303     case AV_CODEC_ID_H263P:
2304     case AV_CODEC_ID_FLV1:
2305     case AV_CODEC_ID_RV10:
2306     case AV_CODEC_ID_RV20:
2307         if (CONFIG_H263_ENCODER)
2308             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2309         break;
2310     case AV_CODEC_ID_MJPEG:
2311     case AV_CODEC_ID_AMV:
2312         if (CONFIG_MJPEG_ENCODER)
2313             ff_mjpeg_encode_mb(s, s->block);
2314         break;
2315     default:
2316         av_assert1(0);
2317     }
2318 }
2319
2320 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2321 {
2322     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2323     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2324     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2325 }
2326
2327 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2328     int i;
2329
2330     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2331
2332     /* mpeg1 */
2333     d->mb_skip_run= s->mb_skip_run;
2334     for(i=0; i<3; i++)
2335         d->last_dc[i] = s->last_dc[i];
2336
2337     /* statistics */
2338     d->mv_bits= s->mv_bits;
2339     d->i_tex_bits= s->i_tex_bits;
2340     d->p_tex_bits= s->p_tex_bits;
2341     d->i_count= s->i_count;
2342     d->f_count= s->f_count;
2343     d->b_count= s->b_count;
2344     d->skip_count= s->skip_count;
2345     d->misc_bits= s->misc_bits;
2346     d->last_bits= 0;
2347
2348     d->mb_skipped= 0;
2349     d->qscale= s->qscale;
2350     d->dquant= s->dquant;
2351
2352     d->esc3_level_length= s->esc3_level_length;
2353 }
2354
2355 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2356     int i;
2357
2358     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2359     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2360
2361     /* mpeg1 */
2362     d->mb_skip_run= s->mb_skip_run;
2363     for(i=0; i<3; i++)
2364         d->last_dc[i] = s->last_dc[i];
2365
2366     /* statistics */
2367     d->mv_bits= s->mv_bits;
2368     d->i_tex_bits= s->i_tex_bits;
2369     d->p_tex_bits= s->p_tex_bits;
2370     d->i_count= s->i_count;
2371     d->f_count= s->f_count;
2372     d->b_count= s->b_count;
2373     d->skip_count= s->skip_count;
2374     d->misc_bits= s->misc_bits;
2375
2376     d->mb_intra= s->mb_intra;
2377     d->mb_skipped= s->mb_skipped;
2378     d->mv_type= s->mv_type;
2379     d->mv_dir= s->mv_dir;
2380     d->pb= s->pb;
2381     if(s->data_partitioning){
2382         d->pb2= s->pb2;
2383         d->tex_pb= s->tex_pb;
2384     }
2385     d->block= s->block;
2386     for(i=0; i<8; i++)
2387         d->block_last_index[i]= s->block_last_index[i];
2388     d->interlaced_dct= s->interlaced_dct;
2389     d->qscale= s->qscale;
2390
2391     d->esc3_level_length= s->esc3_level_length;
2392 }
2393
2394 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2395                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2396                            int *dmin, int *next_block, int motion_x, int motion_y)
2397 {
2398     int score;
2399     uint8_t *dest_backup[3];
2400
2401     copy_context_before_encode(s, backup, type);
2402
2403     s->block= s->blocks[*next_block];
2404     s->pb= pb[*next_block];
2405     if(s->data_partitioning){
2406         s->pb2   = pb2   [*next_block];
2407         s->tex_pb= tex_pb[*next_block];
2408     }
2409
2410     if(*next_block){
2411         memcpy(dest_backup, s->dest, sizeof(s->dest));
2412         s->dest[0] = s->rd_scratchpad;
2413         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2414         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2415         assert(s->linesize >= 32); //FIXME
2416     }
2417
2418     encode_mb(s, motion_x, motion_y);
2419
2420     score= put_bits_count(&s->pb);
2421     if(s->data_partitioning){
2422         score+= put_bits_count(&s->pb2);
2423         score+= put_bits_count(&s->tex_pb);
2424     }
2425
2426     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2427         ff_MPV_decode_mb(s, s->block);
2428
2429         score *= s->lambda2;
2430         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2431     }
2432
2433     if(*next_block){
2434         memcpy(s->dest, dest_backup, sizeof(s->dest));
2435     }
2436
2437     if(score<*dmin){
2438         *dmin= score;
2439         *next_block^=1;
2440
2441         copy_context_after_encode(best, s, type);
2442     }
2443 }
2444
2445 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2446     uint32_t *sq = ff_squareTbl + 256;
2447     int acc=0;
2448     int x,y;
2449
2450     if(w==16 && h==16)
2451         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2452     else if(w==8 && h==8)
2453         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2454
2455     for(y=0; y<h; y++){
2456         for(x=0; x<w; x++){
2457             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2458         }
2459     }
2460
2461     av_assert2(acc>=0);
2462
2463     return acc;
2464 }
2465
2466 static int sse_mb(MpegEncContext *s){
2467     int w= 16;
2468     int h= 16;
2469
2470     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2471     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2472
2473     if(w==16 && h==16)
2474       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2475         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2476                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2477                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2478       }else{
2479         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2480                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2481                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2482       }
2483     else
2484         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2485                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2486                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2487 }
2488
2489 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2490     MpegEncContext *s= *(void**)arg;
2491
2492
2493     s->me.pre_pass=1;
2494     s->me.dia_size= s->avctx->pre_dia_size;
2495     s->first_slice_line=1;
2496     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2497         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2498             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2499         }
2500         s->first_slice_line=0;
2501     }
2502
2503     s->me.pre_pass=0;
2504
2505     return 0;
2506 }
2507
2508 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2509     MpegEncContext *s= *(void**)arg;
2510
2511     ff_check_alignment();
2512
2513     s->me.dia_size= s->avctx->dia_size;
2514     s->first_slice_line=1;
2515     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2516         s->mb_x=0; //for block init below
2517         ff_init_block_index(s);
2518         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2519             s->block_index[0]+=2;
2520             s->block_index[1]+=2;
2521             s->block_index[2]+=2;
2522             s->block_index[3]+=2;
2523
2524             /* compute motion vector & mb_type and store in context */
2525             if(s->pict_type==AV_PICTURE_TYPE_B)
2526                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2527             else
2528                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2529         }
2530         s->first_slice_line=0;
2531     }
2532     return 0;
2533 }
2534
2535 static int mb_var_thread(AVCodecContext *c, void *arg){
2536     MpegEncContext *s= *(void**)arg;
2537     int mb_x, mb_y;
2538
2539     ff_check_alignment();
2540
2541     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2542         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2543             int xx = mb_x * 16;
2544             int yy = mb_y * 16;
2545             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2546             int varc;
2547             int sum = s->dsp.pix_sum(pix, s->linesize);
2548
2549             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2550
2551             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2552             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2553             s->me.mb_var_sum_temp    += varc;
2554         }
2555     }
2556     return 0;
2557 }
2558
2559 static void write_slice_end(MpegEncContext *s){
2560     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2561         if(s->partitioned_frame){
2562             ff_mpeg4_merge_partitions(s);
2563         }
2564
2565         ff_mpeg4_stuffing(&s->pb);
2566     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2567         ff_mjpeg_encode_stuffing(s);
2568     }
2569
2570     avpriv_align_put_bits(&s->pb);
2571     flush_put_bits(&s->pb);
2572
2573     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2574         s->misc_bits+= get_bits_diff(s);
2575 }
2576
2577 static void write_mb_info(MpegEncContext *s)
2578 {
2579     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2580     int offset = put_bits_count(&s->pb);
2581     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2582     int gobn = s->mb_y / s->gob_index;
2583     int pred_x, pred_y;
2584     if (CONFIG_H263_ENCODER)
2585         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2586     bytestream_put_le32(&ptr, offset);
2587     bytestream_put_byte(&ptr, s->qscale);
2588     bytestream_put_byte(&ptr, gobn);
2589     bytestream_put_le16(&ptr, mba);
2590     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2591     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2592     /* 4MV not implemented */
2593     bytestream_put_byte(&ptr, 0); /* hmv2 */
2594     bytestream_put_byte(&ptr, 0); /* vmv2 */
2595 }
2596
2597 static void update_mb_info(MpegEncContext *s, int startcode)
2598 {
2599     if (!s->mb_info)
2600         return;
2601     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2602         s->mb_info_size += 12;
2603         s->prev_mb_info = s->last_mb_info;
2604     }
2605     if (startcode) {
2606         s->prev_mb_info = put_bits_count(&s->pb)/8;
2607         /* This might have incremented mb_info_size above, and we return without
2608          * actually writing any info into that slot yet. But in that case,
2609          * this will be called again at the start of the after writing the
2610          * start code, actually writing the mb info. */
2611         return;
2612     }
2613
2614     s->last_mb_info = put_bits_count(&s->pb)/8;
2615     if (!s->mb_info_size)
2616         s->mb_info_size += 12;
2617     write_mb_info(s);
2618 }
2619
2620 static int encode_thread(AVCodecContext *c, void *arg){
2621     MpegEncContext *s= *(void**)arg;
2622     int mb_x, mb_y, pdif = 0;
2623     int chr_h= 16>>s->chroma_y_shift;
2624     int i, j;
2625     MpegEncContext best_s, backup_s;
2626     uint8_t bit_buf[2][MAX_MB_BYTES];
2627     uint8_t bit_buf2[2][MAX_MB_BYTES];
2628     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2629     PutBitContext pb[2], pb2[2], tex_pb[2];
2630
2631     ff_check_alignment();
2632
2633     for(i=0; i<2; i++){
2634         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2635         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2636         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2637     }
2638
2639     s->last_bits= put_bits_count(&s->pb);
2640     s->mv_bits=0;
2641     s->misc_bits=0;
2642     s->i_tex_bits=0;
2643     s->p_tex_bits=0;
2644     s->i_count=0;
2645     s->f_count=0;
2646     s->b_count=0;
2647     s->skip_count=0;
2648
2649     for(i=0; i<3; i++){
2650         /* init last dc values */
2651         /* note: quant matrix value (8) is implied here */
2652         s->last_dc[i] = 128 << s->intra_dc_precision;
2653
2654         s->current_picture.f.error[i] = 0;
2655     }
2656     if(s->codec_id==AV_CODEC_ID_AMV){
2657         s->last_dc[0] = 128*8/13;
2658         s->last_dc[1] = 128*8/14;
2659         s->last_dc[2] = 128*8/14;
2660     }
2661     s->mb_skip_run = 0;
2662     memset(s->last_mv, 0, sizeof(s->last_mv));
2663
2664     s->last_mv_dir = 0;
2665
2666     switch(s->codec_id){
2667     case AV_CODEC_ID_H263:
2668     case AV_CODEC_ID_H263P:
2669     case AV_CODEC_ID_FLV1:
2670         if (CONFIG_H263_ENCODER)
2671             s->gob_index = ff_h263_get_gob_height(s);
2672         break;
2673     case AV_CODEC_ID_MPEG4:
2674         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2675             ff_mpeg4_init_partitions(s);
2676         break;
2677     }
2678
2679     s->resync_mb_x=0;
2680     s->resync_mb_y=0;
2681     s->first_slice_line = 1;
2682     s->ptr_lastgob = s->pb.buf;
2683     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2684         s->mb_x=0;
2685         s->mb_y= mb_y;
2686
2687         ff_set_qscale(s, s->qscale);
2688         ff_init_block_index(s);
2689
2690         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2691             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2692             int mb_type= s->mb_type[xy];
2693 //            int d;
2694             int dmin= INT_MAX;
2695             int dir;
2696
2697             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2698                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2699                 return -1;
2700             }
2701             if(s->data_partitioning){
2702                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2703                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2704                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2705                     return -1;
2706                 }
2707             }
2708
2709             s->mb_x = mb_x;
2710             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2711             ff_update_block_index(s);
2712
2713             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2714                 ff_h261_reorder_mb_index(s);
2715                 xy= s->mb_y*s->mb_stride + s->mb_x;
2716                 mb_type= s->mb_type[xy];
2717             }
2718
2719             /* write gob / video packet header  */
2720             if(s->rtp_mode){
2721                 int current_packet_size, is_gob_start;
2722
2723                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2724
2725                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2726
2727                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2728
2729                 switch(s->codec_id){
2730                 case AV_CODEC_ID_H263:
2731                 case AV_CODEC_ID_H263P:
2732                     if(!s->h263_slice_structured)
2733                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2734                     break;
2735                 case AV_CODEC_ID_MPEG2VIDEO:
2736                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2737                 case AV_CODEC_ID_MPEG1VIDEO:
2738                     if(s->mb_skip_run) is_gob_start=0;
2739                     break;
2740                 case AV_CODEC_ID_MJPEG:
2741                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2742                     break;
2743                 }
2744
2745                 if(is_gob_start){
2746                     if(s->start_mb_y != mb_y || mb_x!=0){
2747                         write_slice_end(s);
2748
2749                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2750                             ff_mpeg4_init_partitions(s);
2751                         }
2752                     }
2753
2754                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2755                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2756
2757                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2758                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2759                         int d = 100 / s->error_rate;
2760                         if(r % d == 0){
2761                             current_packet_size=0;
2762                             s->pb.buf_ptr= s->ptr_lastgob;
2763                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2764                         }
2765                     }
2766
2767                     if (s->avctx->rtp_callback){
2768                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2769                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2770                     }
2771                     update_mb_info(s, 1);
2772
2773                     switch(s->codec_id){
2774                     case AV_CODEC_ID_MPEG4:
2775                         if (CONFIG_MPEG4_ENCODER) {
2776                             ff_mpeg4_encode_video_packet_header(s);
2777                             ff_mpeg4_clean_buffers(s);
2778                         }
2779                     break;
2780                     case AV_CODEC_ID_MPEG1VIDEO:
2781                     case AV_CODEC_ID_MPEG2VIDEO:
2782                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2783                             ff_mpeg1_encode_slice_header(s);
2784                             ff_mpeg1_clean_buffers(s);
2785                         }
2786                     break;
2787                     case AV_CODEC_ID_H263:
2788                     case AV_CODEC_ID_H263P:
2789                         if (CONFIG_H263_ENCODER)
2790                             ff_h263_encode_gob_header(s, mb_y);
2791                     break;
2792                     }
2793
2794                     if(s->flags&CODEC_FLAG_PASS1){
2795                         int bits= put_bits_count(&s->pb);
2796                         s->misc_bits+= bits - s->last_bits;
2797                         s->last_bits= bits;
2798                     }
2799
2800                     s->ptr_lastgob += current_packet_size;
2801                     s->first_slice_line=1;
2802                     s->resync_mb_x=mb_x;
2803                     s->resync_mb_y=mb_y;
2804                 }
2805             }
2806
2807             if(  (s->resync_mb_x   == s->mb_x)
2808                && s->resync_mb_y+1 == s->mb_y){
2809                 s->first_slice_line=0;
2810             }
2811
2812             s->mb_skipped=0;
2813             s->dquant=0; //only for QP_RD
2814
2815             update_mb_info(s, 0);
2816
2817             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2818                 int next_block=0;
2819                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2820
2821                 copy_context_before_encode(&backup_s, s, -1);
2822                 backup_s.pb= s->pb;
2823                 best_s.data_partitioning= s->data_partitioning;
2824                 best_s.partitioned_frame= s->partitioned_frame;
2825                 if(s->data_partitioning){
2826                     backup_s.pb2= s->pb2;
2827                     backup_s.tex_pb= s->tex_pb;
2828                 }
2829
2830                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2831                     s->mv_dir = MV_DIR_FORWARD;
2832                     s->mv_type = MV_TYPE_16X16;
2833                     s->mb_intra= 0;
2834                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2835                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2836                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2837                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2838                 }
2839                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2840                     s->mv_dir = MV_DIR_FORWARD;
2841                     s->mv_type = MV_TYPE_FIELD;
2842                     s->mb_intra= 0;
2843                     for(i=0; i<2; i++){
2844                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2845                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2846                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2847                     }
2848                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2849                                  &dmin, &next_block, 0, 0);
2850                 }
2851                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2852                     s->mv_dir = MV_DIR_FORWARD;
2853                     s->mv_type = MV_TYPE_16X16;
2854                     s->mb_intra= 0;
2855                     s->mv[0][0][0] = 0;
2856                     s->mv[0][0][1] = 0;
2857                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2858                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2859                 }
2860                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2861                     s->mv_dir = MV_DIR_FORWARD;
2862                     s->mv_type = MV_TYPE_8X8;
2863                     s->mb_intra= 0;
2864                     for(i=0; i<4; i++){
2865                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2866                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2867                     }
2868                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2869                                  &dmin, &next_block, 0, 0);
2870                 }
2871                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2872                     s->mv_dir = MV_DIR_FORWARD;
2873                     s->mv_type = MV_TYPE_16X16;
2874                     s->mb_intra= 0;
2875                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2876                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2877                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2878                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2879                 }
2880                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2881                     s->mv_dir = MV_DIR_BACKWARD;
2882                     s->mv_type = MV_TYPE_16X16;
2883                     s->mb_intra= 0;
2884                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2885                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2886                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2887                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2888                 }
2889                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2890                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2891                     s->mv_type = MV_TYPE_16X16;
2892                     s->mb_intra= 0;
2893                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2894                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2895                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2896                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2897                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2898                                  &dmin, &next_block, 0, 0);
2899                 }
2900                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2901                     s->mv_dir = MV_DIR_FORWARD;
2902                     s->mv_type = MV_TYPE_FIELD;
2903                     s->mb_intra= 0;
2904                     for(i=0; i<2; i++){
2905                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2906                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2907                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2908                     }
2909                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2910                                  &dmin, &next_block, 0, 0);
2911                 }
2912                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2913                     s->mv_dir = MV_DIR_BACKWARD;
2914                     s->mv_type = MV_TYPE_FIELD;
2915                     s->mb_intra= 0;
2916                     for(i=0; i<2; i++){
2917                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2918                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2919                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2920                     }
2921                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2922                                  &dmin, &next_block, 0, 0);
2923                 }
2924                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2925                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2926                     s->mv_type = MV_TYPE_FIELD;
2927                     s->mb_intra= 0;
2928                     for(dir=0; dir<2; dir++){
2929                         for(i=0; i<2; i++){
2930                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2931                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2932                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2933                         }
2934                     }
2935                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2936                                  &dmin, &next_block, 0, 0);
2937                 }
2938                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2939                     s->mv_dir = 0;
2940                     s->mv_type = MV_TYPE_16X16;
2941                     s->mb_intra= 1;
2942                     s->mv[0][0][0] = 0;
2943                     s->mv[0][0][1] = 0;
2944                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2945                                  &dmin, &next_block, 0, 0);
2946                     if(s->h263_pred || s->h263_aic){
2947                         if(best_s.mb_intra)
2948                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2949                         else
2950                             ff_clean_intra_table_entries(s); //old mode?
2951                     }
2952                 }
2953
2954                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2955                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2956                         const int last_qp= backup_s.qscale;
2957                         int qpi, qp, dc[6];
2958                         int16_t ac[6][16];
2959                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2960                         static const int dquant_tab[4]={-1,1,-2,2};
2961                         int storecoefs = s->mb_intra && s->dc_val[0];
2962
2963                         av_assert2(backup_s.dquant == 0);
2964
2965                         //FIXME intra
2966                         s->mv_dir= best_s.mv_dir;
2967                         s->mv_type = MV_TYPE_16X16;
2968                         s->mb_intra= best_s.mb_intra;
2969                         s->mv[0][0][0] = best_s.mv[0][0][0];
2970                         s->mv[0][0][1] = best_s.mv[0][0][1];
2971                         s->mv[1][0][0] = best_s.mv[1][0][0];
2972                         s->mv[1][0][1] = best_s.mv[1][0][1];
2973
2974                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2975                         for(; qpi<4; qpi++){
2976                             int dquant= dquant_tab[qpi];
2977                             qp= last_qp + dquant;
2978                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2979                                 continue;
2980                             backup_s.dquant= dquant;
2981                             if(storecoefs){
2982                                 for(i=0; i<6; i++){
2983                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2984                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2985                                 }
2986                             }
2987
2988                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2989                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2990                             if(best_s.qscale != qp){
2991                                 if(storecoefs){
2992                                     for(i=0; i<6; i++){
2993                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2994                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2995                                     }
2996                                 }
2997                             }
2998                         }
2999                     }
3000                 }
3001                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3002                     int mx= s->b_direct_mv_table[xy][0];
3003                     int my= s->b_direct_mv_table[xy][1];
3004
3005                     backup_s.dquant = 0;
3006                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3007                     s->mb_intra= 0;
3008                     ff_mpeg4_set_direct_mv(s, mx, my);
3009                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3010                                  &dmin, &next_block, mx, my);
3011                 }
3012                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3013                     backup_s.dquant = 0;
3014                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3015                     s->mb_intra= 0;
3016                     ff_mpeg4_set_direct_mv(s, 0, 0);
3017                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3018                                  &dmin, &next_block, 0, 0);
3019                 }
3020                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3021                     int coded=0;
3022                     for(i=0; i<6; i++)
3023                         coded |= s->block_last_index[i];
3024                     if(coded){
3025                         int mx,my;
3026                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3027                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3028                             mx=my=0; //FIXME find the one we actually used
3029                             ff_mpeg4_set_direct_mv(s, mx, my);
3030                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3031                             mx= s->mv[1][0][0];
3032                             my= s->mv[1][0][1];
3033                         }else{
3034                             mx= s->mv[0][0][0];
3035                             my= s->mv[0][0][1];
3036                         }
3037
3038                         s->mv_dir= best_s.mv_dir;
3039                         s->mv_type = best_s.mv_type;
3040                         s->mb_intra= 0;
3041 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3042                         s->mv[0][0][1] = best_s.mv[0][0][1];
3043                         s->mv[1][0][0] = best_s.mv[1][0][0];
3044                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3045                         backup_s.dquant= 0;
3046                         s->skipdct=1;
3047                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3048                                         &dmin, &next_block, mx, my);
3049                         s->skipdct=0;
3050                     }
3051                 }
3052
3053                 s->current_picture.qscale_table[xy] = best_s.qscale;
3054
3055                 copy_context_after_encode(s, &best_s, -1);
3056
3057                 pb_bits_count= put_bits_count(&s->pb);
3058                 flush_put_bits(&s->pb);
3059                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3060                 s->pb= backup_s.pb;
3061
3062                 if(s->data_partitioning){
3063                     pb2_bits_count= put_bits_count(&s->pb2);
3064                     flush_put_bits(&s->pb2);
3065                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3066                     s->pb2= backup_s.pb2;
3067
3068                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3069                     flush_put_bits(&s->tex_pb);
3070                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3071                     s->tex_pb= backup_s.tex_pb;
3072                 }
3073                 s->last_bits= put_bits_count(&s->pb);
3074
3075                 if (CONFIG_H263_ENCODER &&
3076                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3077                     ff_h263_update_motion_val(s);
3078
3079                 if(next_block==0){ //FIXME 16 vs linesize16
3080                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3081                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3082                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3083                 }
3084
3085                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3086                     ff_MPV_decode_mb(s, s->block);
3087             } else {
3088                 int motion_x = 0, motion_y = 0;
3089                 s->mv_type=MV_TYPE_16X16;
3090                 // only one MB-Type possible
3091
3092                 switch(mb_type){
3093                 case CANDIDATE_MB_TYPE_INTRA:
3094                     s->mv_dir = 0;
3095                     s->mb_intra= 1;
3096                     motion_x= s->mv[0][0][0] = 0;
3097                     motion_y= s->mv[0][0][1] = 0;
3098                     break;
3099                 case CANDIDATE_MB_TYPE_INTER:
3100                     s->mv_dir = MV_DIR_FORWARD;
3101                     s->mb_intra= 0;
3102                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3103                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3104                     break;
3105                 case CANDIDATE_MB_TYPE_INTER_I:
3106                     s->mv_dir = MV_DIR_FORWARD;
3107                     s->mv_type = MV_TYPE_FIELD;
3108                     s->mb_intra= 0;
3109                     for(i=0; i<2; i++){
3110                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3111                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3112                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3113                     }
3114                     break;
3115                 case CANDIDATE_MB_TYPE_INTER4V:
3116                     s->mv_dir = MV_DIR_FORWARD;
3117                     s->mv_type = MV_TYPE_8X8;
3118                     s->mb_intra= 0;
3119                     for(i=0; i<4; i++){
3120                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3121                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3122                     }
3123                     break;
3124                 case CANDIDATE_MB_TYPE_DIRECT:
3125                     if (CONFIG_MPEG4_ENCODER) {
3126                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3127                         s->mb_intra= 0;
3128                         motion_x=s->b_direct_mv_table[xy][0];
3129                         motion_y=s->b_direct_mv_table[xy][1];
3130                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3131                     }
3132                     break;
3133                 case CANDIDATE_MB_TYPE_DIRECT0:
3134                     if (CONFIG_MPEG4_ENCODER) {
3135                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3136                         s->mb_intra= 0;
3137                         ff_mpeg4_set_direct_mv(s, 0, 0);
3138                     }
3139                     break;
3140                 case CANDIDATE_MB_TYPE_BIDIR:
3141                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3142                     s->mb_intra= 0;
3143                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3144                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3145                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3146                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3147                     break;
3148                 case CANDIDATE_MB_TYPE_BACKWARD:
3149                     s->mv_dir = MV_DIR_BACKWARD;
3150                     s->mb_intra= 0;
3151                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3152                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3153                     break;
3154                 case CANDIDATE_MB_TYPE_FORWARD:
3155                     s->mv_dir = MV_DIR_FORWARD;
3156                     s->mb_intra= 0;
3157                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3158                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3159                     break;
3160                 case CANDIDATE_MB_TYPE_FORWARD_I:
3161                     s->mv_dir = MV_DIR_FORWARD;
3162                     s->mv_type = MV_TYPE_FIELD;
3163                     s->mb_intra= 0;
3164                     for(i=0; i<2; i++){
3165                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3166                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3167                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3168                     }
3169                     break;
3170                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3171                     s->mv_dir = MV_DIR_BACKWARD;
3172                     s->mv_type = MV_TYPE_FIELD;
3173                     s->mb_intra= 0;
3174                     for(i=0; i<2; i++){
3175                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3176                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3177                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3178                     }
3179                     break;
3180                 case CANDIDATE_MB_TYPE_BIDIR_I:
3181                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3182                     s->mv_type = MV_TYPE_FIELD;
3183                     s->mb_intra= 0;
3184                     for(dir=0; dir<2; dir++){
3185                         for(i=0; i<2; i++){
3186                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3187                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3188                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3189                         }
3190                     }
3191                     break;
3192                 default:
3193                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3194                 }
3195
3196                 encode_mb(s, motion_x, motion_y);
3197
3198                 // RAL: Update last macroblock type
3199                 s->last_mv_dir = s->mv_dir;
3200
3201                 if (CONFIG_H263_ENCODER &&
3202                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3203                     ff_h263_update_motion_val(s);
3204
3205                 ff_MPV_decode_mb(s, s->block);
3206             }
3207
3208             /* clean the MV table in IPS frames for direct mode in B frames */
3209             if(s->mb_intra /* && I,P,S_TYPE */){
3210                 s->p_mv_table[xy][0]=0;
3211                 s->p_mv_table[xy][1]=0;
3212             }
3213
3214             if(s->flags&CODEC_FLAG_PSNR){
3215                 int w= 16;
3216                 int h= 16;
3217
3218                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3219                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3220
3221                 s->current_picture.f.error[0] += sse(
3222                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3223                     s->dest[0], w, h, s->linesize);
3224                 s->current_picture.f.error[1] += sse(
3225                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3226                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3227                 s->current_picture.f.error[2] += sse(
3228                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3229                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3230             }
3231             if(s->loop_filter){
3232                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3233                     ff_h263_loop_filter(s);
3234             }
3235             av_dlog(s->avctx, "MB %d %d bits\n",
3236                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3237         }
3238     }
3239
3240     //not beautiful here but we must write it before flushing so it has to be here
3241     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3242         ff_msmpeg4_encode_ext_header(s);
3243
3244     write_slice_end(s);
3245
3246     /* Send the last GOB if RTP */
3247     if (s->avctx->rtp_callback) {
3248         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3249         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3250         /* Call the RTP callback to send the last GOB */
3251         emms_c();
3252         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3253     }
3254
3255     return 0;
3256 }
3257
3258 #define MERGE(field) dst->field += src->field; src->field=0
3259 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3260     MERGE(me.scene_change_score);
3261     MERGE(me.mc_mb_var_sum_temp);
3262     MERGE(me.mb_var_sum_temp);
3263 }
3264
3265 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3266     int i;
3267
3268     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3269     MERGE(dct_count[1]);
3270     MERGE(mv_bits);
3271     MERGE(i_tex_bits);
3272     MERGE(p_tex_bits);
3273     MERGE(i_count);
3274     MERGE(f_count);
3275     MERGE(b_count);
3276     MERGE(skip_count);
3277     MERGE(misc_bits);
3278     MERGE(er.error_count);
3279     MERGE(padding_bug_score);
3280     MERGE(current_picture.f.error[0]);
3281     MERGE(current_picture.f.error[1]);
3282     MERGE(current_picture.f.error[2]);
3283
3284     if(dst->avctx->noise_reduction){
3285         for(i=0; i<64; i++){
3286             MERGE(dct_error_sum[0][i]);
3287             MERGE(dct_error_sum[1][i]);
3288         }
3289     }
3290
3291     assert(put_bits_count(&src->pb) % 8 ==0);
3292     assert(put_bits_count(&dst->pb) % 8 ==0);
3293     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3294     flush_put_bits(&dst->pb);
3295 }
3296
3297 static int estimate_qp(MpegEncContext *s, int dry_run){
3298     if (s->next_lambda){
3299         s->current_picture_ptr->f.quality =
3300         s->current_picture.f.quality = s->next_lambda;
3301         if(!dry_run) s->next_lambda= 0;
3302     } else if (!s->fixed_qscale) {
3303         s->current_picture_ptr->f.quality =
3304         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3305         if (s->current_picture.f.quality < 0)
3306             return -1;
3307     }
3308
3309     if(s->adaptive_quant){
3310         switch(s->codec_id){
3311         case AV_CODEC_ID_MPEG4:
3312             if (CONFIG_MPEG4_ENCODER)
3313                 ff_clean_mpeg4_qscales(s);
3314             break;
3315         case AV_CODEC_ID_H263:
3316         case AV_CODEC_ID_H263P:
3317         case AV_CODEC_ID_FLV1:
3318             if (CONFIG_H263_ENCODER)
3319                 ff_clean_h263_qscales(s);
3320             break;
3321         default:
3322             ff_init_qscale_tab(s);
3323         }
3324
3325         s->lambda= s->lambda_table[0];
3326         //FIXME broken
3327     }else
3328         s->lambda = s->current_picture.f.quality;
3329     update_qscale(s);
3330     return 0;
3331 }
3332
3333 /* must be called before writing the header */
3334 static void set_frame_distances(MpegEncContext * s){
3335     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3336     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3337
3338     if(s->pict_type==AV_PICTURE_TYPE_B){
3339         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3340         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3341     }else{
3342         s->pp_time= s->time - s->last_non_b_time;
3343         s->last_non_b_time= s->time;
3344         assert(s->picture_number==0 || s->pp_time > 0);
3345     }
3346 }
3347
3348 static int encode_picture(MpegEncContext *s, int picture_number)
3349 {
3350     int i, ret;
3351     int bits;
3352     int context_count = s->slice_context_count;
3353
3354     s->picture_number = picture_number;
3355
3356     /* Reset the average MB variance */
3357     s->me.mb_var_sum_temp    =
3358     s->me.mc_mb_var_sum_temp = 0;
3359
3360     /* we need to initialize some time vars before we can encode b-frames */
3361     // RAL: Condition added for MPEG1VIDEO
3362     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3363         set_frame_distances(s);
3364     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3365         ff_set_mpeg4_time(s);
3366
3367     s->me.scene_change_score=0;
3368
3369 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3370
3371     if(s->pict_type==AV_PICTURE_TYPE_I){
3372         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3373         else                        s->no_rounding=0;
3374     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3375         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3376             s->no_rounding ^= 1;
3377     }
3378
3379     if(s->flags & CODEC_FLAG_PASS2){
3380         if (estimate_qp(s,1) < 0)
3381             return -1;
3382         ff_get_2pass_fcode(s);
3383     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3384         if(s->pict_type==AV_PICTURE_TYPE_B)
3385             s->lambda= s->last_lambda_for[s->pict_type];
3386         else
3387             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3388         update_qscale(s);
3389     }
3390
3391     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3392         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3393         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3394         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3395         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3396     }
3397
3398     s->mb_intra=0; //for the rate distortion & bit compare functions
3399     for(i=1; i<context_count; i++){
3400         ret = ff_update_duplicate_context(s->thread_context[i], s);
3401         if (ret < 0)
3402             return ret;
3403     }
3404
3405     if(ff_init_me(s)<0)
3406         return -1;
3407
3408     /* Estimate motion for every MB */
3409     if(s->pict_type != AV_PICTURE_TYPE_I){
3410         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3411         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3412         if (s->pict_type != AV_PICTURE_TYPE_B) {
3413             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3414                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3415             }
3416         }
3417
3418         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3419     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3420         /* I-Frame */
3421         for(i=0; i<s->mb_stride*s->mb_height; i++)
3422             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3423
3424         if(!s->fixed_qscale){
3425             /* finding spatial complexity for I-frame rate control */
3426             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3427         }
3428     }
3429     for(i=1; i<context_count; i++){
3430         merge_context_after_me(s, s->thread_context[i]);
3431     }
3432     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3433     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3434     emms_c();
3435
3436     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3437         s->pict_type= AV_PICTURE_TYPE_I;
3438         for(i=0; i<s->mb_stride*s->mb_height; i++)
3439             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3440         if(s->msmpeg4_version >= 3)
3441             s->no_rounding=1;
3442         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3443                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3444     }
3445
3446     if(!s->umvplus){
3447         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3448             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3449
3450             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3451                 int a,b;
3452                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3453                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3454                 s->f_code= FFMAX3(s->f_code, a, b);
3455             }
3456
3457             ff_fix_long_p_mvs(s);
3458             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3459             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3460                 int j;
3461                 for(i=0; i<2; i++){
3462                     for(j=0; j<2; j++)
3463                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3464                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3465                 }
3466             }
3467         }
3468
3469         if(s->pict_type==AV_PICTURE_TYPE_B){
3470             int a, b;
3471
3472             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3473             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3474             s->f_code = FFMAX(a, b);
3475
3476             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3477             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3478             s->b_code = FFMAX(a, b);
3479
3480             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3481             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3482             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3483             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3484             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3485                 int dir, j;
3486                 for(dir=0; dir<2; dir++){
3487                     for(i=0; i<2; i++){
3488                         for(j=0; j<2; j++){
3489                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3490                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3491                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3492                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3493                         }
3494                     }
3495                 }
3496             }
3497         }
3498     }
3499
3500     if (estimate_qp(s, 0) < 0)
3501         return -1;
3502
3503     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3504         s->qscale= 3; //reduce clipping problems
3505
3506     if (s->out_format == FMT_MJPEG) {
3507         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3508         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3509
3510         if (s->avctx->intra_matrix) {
3511             chroma_matrix =
3512             luma_matrix = s->avctx->intra_matrix;
3513         }
3514
3515         /* for mjpeg, we do include qscale in the matrix */
3516         for(i=1;i<64;i++){
3517             int j= s->dsp.idct_permutation[i];
3518
3519             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3520             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3521         }
3522         s->y_dc_scale_table=
3523         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3524         s->chroma_intra_matrix[0] =
3525         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3526         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3527                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3528         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3529                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3530         s->qscale= 8;
3531     }
3532     if(s->codec_id == AV_CODEC_ID_AMV){
3533         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3534         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3535         for(i=1;i<64;i++){
3536             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3537
3538             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3539             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3540         }
3541         s->y_dc_scale_table= y;
3542         s->c_dc_scale_table= c;
3543         s->intra_matrix[0] = 13;
3544         s->chroma_intra_matrix[0] = 14;
3545         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3546                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3547         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3548                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3549         s->qscale= 8;
3550     }
3551
3552     //FIXME var duplication
3553     s->current_picture_ptr->f.key_frame =
3554     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3555     s->current_picture_ptr->f.pict_type =
3556     s->current_picture.f.pict_type = s->pict_type;
3557
3558     if (s->current_picture.f.key_frame)
3559         s->picture_in_gop_number=0;
3560
3561     s->mb_x = s->mb_y = 0;
3562     s->last_bits= put_bits_count(&s->pb);
3563     switch(s->out_format) {
3564     case FMT_MJPEG:
3565         if (CONFIG_MJPEG_ENCODER)
3566             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3567                                            s->intra_matrix, s->chroma_intra_matrix);
3568         break;
3569     case FMT_H261:
3570         if (CONFIG_H261_ENCODER)
3571             ff_h261_encode_picture_header(s, picture_number);
3572         break;
3573     case FMT_H263:
3574         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3575             ff_wmv2_encode_picture_header(s, picture_number);
3576         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3577             ff_msmpeg4_encode_picture_header(s, picture_number);
3578         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3579             ff_mpeg4_encode_picture_header(s, picture_number);
3580         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3581             ff_rv10_encode_picture_header(s, picture_number);
3582         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3583             ff_rv20_encode_picture_header(s, picture_number);
3584         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3585             ff_flv_encode_picture_header(s, picture_number);
3586         else if (CONFIG_H263_ENCODER)
3587             ff_h263_encode_picture_header(s, picture_number);
3588         break;
3589     case FMT_MPEG1:
3590         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3591             ff_mpeg1_encode_picture_header(s, picture_number);
3592         break;
3593     default:
3594         av_assert0(0);
3595     }
3596     bits= put_bits_count(&s->pb);
3597     s->header_bits= bits - s->last_bits;
3598
3599     for(i=1; i<context_count; i++){
3600         update_duplicate_context_after_me(s->thread_context[i], s);
3601     }
3602     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3603     for(i=1; i<context_count; i++){
3604         merge_context_after_encode(s, s->thread_context[i]);
3605     }
3606     emms_c();
3607     return 0;
3608 }
3609
3610 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3611     const int intra= s->mb_intra;
3612     int i;
3613
3614     s->dct_count[intra]++;
3615
3616     for(i=0; i<64; i++){
3617         int level= block[i];
3618
3619         if(level){
3620             if(level>0){
3621                 s->dct_error_sum[intra][i] += level;
3622                 level -= s->dct_offset[intra][i];
3623                 if(level<0) level=0;
3624             }else{
3625                 s->dct_error_sum[intra][i] -= level;
3626                 level += s->dct_offset[intra][i];
3627                 if(level>0) level=0;
3628             }
3629             block[i]= level;
3630         }
3631     }
3632 }
3633
3634 static int dct_quantize_trellis_c(MpegEncContext *s,
3635                                   int16_t *block, int n,
3636                                   int qscale, int *overflow){
3637     const int *qmat;
3638     const uint8_t *scantable= s->intra_scantable.scantable;
3639     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3640     int max=0;
3641     unsigned int threshold1, threshold2;
3642     int bias=0;
3643     int run_tab[65];
3644     int level_tab[65];
3645     int score_tab[65];
3646     int survivor[65];
3647     int survivor_count;
3648     int last_run=0;
3649     int last_level=0;
3650     int last_score= 0;
3651     int last_i;
3652     int coeff[2][64];
3653     int coeff_count[64];
3654     int qmul, qadd, start_i, last_non_zero, i, dc;
3655     const int esc_length= s->ac_esc_length;
3656     uint8_t * length;
3657     uint8_t * last_length;
3658     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3659
3660     s->dsp.fdct (block);
3661
3662     if(s->dct_error_sum)
3663         s->denoise_dct(s, block);
3664     qmul= qscale*16;
3665     qadd= ((qscale-1)|1)*8;
3666
3667     if (s->mb_intra) {
3668         int q;
3669         if (!s->h263_aic) {
3670             if (n < 4)
3671                 q = s->y_dc_scale;
3672             else
3673                 q = s->c_dc_scale;
3674             q = q << 3;
3675         } else{
3676             /* For AIC we skip quant/dequant of INTRADC */
3677             q = 1 << 3;
3678             qadd=0;
3679         }
3680
3681         /* note: block[0] is assumed to be positive */
3682         block[0] = (block[0] + (q >> 1)) / q;
3683         start_i = 1;
3684         last_non_zero = 0;
3685         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3686         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3687             bias= 1<<(QMAT_SHIFT-1);
3688         length     = s->intra_ac_vlc_length;
3689         last_length= s->intra_ac_vlc_last_length;
3690     } else {
3691         start_i = 0;
3692         last_non_zero = -1;
3693         qmat = s->q_inter_matrix[qscale];
3694         length     = s->inter_ac_vlc_length;
3695         last_length= s->inter_ac_vlc_last_length;
3696     }
3697     last_i= start_i;
3698
3699     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3700     threshold2= (threshold1<<1);
3701
3702     for(i=63; i>=start_i; i--) {
3703         const int j = scantable[i];
3704         int level = block[j] * qmat[j];
3705
3706         if(((unsigned)(level+threshold1))>threshold2){
3707             last_non_zero = i;
3708             break;
3709         }
3710     }
3711
3712     for(i=start_i; i<=last_non_zero; i++) {
3713         const int j = scantable[i];
3714         int level = block[j] * qmat[j];
3715
3716 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3717 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3718         if(((unsigned)(level+threshold1))>threshold2){
3719             if(level>0){
3720                 level= (bias + level)>>QMAT_SHIFT;
3721                 coeff[0][i]= level;
3722                 coeff[1][i]= level-1;
3723 //                coeff[2][k]= level-2;
3724             }else{
3725                 level= (bias - level)>>QMAT_SHIFT;
3726                 coeff[0][i]= -level;
3727                 coeff[1][i]= -level+1;
3728 //                coeff[2][k]= -level+2;
3729             }
3730             coeff_count[i]= FFMIN(level, 2);
3731             av_assert2(coeff_count[i]);
3732             max |=level;
3733         }else{
3734             coeff[0][i]= (level>>31)|1;
3735             coeff_count[i]= 1;
3736         }
3737     }
3738
3739     *overflow= s->max_qcoeff < max; //overflow might have happened
3740
3741     if(last_non_zero < start_i){
3742         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3743         return last_non_zero;
3744     }
3745
3746     score_tab[start_i]= 0;
3747     survivor[0]= start_i;
3748     survivor_count= 1;
3749
3750     for(i=start_i; i<=last_non_zero; i++){
3751         int level_index, j, zero_distortion;
3752         int dct_coeff= FFABS(block[ scantable[i] ]);
3753         int best_score=256*256*256*120;
3754
3755         if (s->dsp.fdct == ff_fdct_ifast)
3756             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3757         zero_distortion= dct_coeff*dct_coeff;
3758
3759         for(level_index=0; level_index < coeff_count[i]; level_index++){
3760             int distortion;
3761             int level= coeff[level_index][i];
3762             const int alevel= FFABS(level);
3763             int unquant_coeff;
3764
3765             av_assert2(level);
3766
3767             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3768                 unquant_coeff= alevel*qmul + qadd;
3769             }else{ //MPEG1
3770                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3771                 if(s->mb_intra){
3772                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3773                         unquant_coeff =   (unquant_coeff - 1) | 1;
3774                 }else{
3775                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3776                         unquant_coeff =   (unquant_coeff - 1) | 1;
3777                 }
3778                 unquant_coeff<<= 3;
3779             }
3780
3781             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3782             level+=64;
3783             if((level&(~127)) == 0){
3784                 for(j=survivor_count-1; j>=0; j--){
3785                     int run= i - survivor[j];
3786                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3787                     score += score_tab[i-run];
3788
3789                     if(score < best_score){
3790                         best_score= score;
3791                         run_tab[i+1]= run;
3792                         level_tab[i+1]= level-64;
3793                     }
3794                 }
3795
3796                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3797                     for(j=survivor_count-1; j>=0; j--){
3798                         int run= i - survivor[j];
3799                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3800                         score += score_tab[i-run];
3801                         if(score < last_score){
3802                             last_score= score;
3803                             last_run= run;
3804                             last_level= level-64;
3805                             last_i= i+1;
3806                         }
3807                     }
3808                 }
3809             }else{
3810                 distortion += esc_length*lambda;
3811                 for(j=survivor_count-1; j>=0; j--){
3812                     int run= i - survivor[j];
3813                     int score= distortion + score_tab[i-run];
3814
3815                     if(score < best_score){
3816                         best_score= score;
3817                         run_tab[i+1]= run;
3818                         level_tab[i+1]= level-64;
3819                     }
3820                 }
3821
3822                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3823                   for(j=survivor_count-1; j>=0; j--){
3824                         int run= i - survivor[j];
3825                         int score= distortion + score_tab[i-run];
3826                         if(score < last_score){
3827                             last_score= score;
3828                             last_run= run;
3829                             last_level= level-64;
3830                             last_i= i+1;
3831                         }
3832                     }
3833                 }
3834             }
3835         }
3836
3837         score_tab[i+1]= best_score;
3838
3839         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3840         if(last_non_zero <= 27){
3841             for(; survivor_count; survivor_count--){
3842                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3843                     break;
3844             }
3845         }else{
3846             for(; survivor_count; survivor_count--){
3847                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3848                     break;
3849             }
3850         }
3851
3852         survivor[ survivor_count++ ]= i+1;
3853     }
3854
3855     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3856         last_score= 256*256*256*120;
3857         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3858             int score= score_tab[i];
3859             if(i) score += lambda*2; //FIXME exacter?
3860
3861             if(score < last_score){
3862                 last_score= score;
3863                 last_i= i;
3864                 last_level= level_tab[i];
3865                 last_run= run_tab[i];
3866             }
3867         }
3868     }
3869
3870     s->coded_score[n] = last_score;
3871
3872     dc= FFABS(block[0]);
3873     last_non_zero= last_i - 1;
3874     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3875
3876     if(last_non_zero < start_i)
3877         return last_non_zero;
3878
3879     if(last_non_zero == 0 && start_i == 0){
3880         int best_level= 0;
3881         int best_score= dc * dc;
3882
3883         for(i=0; i<coeff_count[0]; i++){
3884             int level= coeff[i][0];
3885             int alevel= FFABS(level);
3886             int unquant_coeff, score, distortion;
3887
3888             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3889                     unquant_coeff= (alevel*qmul + qadd)>>3;
3890             }else{ //MPEG1
3891                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3892                     unquant_coeff =   (unquant_coeff - 1) | 1;
3893             }
3894             unquant_coeff = (unquant_coeff + 4) >> 3;
3895             unquant_coeff<<= 3 + 3;
3896
3897             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3898             level+=64;
3899             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3900             else                    score= distortion + esc_length*lambda;
3901
3902             if(score < best_score){
3903                 best_score= score;
3904                 best_level= level - 64;
3905             }
3906         }
3907         block[0]= best_level;
3908         s->coded_score[n] = best_score - dc*dc;
3909         if(best_level == 0) return -1;
3910         else                return last_non_zero;
3911     }
3912
3913     i= last_i;
3914     av_assert2(last_level);
3915
3916     block[ perm_scantable[last_non_zero] ]= last_level;
3917     i -= last_run + 1;
3918
3919     for(; i>start_i; i -= run_tab[i] + 1){
3920         block[ perm_scantable[i-1] ]= level_tab[i];
3921     }
3922
3923     return last_non_zero;
3924 }
3925
3926 //#define REFINE_STATS 1
3927 static int16_t basis[64][64];
3928
3929 static void build_basis(uint8_t *perm){
3930     int i, j, x, y;
3931     emms_c();
3932     for(i=0; i<8; i++){
3933         for(j=0; j<8; j++){
3934             for(y=0; y<8; y++){
3935                 for(x=0; x<8; x++){
3936                     double s= 0.25*(1<<BASIS_SHIFT);
3937                     int index= 8*i + j;
3938                     int perm_index= perm[index];
3939                     if(i==0) s*= sqrt(0.5);
3940                     if(j==0) s*= sqrt(0.5);
3941                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3942                 }
3943             }
3944         }
3945     }
3946 }
3947
3948 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3949                         int16_t *block, int16_t *weight, int16_t *orig,
3950                         int n, int qscale){
3951     int16_t rem[64];
3952     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3953     const uint8_t *scantable= s->intra_scantable.scantable;
3954     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3955 //    unsigned int threshold1, threshold2;
3956 //    int bias=0;
3957     int run_tab[65];
3958     int prev_run=0;
3959     int prev_level=0;
3960     int qmul, qadd, start_i, last_non_zero, i, dc;
3961     uint8_t * length;
3962     uint8_t * last_length;
3963     int lambda;
3964     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3965 #ifdef REFINE_STATS
3966 static int count=0;
3967 static int after_last=0;
3968 static int to_zero=0;
3969 static int from_zero=0;
3970 static int raise=0;
3971 static int lower=0;
3972 static int messed_sign=0;
3973 #endif
3974
3975     if(basis[0][0] == 0)
3976         build_basis(s->dsp.idct_permutation);
3977
3978     qmul= qscale*2;
3979     qadd= (qscale-1)|1;
3980     if (s->mb_intra) {
3981         if (!s->h263_aic) {
3982             if (n < 4)
3983                 q = s->y_dc_scale;
3984             else
3985                 q = s->c_dc_scale;
3986         } else{
3987             /* For AIC we skip quant/dequant of INTRADC */
3988             q = 1;
3989             qadd=0;
3990         }
3991         q <<= RECON_SHIFT-3;
3992         /* note: block[0] is assumed to be positive */
3993         dc= block[0]*q;
3994 //        block[0] = (block[0] + (q >> 1)) / q;
3995         start_i = 1;
3996 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3997 //            bias= 1<<(QMAT_SHIFT-1);
3998         length     = s->intra_ac_vlc_length;
3999         last_length= s->intra_ac_vlc_last_length;
4000     } else {
4001         dc= 0;
4002         start_i = 0;
4003         length     = s->inter_ac_vlc_length;
4004         last_length= s->inter_ac_vlc_last_length;
4005     }
4006     last_non_zero = s->block_last_index[n];
4007
4008 #ifdef REFINE_STATS
4009 {START_TIMER
4010 #endif
4011     dc += (1<<(RECON_SHIFT-1));
4012     for(i=0; i<64; i++){
4013         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4014     }
4015 #ifdef REFINE_STATS
4016 STOP_TIMER("memset rem[]")}
4017 #endif
4018     sum=0;
4019     for(i=0; i<64; i++){
4020         int one= 36;
4021         int qns=4;
4022         int w;
4023
4024         w= FFABS(weight[i]) + qns*one;
4025         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4026
4027         weight[i] = w;
4028 //        w=weight[i] = (63*qns + (w/2)) / w;
4029
4030         av_assert2(w>0);
4031         av_assert2(w<(1<<6));
4032         sum += w*w;
4033     }
4034     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4035 #ifdef REFINE_STATS
4036 {START_TIMER
4037 #endif
4038     run=0;
4039     rle_index=0;
4040     for(i=start_i; i<=last_non_zero; i++){
4041         int j= perm_scantable[i];
4042         const int level= block[j];
4043         int coeff;
4044
4045         if(level){
4046             if(level<0) coeff= qmul*level - qadd;
4047             else        coeff= qmul*level + qadd;
4048             run_tab[rle_index++]=run;
4049             run=0;
4050
4051             s->dsp.add_8x8basis(rem, basis[j], coeff);
4052         }else{
4053             run++;
4054         }
4055     }
4056 #ifdef REFINE_STATS
4057 if(last_non_zero>0){
4058 STOP_TIMER("init rem[]")
4059 }
4060 }
4061
4062 {START_TIMER
4063 #endif
4064     for(;;){
4065         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
4066         int best_coeff=0;
4067         int best_change=0;
4068         int run2, best_unquant_change=0, analyze_gradient;
4069 #ifdef REFINE_STATS
4070 {START_TIMER
4071 #endif
4072         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4073
4074         if(analyze_gradient){
4075 #ifdef REFINE_STATS
4076 {START_TIMER
4077 #endif
4078             for(i=0; i<64; i++){
4079                 int w= weight[i];
4080
4081                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4082             }
4083 #ifdef REFINE_STATS
4084 STOP_TIMER("rem*w*w")}
4085 {START_TIMER
4086 #endif
4087             s->dsp.fdct(d1);
4088 #ifdef REFINE_STATS
4089 STOP_TIMER("dct")}
4090 #endif
4091         }
4092
4093         if(start_i){
4094             const int level= block[0];
4095             int change, old_coeff;
4096
4097             av_assert2(s->mb_intra);
4098
4099             old_coeff= q*level;
4100
4101             for(change=-1; change<=1; change+=2){
4102                 int new_level= level + change;
4103                 int score, new_coeff;
4104
4105                 new_coeff= q*new_level;
4106                 if(new_coeff >= 2048 || new_coeff < 0)
4107                     continue;
4108
4109                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
4110                 if(score<best_score){
4111                     best_score= score;
4112                     best_coeff= 0;
4113                     best_change= change;
4114                     best_unquant_change= new_coeff - old_coeff;
4115                 }
4116             }
4117         }
4118
4119         run=0;
4120         rle_index=0;
4121         run2= run_tab[rle_index++];
4122         prev_level=0;
4123         prev_run=0;
4124
4125         for(i=start_i; i<64; i++){
4126             int j= perm_scantable[i];
4127             const int level= block[j];
4128             int change, old_coeff;
4129
4130             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4131                 break;
4132
4133             if(level){
4134                 if(level<0) old_coeff= qmul*level - qadd;
4135                 else        old_coeff= qmul*level + qadd;
4136                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4137             }else{
4138                 old_coeff=0;
4139                 run2--;
4140                 av_assert2(run2>=0 || i >= last_non_zero );
4141             }
4142
4143             for(change=-1; change<=1; change+=2){
4144                 int new_level= level + change;
4145                 int score, new_coeff, unquant_change;
4146
4147                 score=0;
4148                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4149                    continue;
4150
4151                 if(new_level){
4152                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4153                     else            new_coeff= qmul*new_level + qadd;
4154                     if(new_coeff >= 2048 || new_coeff <= -2048)
4155                         continue;
4156                     //FIXME check for overflow
4157
4158                     if(level){
4159                         if(level < 63 && level > -63){
4160                             if(i < last_non_zero)
4161                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4162                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4163                             else
4164                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4165                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4166                         }
4167                     }else{
4168                         av_assert2(FFABS(new_level)==1);
4169
4170                         if(analyze_gradient){
4171                             int g= d1[ scantable[i] ];
4172                             if(g && (g^new_level) >= 0)
4173                                 continue;
4174                         }
4175
4176                         if(i < last_non_zero){
4177                             int next_i= i + run2 + 1;
4178                             int next_level= block[ perm_scantable[next_i] ] + 64;
4179
4180                             if(next_level&(~127))
4181                                 next_level= 0;
4182
4183                             if(next_i < last_non_zero)
4184                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4185                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4186                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4187                             else
4188                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4189                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4190                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4191                         }else{
4192                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4193                             if(prev_level){
4194                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4195                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4196                             }
4197                         }
4198                     }
4199                 }else{
4200                     new_coeff=0;
4201                     av_assert2(FFABS(level)==1);
4202
4203                     if(i < last_non_zero){
4204                         int next_i= i + run2 + 1;
4205                         int next_level= block[ perm_scantable[next_i] ] + 64;
4206
4207                         if(next_level&(~127))
4208                             next_level= 0;
4209
4210                         if(next_i < last_non_zero)
4211                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4212                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4213                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4214                         else
4215                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4216                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4217                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4218                     }else{
4219                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4220                         if(prev_level){
4221                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4222                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4223                         }
4224                     }
4225                 }
4226
4227                 score *= lambda;
4228
4229                 unquant_change= new_coeff - old_coeff;
4230                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4231
4232                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4233                 if(score<best_score){
4234                     best_score= score;
4235                     best_coeff= i;
4236                     best_change= change;
4237                     best_unquant_change= unquant_change;
4238                 }
4239             }
4240             if(level){
4241                 prev_level= level + 64;
4242                 if(prev_level&(~127))
4243                     prev_level= 0;
4244                 prev_run= run;
4245                 run=0;
4246             }else{
4247                 run++;
4248             }
4249         }
4250 #ifdef REFINE_STATS
4251 STOP_TIMER("iterative step")}
4252 #endif
4253
4254         if(best_change){
4255             int j= perm_scantable[ best_coeff ];
4256
4257             block[j] += best_change;
4258
4259             if(best_coeff > last_non_zero){
4260                 last_non_zero= best_coeff;
4261                 av_assert2(block[j]);
4262 #ifdef REFINE_STATS
4263 after_last++;
4264 #endif
4265             }else{
4266 #ifdef REFINE_STATS
4267 if(block[j]){
4268     if(block[j] - best_change){
4269         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4270             raise++;
4271         }else{
4272             lower++;
4273         }
4274     }else{
4275         from_zero++;
4276     }
4277 }else{
4278     to_zero++;
4279 }
4280 #endif
4281                 for(; last_non_zero>=start_i; last_non_zero--){
4282                     if(block[perm_scantable[last_non_zero]])
4283                         break;
4284                 }
4285             }
4286 #ifdef REFINE_STATS
4287 count++;
4288 if(256*256*256*64 % count == 0){
4289     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4290 }
4291 #endif
4292             run=0;
4293             rle_index=0;
4294             for(i=start_i; i<=last_non_zero; i++){
4295                 int j= perm_scantable[i];
4296                 const int level= block[j];
4297
4298                  if(level){
4299                      run_tab[rle_index++]=run;
4300                      run=0;
4301                  }else{
4302                      run++;
4303                  }
4304             }
4305
4306             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4307         }else{
4308             break;
4309         }
4310     }
4311 #ifdef REFINE_STATS
4312 if(last_non_zero>0){
4313 STOP_TIMER("iterative search")
4314 }
4315 }
4316 #endif
4317
4318     return last_non_zero;
4319 }
4320
4321 int ff_dct_quantize_c(MpegEncContext *s,
4322                         int16_t *block, int n,
4323                         int qscale, int *overflow)
4324 {
4325     int i, j, level, last_non_zero, q, start_i;
4326     const int *qmat;
4327     const uint8_t *scantable= s->intra_scantable.scantable;
4328     int bias;
4329     int max=0;
4330     unsigned int threshold1, threshold2;
4331
4332     s->dsp.fdct (block);
4333
4334     if(s->dct_error_sum)
4335         s->denoise_dct(s, block);
4336
4337     if (s->mb_intra) {
4338         if (!s->h263_aic) {
4339             if (n < 4)
4340                 q = s->y_dc_scale;
4341             else
4342                 q = s->c_dc_scale;
4343             q = q << 3;
4344         } else
4345             /* For AIC we skip quant/dequant of INTRADC */
4346             q = 1 << 3;
4347
4348         /* note: block[0] is assumed to be positive */
4349         block[0] = (block[0] + (q >> 1)) / q;
4350         start_i = 1;
4351         last_non_zero = 0;
4352         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4353         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4354     } else {
4355         start_i = 0;
4356         last_non_zero = -1;
4357         qmat = s->q_inter_matrix[qscale];
4358         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4359     }
4360     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4361     threshold2= (threshold1<<1);
4362     for(i=63;i>=start_i;i--) {
4363         j = scantable[i];
4364         level = block[j] * qmat[j];
4365
4366         if(((unsigned)(level+threshold1))>threshold2){
4367             last_non_zero = i;
4368             break;
4369         }else{
4370             block[j]=0;
4371         }
4372     }
4373     for(i=start_i; i<=last_non_zero; i++) {
4374         j = scantable[i];
4375         level = block[j] * qmat[j];
4376
4377 //        if(   bias+level >= (1<<QMAT_SHIFT)
4378 //           || bias-level >= (1<<QMAT_SHIFT)){
4379         if(((unsigned)(level+threshold1))>threshold2){
4380             if(level>0){
4381                 level= (bias + level)>>QMAT_SHIFT;
4382                 block[j]= level;
4383             }else{
4384                 level= (bias - level)>>QMAT_SHIFT;
4385                 block[j]= -level;
4386             }
4387             max |=level;
4388         }else{
4389             block[j]=0;
4390         }
4391     }
4392     *overflow= s->max_qcoeff < max; //overflow might have happened
4393
4394     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4395     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4396         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4397
4398     return last_non_zero;
4399 }
4400
4401 #define OFFSET(x) offsetof(MpegEncContext, x)
4402 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4403 static const AVOption h263_options[] = {
4404     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4405     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4406     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4407     FF_MPV_COMMON_OPTS
4408     { NULL },
4409 };
4410
4411 static const AVClass h263_class = {
4412     .class_name = "H.263 encoder",
4413     .item_name  = av_default_item_name,
4414     .option     = h263_options,
4415     .version    = LIBAVUTIL_VERSION_INT,
4416 };
4417
4418 AVCodec ff_h263_encoder = {
4419     .name           = "h263",
4420     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4421     .type           = AVMEDIA_TYPE_VIDEO,
4422     .id             = AV_CODEC_ID_H263,
4423     .priv_data_size = sizeof(MpegEncContext),
4424     .init           = ff_MPV_encode_init,
4425     .encode2        = ff_MPV_encode_picture,
4426     .close          = ff_MPV_encode_end,
4427     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4428     .priv_class     = &h263_class,
4429 };
4430
4431 static const AVOption h263p_options[] = {
4432     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4433     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4434     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4435     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4436     FF_MPV_COMMON_OPTS
4437     { NULL },
4438 };
4439 static const AVClass h263p_class = {
4440     .class_name = "H.263p encoder",
4441     .item_name  = av_default_item_name,
4442     .option     = h263p_options,
4443     .version    = LIBAVUTIL_VERSION_INT,
4444 };
4445
4446 AVCodec ff_h263p_encoder = {
4447     .name           = "h263p",
4448     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4449     .type           = AVMEDIA_TYPE_VIDEO,
4450     .id             = AV_CODEC_ID_H263P,
4451     .priv_data_size = sizeof(MpegEncContext),
4452     .init           = ff_MPV_encode_init,
4453     .encode2        = ff_MPV_encode_picture,
4454     .close          = ff_MPV_encode_end,
4455     .capabilities   = CODEC_CAP_SLICE_THREADS,
4456     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4457     .priv_class     = &h263p_class,
4458 };
4459
4460 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4461
4462 AVCodec ff_msmpeg4v2_encoder = {
4463     .name           = "msmpeg4v2",
4464     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4465     .type           = AVMEDIA_TYPE_VIDEO,
4466     .id             = AV_CODEC_ID_MSMPEG4V2,
4467     .priv_data_size = sizeof(MpegEncContext),
4468     .init           = ff_MPV_encode_init,
4469     .encode2        = ff_MPV_encode_picture,
4470     .close          = ff_MPV_encode_end,
4471     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4472     .priv_class     = &msmpeg4v2_class,
4473 };
4474
4475 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4476
4477 AVCodec ff_msmpeg4v3_encoder = {
4478     .name           = "msmpeg4",
4479     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4480     .type           = AVMEDIA_TYPE_VIDEO,
4481     .id             = AV_CODEC_ID_MSMPEG4V3,
4482     .priv_data_size = sizeof(MpegEncContext),
4483     .init           = ff_MPV_encode_init,
4484     .encode2        = ff_MPV_encode_picture,
4485     .close          = ff_MPV_encode_end,
4486     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4487     .priv_class     = &msmpeg4v3_class,
4488 };
4489
4490 FF_MPV_GENERIC_CLASS(wmv1)
4491
4492 AVCodec ff_wmv1_encoder = {
4493     .name           = "wmv1",
4494     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4495     .type           = AVMEDIA_TYPE_VIDEO,
4496     .id             = AV_CODEC_ID_WMV1,
4497     .priv_data_size = sizeof(MpegEncContext),
4498     .init           = ff_MPV_encode_init,
4499     .encode2        = ff_MPV_encode_picture,
4500     .close          = ff_MPV_encode_end,
4501     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4502     .priv_class     = &wmv1_class,
4503 };