git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c

   1 /*
   2  * The simplest mpeg encoder (well, it was the simplest!)
   3  * Copyright (c) 2000,2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * The simplest mpeg encoder (well, it was the simplest!).
  28  */
  29
  30 #include "libavutil/intmath.h"
  31 #include "libavutil/mathematics.h"
  32 #include "libavutil/opt.h"
  33 #include "avcodec.h"
  34 #include "dsputil.h"
  35 #include "mpegvideo.h"
  36 #include "mpegvideo_common.h"
  37 #include "h263.h"
  38 #include "mjpegenc.h"
  39 #include "msmpeg4.h"
  40 #include "faandct.h"
  41 #include "thread.h"
  42 #include "aandcttab.h"
  43 #include "flv.h"
  44 #include "mpeg4video.h"
  45 #include "internal.h"
  46 #include <limits.h>
  47 #include "sp5x.h"
  48
  49 //#undef NDEBUG
  50 //#include <assert.h>
  51
  52 static int encode_picture(MpegEncContext *s, int picture_number);
  53 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
  54 static int sse_mb(MpegEncContext *s);
  55 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
  56 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
  57
  58 /* enable all paranoid tests for rounding, overflows, etc... */
  59 //#define PARANOID
  60
  61 //#define DEBUG
  62
  63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
  64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
  65
  66 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
  67                        uint16_t (*qmat16)[2][64],
  68                        const uint16_t *quant_matrix,
  69                        int bias, int qmin, int qmax, int intra)
  70 {
  71     int qscale;
  72     int shift = 0;
  73
  74     for (qscale = qmin; qscale <= qmax; qscale++) {
  75         int i;
  76         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
  77             dsp->fdct == ff_jpeg_fdct_islow_10
  78 #ifdef FAAN_POSTSCALE
  79             || dsp->fdct == ff_faandct
  80 #endif
  81             ) {
  82             for (i = 0; i < 64; i++) {
  83                 const int j = dsp->idct_permutation[i];
  84                 /* 16 <= qscale * quant_matrix[i] <= 7905
  85                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
  86                  *             19952 <=              x  <= 249205026
  87                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
  88                  *           3444240 >= (1 << 36) / (x) >= 275 */
  89
  90                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
  91                                         (qscale * quant_matrix[j]));
  92             }
  93         } else if (dsp->fdct == fdct_ifast
  94 #ifndef FAAN_POSTSCALE
  95                    || dsp->fdct == ff_faandct
  96 #endif
  97                    ) {
  98             for (i = 0; i < 64; i++) {
  99                 const int j = dsp->idct_permutation[i];
 100                 /* 16 <= qscale * quant_matrix[i] <= 7905
 101                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
 102                  *             19952 <=              x  <= 249205026
 103                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
 104                  *           3444240 >= (1 << 36) / (x) >= 275 */
 105
 106                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
 107                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
 108             }
 109         } else {
 110             for (i = 0; i < 64; i++) {
 111                 const int j = dsp->idct_permutation[i];
 112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
 113                  * Assume x = qscale * quant_matrix[i]
 114                  * So             16 <=              x  <= 7905
 115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
 116                  * so          32768 >= (1 << 19) / (x) >= 67 */
 117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
 118                                         (qscale * quant_matrix[j]));
 119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
 120                 //                    (qscale * quant_matrix[i]);
 121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
 122                                        (qscale * quant_matrix[j]);
 123
 124                 if (qmat16[qscale][0][i] == 0 ||
 125                     qmat16[qscale][0][i] == 128 * 256)
 126                     qmat16[qscale][0][i] = 128 * 256 - 1;
 127                 qmat16[qscale][1][i] =
 128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
 129                                 qmat16[qscale][0][i]);
 130             }
 131         }
 132
 133         for (i = intra; i < 64; i++) {
 134             int64_t max = 8191;
 135             if (dsp->fdct == fdct_ifast
 136 #ifndef FAAN_POSTSCALE
 137                 || dsp->fdct == ff_faandct
 138 #endif
 139                ) {
 140                 max = (8191LL * ff_aanscales[i]) >> 14;
 141             }
 142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
 143                 shift++;
 144             }
 145         }
 146     }
 147     if (shift) {
 148         av_log(NULL, AV_LOG_INFO,
 149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
 150                QMAT_SHIFT - shift);
 151     }
 152 }
 153
 154 static inline void update_qscale(MpegEncContext *s)
 155 {
 156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
 157                 (FF_LAMBDA_SHIFT + 7);
 158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
 159
 160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
 161                  FF_LAMBDA_SHIFT;
 162 }
 163
 164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
 165 {
 166     int i;
 167
 168     if (matrix) {
 169         put_bits(pb, 1, 1);
 170         for (i = 0; i < 64; i++) {
 171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
 172         }
 173     } else
 174         put_bits(pb, 1, 0);
 175 }
 176
 177 /**
 178  * init s->current_picture.qscale_table from s->lambda_table
 179  */
 180 void ff_init_qscale_tab(MpegEncContext *s)
 181 {
 182     int8_t * const qscale_table = s->current_picture.f.qscale_table;
 183     int i;
 184
 185     for (i = 0; i < s->mb_num; i++) {
 186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
 187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
 188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
 189                                                   s->avctx->qmax);
 190     }
 191 }
 192
 193 static void copy_picture_attributes(MpegEncContext *s,
 194                                     AVFrame *dst,
 195                                     AVFrame *src)
 196 {
 197     int i;
 198
 199     dst->pict_type              = src->pict_type;
 200     dst->quality                = src->quality;
 201     dst->coded_picture_number   = src->coded_picture_number;
 202     dst->display_picture_number = src->display_picture_number;
 203     //dst->reference              = src->reference;
 204     dst->pts                    = src->pts;
 205     dst->interlaced_frame       = src->interlaced_frame;
 206     dst->top_field_first        = src->top_field_first;
 207
 208     if (s->avctx->me_threshold) {
 209         if (!src->motion_val[0])
 210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
 211         if (!src->mb_type)
 212             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
 213         if (!src->ref_index[0])
 214             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
 215         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
 216             av_log(s->avctx, AV_LOG_ERROR,
 217                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
 218                    src->motion_subsample_log2, dst->motion_subsample_log2);
 219
 220         memcpy(dst->mb_type, src->mb_type,
 221                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
 222
 223         for (i = 0; i < 2; i++) {
 224             int stride = ((16 * s->mb_width ) >>
 225                           src->motion_subsample_log2) + 1;
 226             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
 227
 228             if (src->motion_val[i] &&
 229                 src->motion_val[i] != dst->motion_val[i]) {
 230                 memcpy(dst->motion_val[i], src->motion_val[i],
 231                        2 * stride * height * sizeof(int16_t));
 232             }
 233             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
 234                 memcpy(dst->ref_index[i], src->ref_index[i],
 235                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
 236             }
 237         }
 238     }
 239 }
 240
 241 static void update_duplicate_context_after_me(MpegEncContext *dst,
 242                                               MpegEncContext *src)
 243 {
 244 #define COPY(a) dst->a= src->a
 245     COPY(pict_type);
 246     COPY(current_picture);
 247     COPY(f_code);
 248     COPY(b_code);
 249     COPY(qscale);
 250     COPY(lambda);
 251     COPY(lambda2);
 252     COPY(picture_in_gop_number);
 253     COPY(gop_picture_number);
 254     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
 255     COPY(progressive_frame);    // FIXME don't set in encode_header
 256     COPY(partitioned_frame);    // FIXME don't set in encode_header
 257 #undef COPY
 258 }
 259
 260 /**
 261  * Set the given MpegEncContext to defaults for encoding.
 262  * the changed fields will not depend upon the prior state of the MpegEncContext.
 263  */
 264 static void MPV_encode_defaults(MpegEncContext *s)
 265 {
 266     int i;
 267     MPV_common_defaults(s);
 268
 269     for (i = -16; i < 16; i++) {
 270         default_fcode_tab[i + MAX_MV] = 1;
 271     }
 272     s->me.mv_penalty = default_mv_penalty;
 273     s->fcode_tab     = default_fcode_tab;
 274 }
 275
 276 /* init video encoder */
 277 av_cold int MPV_encode_init(AVCodecContext *avctx)
 278 {
 279     MpegEncContext *s = avctx->priv_data;
 280     int i;
 281     int chroma_h_shift, chroma_v_shift;
 282
 283     MPV_encode_defaults(s);
 284
 285     switch (avctx->codec_id) {
 286     case CODEC_ID_MPEG2VIDEO:
 287         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
 288             avctx->pix_fmt != PIX_FMT_YUV422P) {
 289             av_log(avctx, AV_LOG_ERROR,
 290                    "only YUV420 and YUV422 are supported\n");
 291             return -1;
 292         }
 293         break;
 294     case CODEC_ID_LJPEG:
 295         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
 296             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
 297             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
 298             avctx->pix_fmt != PIX_FMT_BGRA     &&
 299             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
 300               avctx->pix_fmt != PIX_FMT_YUV422P &&
 301               avctx->pix_fmt != PIX_FMT_YUV444P) ||
 302              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 303             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
 304             return -1;
 305         }
 306         break;
 307     case CODEC_ID_MJPEG:
 308     case CODEC_ID_AMV:
 309         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
 310             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
 311             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
 312               avctx->pix_fmt != PIX_FMT_YUV422P) ||
 313              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
 314             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
 315             return -1;
 316         }
 317         break;
 318     default:
 319         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
 320             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
 321             return -1;
 322         }
 323     }
 324
 325     switch (avctx->pix_fmt) {
 326     case PIX_FMT_YUVJ422P:
 327     case PIX_FMT_YUV422P:
 328         s->chroma_format = CHROMA_422;
 329         break;
 330     case PIX_FMT_YUVJ420P:
 331     case PIX_FMT_YUV420P:
 332     default:
 333         s->chroma_format = CHROMA_420;
 334         break;
 335     }
 336
 337     s->bit_rate = avctx->bit_rate;
 338     s->width    = avctx->width;
 339     s->height   = avctx->height;
 340     if (avctx->gop_size > 600 &&
 341         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
 342         av_log(avctx, AV_LOG_WARNING,
 343                "keyframe interval too large!, reducing it from %d to %d\n",
 344                avctx->gop_size, 600);
 345         avctx->gop_size = 600;
 346     }
 347     s->gop_size     = avctx->gop_size;
 348     s->avctx        = avctx;
 349     s->flags        = avctx->flags;
 350     s->flags2       = avctx->flags2;
 351     s->max_b_frames = avctx->max_b_frames;
 352     s->codec_id     = avctx->codec->id;
 353     s->luma_elim_threshold   = avctx->luma_elim_threshold;
 354     s->chroma_elim_threshold = avctx->chroma_elim_threshold;
 355     s->strict_std_compliance = avctx->strict_std_compliance;
 356     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
 357     s->mpeg_quant         = avctx->mpeg_quant;
 358     s->rtp_mode           = !!avctx->rtp_payload_size;
 359     s->intra_dc_precision = avctx->intra_dc_precision;
 360     s->user_specified_pts = AV_NOPTS_VALUE;
 361
 362     if (s->gop_size <= 1) {
 363         s->intra_only = 1;
 364         s->gop_size   = 12;
 365     } else {
 366         s->intra_only = 0;
 367     }
 368
 369     s->me_method = avctx->me_method;
 370
 371     /* Fixed QSCALE */
 372     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
 373
 374     s->adaptive_quant = (s->avctx->lumi_masking ||
 375                          s->avctx->dark_masking ||
 376                          s->avctx->temporal_cplx_masking ||
 377                          s->avctx->spatial_cplx_masking  ||
 378                          s->avctx->p_masking      ||
 379                          s->avctx->border_masking ||
 380                          (s->flags & CODEC_FLAG_QP_RD)) &&
 381                         !s->fixed_qscale;
 382
 383     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
 384
 385     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
 386         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
 387         return -1;
 388     }
 389
 390     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
 391         av_log(avctx, AV_LOG_INFO,
 392                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
 393     }
 394
 395     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
 396         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
 397         return -1;
 398     }
 399
 400     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
 401         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
 402         return -1;
 403     }
 404
 405     if (avctx->rc_max_rate &&
 406         avctx->rc_max_rate == avctx->bit_rate &&
 407         avctx->rc_max_rate != avctx->rc_min_rate) {
 408         av_log(avctx, AV_LOG_INFO,
 409                "impossible bitrate constraints, this will fail\n");
 410     }
 411
 412     if (avctx->rc_buffer_size &&
 413         avctx->bit_rate * (int64_t)avctx->time_base.num >
 414             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
 415         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
 416         return -1;
 417     }
 418
 419     if (!s->fixed_qscale &&
 420         avctx->bit_rate * av_q2d(avctx->time_base) >
 421             avctx->bit_rate_tolerance) {
 422         av_log(avctx, AV_LOG_ERROR,
 423                "bitrate tolerance too small for bitrate\n");
 424         return -1;
 425     }
 426
 427     if (s->avctx->rc_max_rate &&
 428         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
 429         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
 430          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
 431         90000LL * (avctx->rc_buffer_size - 1) >
 432             s->avctx->rc_max_rate * 0xFFFFLL) {
 433         av_log(avctx, AV_LOG_INFO,
 434                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
 435                "specified vbv buffer is too large for the given bitrate!\n");
 436     }
 437
 438     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
 439         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
 440         s->codec_id != CODEC_ID_FLV1) {
 441         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
 442         return -1;
 443     }
 444
 445     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
 446         av_log(avctx, AV_LOG_ERROR,
 447                "OBMC is only supported with simple mb decision\n");
 448         return -1;
 449     }
 450
 451     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
 452         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
 453         return -1;
 454     }
 455
 456     if (s->max_b_frames                    &&
 457         s->codec_id != CODEC_ID_MPEG4      &&
 458         s->codec_id != CODEC_ID_MPEG1VIDEO &&
 459         s->codec_id != CODEC_ID_MPEG2VIDEO) {
 460         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
 461         return -1;
 462     }
 463
 464     if ((s->codec_id == CODEC_ID_MPEG4 ||
 465          s->codec_id == CODEC_ID_H263  ||
 466          s->codec_id == CODEC_ID_H263P) &&
 467         (avctx->sample_aspect_ratio.num > 255 ||
 468          avctx->sample_aspect_ratio.den > 255)) {
 469         av_log(avctx, AV_LOG_WARNING,
 470                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
 471                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
 472         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
 473                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
 474     }
 475
 476     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
 477         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
 478         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
 479         return -1;
 480     }
 481
 482     // FIXME mpeg2 uses that too
 483     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
 484         av_log(avctx, AV_LOG_ERROR,
 485                "mpeg2 style quantization not supported by codec\n");
 486         return -1;
 487     }
 488
 489     if ((s->flags & CODEC_FLAG_CBP_RD) && !avctx->trellis) {
 490         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
 491         return -1;
 492     }
 493
 494     if ((s->flags & CODEC_FLAG_QP_RD) &&
 495         s->avctx->mb_decision != FF_MB_DECISION_RD) {
 496         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
 497         return -1;
 498     }
 499
 500     if (s->avctx->scenechange_threshold < 1000000000 &&
 501         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
 502         av_log(avctx, AV_LOG_ERROR,
 503                "closed gop with scene change detection are not supported yet, "
 504                "set threshold to 1000000000\n");
 505         return -1;
 506     }
 507
 508     if (s->flags & CODEC_FLAG_LOW_DELAY) {
 509         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
 510             av_log(avctx, AV_LOG_ERROR,
 511                   "low delay forcing is only available for mpeg2\n");
 512             return -1;
 513         }
 514         if (s->max_b_frames != 0) {
 515             av_log(avctx, AV_LOG_ERROR,
 516                    "b frames cannot be used with low delay\n");
 517             return -1;
 518         }
 519     }
 520
 521     if (s->q_scale_type == 1) {
 522         if (avctx->qmax > 12) {
 523             av_log(avctx, AV_LOG_ERROR,
 524                    "non linear quant only supports qmax <= 12 currently\n");
 525             return -1;
 526         }
 527     }
 528
 529     if (s->avctx->thread_count > 1         &&
 530         s->codec_id != CODEC_ID_MPEG4      &&
 531         s->codec_id != CODEC_ID_MPEG1VIDEO &&
 532         s->codec_id != CODEC_ID_MPEG2VIDEO &&
 533         (s->codec_id != CODEC_ID_H263P)) {
 534         av_log(avctx, AV_LOG_ERROR,
 535                "multi threaded encoding not supported by codec\n");
 536         return -1;
 537     }
 538
 539     if (s->avctx->thread_count < 1) {
 540         av_log(avctx, AV_LOG_ERROR,
 541                "automatic thread number detection not supported by codec, "
 542                "patch welcome\n");
 543         return -1;
 544     }
 545
 546     if (s->avctx->thread_count > 1)
 547         s->rtp_mode = 1;
 548
 549     if (!avctx->time_base.den || !avctx->time_base.num) {
 550         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
 551         return -1;
 552     }
 553
 554     i = (INT_MAX / 2 + 128) >> 8;
 555     if (avctx->me_threshold >= i) {
 556         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
 557                i - 1);
 558         return -1;
 559     }
 560     if (avctx->mb_threshold >= i) {
 561         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
 562                i - 1);
 563         return -1;
 564     }
 565
 566     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
 567         av_log(avctx, AV_LOG_INFO,
 568                "notice: b_frame_strategy only affects the first pass\n");
 569         avctx->b_frame_strategy = 0;
 570     }
 571
 572     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
 573     if (i > 1) {
 574         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
 575         avctx->time_base.den /= i;
 576         avctx->time_base.num /= i;
 577         //return -1;
 578     }
 579
 580     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
 581         // (a + x * 3 / 8) / x
 582         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
 583         s->inter_quant_bias = 0;
 584     } else {
 585         s->intra_quant_bias = 0;
 586         // (a - x / 4) / x
 587         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
 588     }
 589
 590     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
 591         s->intra_quant_bias = avctx->intra_quant_bias;
 592     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
 593         s->inter_quant_bias = avctx->inter_quant_bias;
 594
 595     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
 596
 597     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
 598                                   &chroma_v_shift);
 599
 600     if (avctx->codec_id == CODEC_ID_MPEG4 &&
 601         s->avctx->time_base.den > (1 << 16) - 1) {
 602         av_log(avctx, AV_LOG_ERROR,
 603                "timebase %d/%d not supported by MPEG 4 standard, "
 604                "the maximum admitted value for the timebase denominator "
 605                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
 606                (1 << 16) - 1);
 607         return -1;
 608     }
 609     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 610
 611     switch (avctx->codec->id) {
 612     case CODEC_ID_MPEG1VIDEO:
 613         s->out_format = FMT_MPEG1;
 614         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 615         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 616         break;
 617     case CODEC_ID_MPEG2VIDEO:
 618         s->out_format = FMT_MPEG1;
 619         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
 620         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
 621         s->rtp_mode   = 1;
 622         break;
 623     case CODEC_ID_LJPEG:
 624     case CODEC_ID_MJPEG:
 625     case CODEC_ID_AMV:
 626         s->out_format = FMT_MJPEG;
 627         s->intra_only = 1; /* force intra only for jpeg */
 628         if (avctx->codec->id == CODEC_ID_LJPEG && avctx->pix_fmt   == PIX_FMT_BGRA) {
 629             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
 630             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
 631             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
 632         } else {
 633             s->mjpeg_vsample[0] = 2;
 634             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
 635             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
 636             s->mjpeg_hsample[0] = 2;
 637             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
 638             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
 639         }
 640         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
 641             ff_mjpeg_encode_init(s) < 0)
 642             return -1;
 643         avctx->delay = 0;
 644         s->low_delay = 1;
 645         break;
 646     case CODEC_ID_H261:
 647         if (!CONFIG_H261_ENCODER)
 648             return -1;
 649         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
 650             av_log(avctx, AV_LOG_ERROR,
 651                    "The specified picture size of %dx%d is not valid for the "
 652                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
 653                     s->width, s->height);
 654             return -1;
 655         }
 656         s->out_format = FMT_H261;
 657         avctx->delay  = 0;
 658         s->low_delay  = 1;
 659         break;
 660     case CODEC_ID_H263:
 661         if (!CONFIG_H263_ENCODER)
 662             return -1;
 663         if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format),
 664                              s->width, s->height) == 8) {
 665             av_log(avctx, AV_LOG_ERROR,
 666                    "The specified picture size of %dx%d is not valid for "
 667                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
 668                    "352x288, 704x576, and 1408x1152. "
 669                    "Try H.263+.\n", s->width, s->height);
 670             return -1;
 671         }
 672         s->out_format = FMT_H263;
 673         avctx->delay  = 0;
 674         s->low_delay  = 1;
 675         break;
 676     case CODEC_ID_H263P:
 677         s->out_format = FMT_H263;
 678         s->h263_plus  = 1;
 679         /* Fx */
 680         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
 681         s->modified_quant  = s->h263_aic;
 682         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
 683         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
 684
 685         /* /Fx */
 686         /* These are just to be sure */
 687         avctx->delay = 0;
 688         s->low_delay = 1;
 689         break;
 690     case CODEC_ID_FLV1:
 691         s->out_format      = FMT_H263;
 692         s->h263_flv        = 2; /* format = 1; 11-bit codes */
 693         s->unrestricted_mv = 1;
 694         s->rtp_mode  = 0; /* don't allow GOB */
 695         avctx->delay = 0;
 696         s->low_delay = 1;
 697         break;
 698     case CODEC_ID_RV10:
 699         s->out_format = FMT_H263;
 700         avctx->delay  = 0;
 701         s->low_delay  = 1;
 702         break;
 703     case CODEC_ID_RV20:
 704         s->out_format      = FMT_H263;
 705         avctx->delay       = 0;
 706         s->low_delay       = 1;
 707         s->modified_quant  = 1;
 708         s->h263_aic        = 1;
 709         s->h263_plus       = 1;
 710         s->loop_filter     = 1;
 711         s->unrestricted_mv = 0;
 712         break;
 713     case CODEC_ID_MPEG4:
 714         s->out_format      = FMT_H263;
 715         s->h263_pred       = 1;
 716         s->unrestricted_mv = 1;
 717         s->low_delay       = s->max_b_frames ? 0 : 1;
 718         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
 719         break;
 720     case CODEC_ID_MSMPEG4V2:
 721         s->out_format      = FMT_H263;
 722         s->h263_pred       = 1;
 723         s->unrestricted_mv = 1;
 724         s->msmpeg4_version = 2;
 725         avctx->delay       = 0;
 726         s->low_delay       = 1;
 727         break;
 728     case CODEC_ID_MSMPEG4V3:
 729         s->out_format        = FMT_H263;
 730         s->h263_pred         = 1;
 731         s->unrestricted_mv   = 1;
 732         s->msmpeg4_version   = 3;
 733         s->flipflop_rounding = 1;
 734         avctx->delay         = 0;
 735         s->low_delay         = 1;
 736         break;
 737     case CODEC_ID_WMV1:
 738         s->out_format        = FMT_H263;
 739         s->h263_pred         = 1;
 740         s->unrestricted_mv   = 1;
 741         s->msmpeg4_version   = 4;
 742         s->flipflop_rounding = 1;
 743         avctx->delay         = 0;
 744         s->low_delay         = 1;
 745         break;
 746     case CODEC_ID_WMV2:
 747         s->out_format        = FMT_H263;
 748         s->h263_pred         = 1;
 749         s->unrestricted_mv   = 1;
 750         s->msmpeg4_version   = 5;
 751         s->flipflop_rounding = 1;
 752         avctx->delay         = 0;
 753         s->low_delay         = 1;
 754         break;
 755     default:
 756         return -1;
 757     }
 758
 759     avctx->has_b_frames = !s->low_delay;
 760
 761     s->encoding = 1;
 762
 763     s->progressive_frame    =
 764     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
 765                                                 CODEC_FLAG_INTERLACED_ME) ||
 766                                 s->alternate_scan);
 767
 768     /* init */
 769     if (MPV_common_init(s) < 0)
 770         return -1;
 771
 772     if (!s->dct_quantize)
 773         s->dct_quantize = dct_quantize_c;
 774     if (!s->denoise_dct)
 775         s->denoise_dct  = denoise_dct_c;
 776     s->fast_dct_quantize = s->dct_quantize;
 777     if (avctx->trellis)
 778         s->dct_quantize  = dct_quantize_trellis_c;
 779
 780     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
 781         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
 782
 783     s->quant_precision = 5;
 784
 785     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
 786     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
 787
 788     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
 789         ff_h261_encode_init(s);
 790     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
 791         h263_encode_init(s);
 792     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
 793         ff_msmpeg4_encode_init(s);
 794     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
 795         && s->out_format == FMT_MPEG1)
 796         ff_mpeg1_encode_init(s);
 797
 798     /* init q matrix */
 799     for (i = 0; i < 64; i++) {
 800         int j = s->dsp.idct_permutation[i];
 801         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
 802             s->mpeg_quant) {
 803             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
 804             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
 805         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
 806             s->intra_matrix[j] =
 807             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 808         } else {
 809             /* mpeg1/2 */
 810             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
 811             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
 812         }
 813         if (s->avctx->intra_matrix)
 814             s->intra_matrix[j] = s->avctx->intra_matrix[i];
 815         if (s->avctx->inter_matrix)
 816             s->inter_matrix[j] = s->avctx->inter_matrix[i];
 817     }
 818
 819     /* precompute matrix */
 820     /* for mjpeg, we do include qscale in the matrix */
 821     if (s->out_format != FMT_MJPEG) {
 822         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
 823                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
 824                           31, 1);
 825         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
 826                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
 827                           31, 0);
 828     }
 829
 830     if (ff_rate_control_init(s) < 0)
 831         return -1;
 832
 833     return 0;
 834 }
 835
 836 av_cold int MPV_encode_end(AVCodecContext *avctx)
 837 {
 838     MpegEncContext *s = avctx->priv_data;
 839
 840     ff_rate_control_uninit(s);
 841
 842     MPV_common_end(s);
 843     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
 844         s->out_format == FMT_MJPEG)
 845         ff_mjpeg_encode_close(s);
 846
 847     av_freep(&avctx->extradata);
 848
 849     return 0;
 850 }
 851
 852 static int get_sae(uint8_t *src, int ref, int stride)
 853 {
 854     int x,y;
 855     int acc = 0;
 856
 857     for (y = 0; y < 16; y++) {
 858         for (x = 0; x < 16; x++) {
 859             acc += FFABS(src[x + y * stride] - ref);
 860         }
 861     }
 862
 863     return acc;
 864 }
 865
 866 static int get_intra_count(MpegEncContext *s, uint8_t *src,
 867                            uint8_t *ref, int stride)
 868 {
 869     int x, y, w, h;
 870     int acc = 0;
 871
 872     w = s->width  & ~15;
 873     h = s->height & ~15;
 874
 875     for (y = 0; y < h; y += 16) {
 876         for (x = 0; x < w; x += 16) {
 877             int offset = x + y * stride;
 878             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
 879                                      16);
 880             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
 881             int sae  = get_sae(src + offset, mean, stride);
 882
 883             acc += sae + 500 < sad;
 884         }
 885     }
 886     return acc;
 887 }
 888
 889
 890 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
 891 {
 892     AVFrame *pic = NULL;
 893     int64_t pts;
 894     int i;
 895     const int encoding_delay = s->max_b_frames;
 896     int direct = 1;
 897
 898     if (pic_arg) {
 899         pts = pic_arg->pts;
 900         pic_arg->display_picture_number = s->input_picture_number++;
 901
 902         if (pts != AV_NOPTS_VALUE) {
 903             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 904                 int64_t time = pts;
 905                 int64_t last = s->user_specified_pts;
 906
 907                 if (time <= last) {
 908                     av_log(s->avctx, AV_LOG_ERROR,
 909                            "Error, Invalid timestamp=%"PRId64", "
 910                            "last=%"PRId64"\n", pts, s->user_specified_pts);
 911                     return -1;
 912                 }
 913             }
 914             s->user_specified_pts = pts;
 915         } else {
 916             if (s->user_specified_pts != AV_NOPTS_VALUE) {
 917                 s->user_specified_pts =
 918                 pts = s->user_specified_pts + 1;
 919                 av_log(s->avctx, AV_LOG_INFO,
 920                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
 921                        pts);
 922             } else {
 923                 pts = pic_arg->display_picture_number;
 924             }
 925         }
 926     }
 927
 928   if (pic_arg) {
 929     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
 930         direct = 0;
 931     if (pic_arg->linesize[0] != s->linesize)
 932         direct = 0;
 933     if (pic_arg->linesize[1] != s->uvlinesize)
 934         direct = 0;
 935     if (pic_arg->linesize[2] != s->uvlinesize)
 936         direct = 0;
 937
 938     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
 939     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
 940
 941     if (direct) {
 942         i = ff_find_unused_picture(s, 1);
 943         if (i < 0)
 944             return i;
 945
 946         pic = (AVFrame *) &s->picture[i];
 947         pic->reference = 3;
 948
 949         for (i = 0; i < 4; i++) {
 950             pic->data[i]     = pic_arg->data[i];
 951             pic->linesize[i] = pic_arg->linesize[i];
 952         }
 953         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
 954             return -1;
 955         }
 956     } else {
 957         i = ff_find_unused_picture(s, 0);
 958         if (i < 0)
 959             return i;
 960
 961         pic = (AVFrame *) &s->picture[i];
 962         pic->reference = 3;
 963
 964         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
 965             return -1;
 966         }
 967
 968         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
 969             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
 970             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
 971             // empty
 972         } else {
 973             int h_chroma_shift, v_chroma_shift;
 974             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
 975                                           &v_chroma_shift);
 976
 977             for (i = 0; i < 3; i++) {
 978                 int src_stride = pic_arg->linesize[i];
 979                 int dst_stride = i ? s->uvlinesize : s->linesize;
 980                 int h_shift = i ? h_chroma_shift : 0;
 981                 int v_shift = i ? v_chroma_shift : 0;
 982                 int w = s->width  >> h_shift;
 983                 int h = s->height >> v_shift;
 984                 uint8_t *src = pic_arg->data[i];
 985                 uint8_t *dst = pic->data[i];
 986
 987                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
 988                     h= ((s->height+15)/16*16)>>v_shift;
 989                 }
 990
 991                 if (!s->avctx->rc_buffer_size)
 992                     dst += INPLACE_OFFSET;
 993
 994                 if (src_stride == dst_stride)
 995                     memcpy(dst, src, src_stride * h);
 996                 else {
 997                     while (h--) {
 998                         memcpy(dst, src, w);
 999                         dst += dst_stride;
1000                         src += src_stride;
1001                     }
1002                 }
1003             }
1004         }
1005     }
1006     copy_picture_attributes(s, pic, pic_arg);
1007     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1008   }
1009
1010     /* shift buffer entries */
1011     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1012         s->input_picture[i - 1] = s->input_picture[i];
1013
1014     s->input_picture[encoding_delay] = (Picture*) pic;
1015
1016     return 0;
1017 }
1018
1019 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1020 {
1021     int x, y, plane;
1022     int score = 0;
1023     int64_t score64 = 0;
1024
1025     for (plane = 0; plane < 3; plane++) {
1026         const int stride = p->f.linesize[plane];
1027         const int bw = plane ? 1 : 2;
1028         for (y = 0; y < s->mb_height * bw; y++) {
1029             for (x = 0; x < s->mb_width * bw; x++) {
1030                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1031                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1032                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1033                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1034
1035                 switch (s->avctx->frame_skip_exp) {
1036                 case 0: score    =  FFMAX(score, v);          break;
1037                 case 1: score   += FFABS(v);                  break;
1038                 case 2: score   += v * v;                     break;
1039                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1040                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1041                 }
1042             }
1043         }
1044     }
1045
1046     if (score)
1047         score64 = score;
1048
1049     if (score64 < s->avctx->frame_skip_threshold)
1050         return 1;
1051     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1052         return 1;
1053     return 0;
1054 }
1055
1056 static int estimate_best_b_count(MpegEncContext *s)
1057 {
1058     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1059     AVCodecContext *c = avcodec_alloc_context3(NULL);
1060     AVFrame input[FF_MAX_B_FRAMES + 2];
1061     const int scale = s->avctx->brd_scale;
1062     int i, j, out_size, p_lambda, b_lambda, lambda2;
1063     int outbuf_size  = s->width * s->height; // FIXME
1064     uint8_t *outbuf  = av_malloc(outbuf_size);
1065     int64_t best_rd  = INT64_MAX;
1066     int best_b_count = -1;
1067
1068     assert(scale >= 0 && scale <= 3);
1069
1070     //emms_c();
1071     //s->next_picture_ptr->quality;
1072     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1073     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1074     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1075     if (!b_lambda) // FIXME we should do this somewhere else
1076         b_lambda = p_lambda;
1077     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1078                FF_LAMBDA_SHIFT;
1079
1080     c->width        = s->width  >> scale;
1081     c->height       = s->height >> scale;
1082     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1083                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1084     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1085     c->mb_decision  = s->avctx->mb_decision;
1086     c->me_cmp       = s->avctx->me_cmp;
1087     c->mb_cmp       = s->avctx->mb_cmp;
1088     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1089     c->pix_fmt      = PIX_FMT_YUV420P;
1090     c->time_base    = s->avctx->time_base;
1091     c->max_b_frames = s->max_b_frames;
1092
1093     if (avcodec_open2(c, codec, NULL) < 0)
1094         return -1;
1095
1096     for (i = 0; i < s->max_b_frames + 2; i++) {
1097         int ysize = c->width * c->height;
1098         int csize = (c->width / 2) * (c->height / 2);
1099         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1100                                                 s->next_picture_ptr;
1101
1102         avcodec_get_frame_defaults(&input[i]);
1103         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1104         input[i].data[1]     = input[i].data[0] + ysize;
1105         input[i].data[2]     = input[i].data[1] + csize;
1106         input[i].linesize[0] = c->width;
1107         input[i].linesize[1] =
1108         input[i].linesize[2] = c->width / 2;
1109
1110         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1111             pre_input = *pre_input_ptr;
1112
1113             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1114                 pre_input.f.data[0] += INPLACE_OFFSET;
1115                 pre_input.f.data[1] += INPLACE_OFFSET;
1116                 pre_input.f.data[2] += INPLACE_OFFSET;
1117             }
1118
1119             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1120                                  pre_input.f.data[0], pre_input.f.linesize[0],
1121                                  c->width,      c->height);
1122             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1123                                  pre_input.f.data[1], pre_input.f.linesize[1],
1124                                  c->width >> 1, c->height >> 1);
1125             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1126                                  pre_input.f.data[2], pre_input.f.linesize[2],
1127                                  c->width >> 1, c->height >> 1);
1128         }
1129     }
1130
1131     for (j = 0; j < s->max_b_frames + 1; j++) {
1132         int64_t rd = 0;
1133
1134         if (!s->input_picture[j])
1135             break;
1136
1137         c->error[0] = c->error[1] = c->error[2] = 0;
1138
1139         input[0].pict_type = AV_PICTURE_TYPE_I;
1140         input[0].quality   = 1 * FF_QP2LAMBDA;
1141         out_size           = avcodec_encode_video(c, outbuf,
1142                                                   outbuf_size, &input[0]);
1143         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1144
1145         for (i = 0; i < s->max_b_frames + 1; i++) {
1146             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1147
1148             input[i + 1].pict_type = is_p ?
1149                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1150             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1151             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1152                                             &input[i + 1]);
1153             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1154         }
1155
1156         /* get the delayed frames */
1157         while (out_size) {
1158             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1159             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1160         }
1161
1162         rd += c->error[0] + c->error[1] + c->error[2];
1163
1164         if (rd < best_rd) {
1165             best_rd = rd;
1166             best_b_count = j;
1167         }
1168     }
1169
1170     av_freep(&outbuf);
1171     avcodec_close(c);
1172     av_freep(&c);
1173
1174     for (i = 0; i < s->max_b_frames + 2; i++) {
1175         av_freep(&input[i].data[0]);
1176     }
1177
1178     return best_b_count;
1179 }
1180
1181 static int select_input_picture(MpegEncContext *s)
1182 {
1183     int i;
1184
1185     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1186         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1187     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1188
1189     /* set next picture type & ordering */
1190     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1191         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1192             s->next_picture_ptr == NULL || s->intra_only) {
1193             s->reordered_input_picture[0] = s->input_picture[0];
1194             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1195             s->reordered_input_picture[0]->f.coded_picture_number =
1196                 s->coded_picture_number++;
1197         } else {
1198             int b_frames;
1199
1200             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1201                 if (s->picture_in_gop_number < s->gop_size &&
1202                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1203                     // FIXME check that te gop check above is +-1 correct
1204                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1205                     //       s->input_picture[0]->f.data[0],
1206                     //       s->input_picture[0]->pts);
1207
1208                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1209                         for (i = 0; i < 4; i++)
1210                             s->input_picture[0]->f.data[i] = NULL;
1211                         s->input_picture[0]->f.type = 0;
1212                     } else {
1213                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1214                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1215
1216                         s->avctx->release_buffer(s->avctx,
1217                                                  (AVFrame *) s->input_picture[0]);
1218                     }
1219
1220                     emms_c();
1221                     ff_vbv_update(s, 0);
1222
1223                     goto no_output_pic;
1224                 }
1225             }
1226
1227             if (s->flags & CODEC_FLAG_PASS2) {
1228                 for (i = 0; i < s->max_b_frames + 1; i++) {
1229                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1230
1231                     if (pict_num >= s->rc_context.num_entries)
1232                         break;
1233                     if (!s->input_picture[i]) {
1234                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1235                         break;
1236                     }
1237
1238                     s->input_picture[i]->f.pict_type =
1239                         s->rc_context.entry[pict_num].new_pict_type;
1240                 }
1241             }
1242
1243             if (s->avctx->b_frame_strategy == 0) {
1244                 b_frames = s->max_b_frames;
1245                 while (b_frames && !s->input_picture[b_frames])
1246                     b_frames--;
1247             } else if (s->avctx->b_frame_strategy == 1) {
1248                 for (i = 1; i < s->max_b_frames + 1; i++) {
1249                     if (s->input_picture[i] &&
1250                         s->input_picture[i]->b_frame_score == 0) {
1251                         s->input_picture[i]->b_frame_score =
1252                             get_intra_count(s,
1253                                             s->input_picture[i    ]->f.data[0],
1254                                             s->input_picture[i - 1]->f.data[0],
1255                                             s->linesize) + 1;
1256                     }
1257                 }
1258                 for (i = 0; i < s->max_b_frames + 1; i++) {
1259                     if (s->input_picture[i] == NULL ||
1260                         s->input_picture[i]->b_frame_score - 1 >
1261                             s->mb_num / s->avctx->b_sensitivity)
1262                         break;
1263                 }
1264
1265                 b_frames = FFMAX(0, i - 1);
1266
1267                 /* reset scores */
1268                 for (i = 0; i < b_frames + 1; i++) {
1269                     s->input_picture[i]->b_frame_score = 0;
1270                 }
1271             } else if (s->avctx->b_frame_strategy == 2) {
1272                 b_frames = estimate_best_b_count(s);
1273             } else {
1274                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1275                 b_frames = 0;
1276             }
1277
1278             emms_c();
1279             //static int b_count = 0;
1280             //b_count += b_frames;
1281             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1282
1283             for (i = b_frames - 1; i >= 0; i--) {
1284                 int type = s->input_picture[i]->f.pict_type;
1285                 if (type && type != AV_PICTURE_TYPE_B)
1286                     b_frames = i;
1287             }
1288             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1289                 b_frames == s->max_b_frames) {
1290                 av_log(s->avctx, AV_LOG_ERROR,
1291                        "warning, too many b frames in a row\n");
1292             }
1293
1294             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1295                 if ((s->flags2 & CODEC_FLAG2_STRICT_GOP) &&
1296                     s->gop_size > s->picture_in_gop_number) {
1297                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1298                 } else {
1299                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1300                         b_frames = 0;
1301                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1302                 }
1303             }
1304
1305             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1306                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1307                 b_frames--;
1308
1309             s->reordered_input_picture[0] = s->input_picture[b_frames];
1310             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1311                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1312             s->reordered_input_picture[0]->f.coded_picture_number =
1313                 s->coded_picture_number++;
1314             for (i = 0; i < b_frames; i++) {
1315                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1316                 s->reordered_input_picture[i + 1]->f.pict_type =
1317                     AV_PICTURE_TYPE_B;
1318                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1319                     s->coded_picture_number++;
1320             }
1321         }
1322     }
1323 no_output_pic:
1324     if (s->reordered_input_picture[0]) {
1325         s->reordered_input_picture[0]->f.reference =
1326            s->reordered_input_picture[0]->f.pict_type !=
1327                AV_PICTURE_TYPE_B ? 3 : 0;
1328
1329         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1330
1331         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1332             s->avctx->rc_buffer_size) {
1333             // input is a shared pix, so we can't modifiy it -> alloc a new
1334             // one & ensure that the shared one is reuseable
1335
1336             Picture *pic;
1337             int i = ff_find_unused_picture(s, 0);
1338             if (i < 0)
1339                 return i;
1340             pic = &s->picture[i];
1341
1342             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1343             if (ff_alloc_picture(s, pic, 0) < 0) {
1344                 return -1;
1345             }
1346
1347             /* mark us unused / free shared pic */
1348             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1349                 s->avctx->release_buffer(s->avctx,
1350                                          (AVFrame *) s->reordered_input_picture[0]);
1351             for (i = 0; i < 4; i++)
1352                 s->reordered_input_picture[0]->f.data[i] = NULL;
1353             s->reordered_input_picture[0]->f.type = 0;
1354
1355             copy_picture_attributes(s, (AVFrame *) pic,
1356                                     (AVFrame *) s->reordered_input_picture[0]);
1357
1358             s->current_picture_ptr = pic;
1359         } else {
1360             // input is not a shared pix -> reuse buffer for current_pix
1361
1362             assert(s->reordered_input_picture[0]->f.type ==
1363                        FF_BUFFER_TYPE_USER ||
1364                    s->reordered_input_picture[0]->f.type ==
1365                        FF_BUFFER_TYPE_INTERNAL);
1366
1367             s->current_picture_ptr = s->reordered_input_picture[0];
1368             for (i = 0; i < 4; i++) {
1369                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1370             }
1371         }
1372         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1373
1374         s->picture_number = s->new_picture.f.display_picture_number;
1375         //printf("dpn:%d\n", s->picture_number);
1376     } else {
1377         memset(&s->new_picture, 0, sizeof(Picture));
1378     }
1379     return 0;
1380 }
1381
/**
 * Queue one input frame and, if a picture is ready, encode it into buf.
 *
 * The frame is handed to load_input_picture(); select_input_picture() then
 * decides which queued picture (if any) is coded in this call. When rate
 * control with a buffer size is active and the coded picture is too large,
 * lambda is raised and the picture is encoded again. After encoding,
 * stuffing requested by ff_vbv_update() is appended and, for constant
 * bitrate FMT_MPEG1 output, the vbv_delay bits at s->vbv_delay_ptr are
 * patched.
 *
 * @param avctx    codec context; priv_data is the MpegEncContext
 * @param buf      output buffer
 * @param buf_size size of buf in bytes
 * @param data     input AVFrame, passed on as-is to load_input_picture()
 * @return number of bytes written to buf (0 if no picture was output),
 *         or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg  = data;
    int i, stuffing_count;
    int context_count = s->slice_context_count;

    /* give every slice context a part of the output buffer that is
     * proportional to the macroblock rows it covers */
    for (i = 0; i < context_count; i++) {
        int start_y = s->thread_context[i]->start_mb_y;
        int   end_y = s->thread_context[i]->  end_mb_y;
        int h       = s->mb_height;
        uint8_t *start = buf + (size_t)(((int64_t) buf_size) * start_y / h);
        uint8_t *end   = buf + (size_t)(((int64_t) buf_size) *   end_y / h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if (load_input_picture(s, pic_arg) < 0)
        return -1;

    if (select_input_picture(s) < 0) {
        return -1;
    }

    /* output? */
    if (s->new_picture.f.data[0]) {
        s->pict_type = s->new_picture.f.pict_type;
        //emms_c();
        //printf("qs:%f %f %d\n", s->new_picture.quality,
        //       s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);
vbv_retry:
        if (encode_picture(s, s->picture_number) < 0)
            return -1;

        /* export the per-picture statistics to the codec context */
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        // FIXME f/b_count in avctx
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
            ff_mjpeg_encode_picture_trailer(s);

        if (avctx->rc_buffer_size) {
            RateControlContext *rcc = &s->rc_context;
            int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;

            /* picture larger than max_size and lambda can still be raised:
             * increase lambda and encode the picture once more */
            if (put_bits_count(&s->pb) > max_size &&
                s->lambda < s->avctx->lmax) {
                /* raise lambda by at least 1, otherwise by the ratio
                 * (qscale + 1) / qscale */
                s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
                                       (s->qscale + 1) / s->qscale);
                if (s->adaptive_quant) {
                    int i;
                    for (i = 0; i < s->mb_height * s->mb_stride; i++)
                        s->lambda_table[i] =
                            FFMAX(s->lambda_table[i] + 1,
                                  s->lambda_table[i] * (s->qscale + 1) /
                                  s->qscale);
                }
                s->mb_skipped = 0;        // done in MPV_frame_start()
                // done in encode_picture() so we must undo it
                if (s->pict_type == AV_PICTURE_TYPE_P) {
                    if (s->flipflop_rounding          ||
                        s->codec_id == CODEC_ID_H263P ||
                        s->codec_id == CODEC_ID_MPEG4)
                        s->no_rounding ^= 1;
                }
                /* restore the time state for the second attempt
                 * (presumably also advanced by encode_picture() -
                 * TODO confirm) */
                if (s->pict_type != AV_PICTURE_TYPE_B) {
                    s->time_base       = s->last_time_base;
                    s->last_non_b_time = s->time - s->pp_time;
                }
                //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
                /* rewind every slice bit writer to the start of its buffer */
                for (i = 0; i < context_count; i++) {
                    PutBitContext *pb = &s->thread_context[i]->pb;
                    init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
                }
                goto vbv_retry;
            }

            assert(s->avctx->rc_max_rate);
        }

        if (s->flags & CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        /* store the error values in the picture and accumulate them in
         * the codec context */
        for (i = 0; i < 4; i++) {
            s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
            avctx->error[i] += s->current_picture_ptr->f.error[i];
        }

        /* first pass: the individual bit counters must add up to the
         * number of bits written */
        if (s->flags & CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
                   avctx->i_tex_bits + avctx->p_tex_bits ==
                       put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* a non-zero return value is the number of stuffing bytes to add */
        stuffing_count = ff_vbv_update(s, s->frame_bits);
        if (stuffing_count) {
            /* require room for the stuffing plus 50 bytes of slack */
            if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
                    stuffing_count + 50) {
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch (s->codec_id) {
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: stuff with zero bytes */
                while (stuffing_count--) {
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: the 4 bytes 00 00 01 C3, then 0xFF bytes.
                 * NOTE(review): assumes stuffing_count >= 4 here, otherwise
                 * the loop counter goes negative - confirm in
                 * ff_vbv_update() */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while (stuffing_count--) {
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                /* no stuffing is written for any other codec */
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if (s->avctx->rc_max_rate                          &&
            s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
            s->out_format == FMT_MPEG1                     &&
            90000LL * (avctx->rc_buffer_size - 1) <=
                s->avctx->rc_max_rate * 0xFFFFLL) {
            int vbv_delay, min_delay;
            /* bits arriving during one time_base tick at rc_max_rate */
            double inbits  = s->avctx->rc_max_rate *
                             av_q2d(s->avctx->time_base);
            /* bits of this picture from one byte before vbv_delay_ptr on */
            int    minbits = s->frame_bits - 8 *
                             (s->vbv_delay_ptr - s->pb.buf - 1);
            double bits    = s->rc_context.buffer_index + minbits - inbits;

            if (bits < 0)
                av_log(s->avctx, AV_LOG_ERROR,
                       "Internal error, negative bits\n");

            assert(s->repeat_first_field == 0);

            /* convert bit counts to delays by scaling with
             * 90000 / rc_max_rate; min_delay is rounded up */
            vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
            min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
                        s->avctx->rc_max_rate;

            vbv_delay = FFMAX(vbv_delay, min_delay);

            assert(vbv_delay < 0xFFFF);

            /* patch the 16-bit value into the bitstream: 3 bits into the
             * low bits of byte 0, 8 bits into byte 1 and 5 bits into the
             * high bits of byte 2 */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay >> 13;
            s->vbv_delay_ptr[1]  = vbv_delay >> 5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay << 3;
            /* exported scaled by 300 (presumably 90 kHz -> 27 MHz units -
             * TODO confirm) */
            avctx->vbv_delay     = vbv_delay * 300;
        }
        s->total_bits     += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    } else {
        /* no picture was coded, so nothing may have been written */
        assert((put_bits_ptr(&s->pb) == s->pb.buf));
        s->frame_bits = 0;
    }
    /* the output always consists of whole bytes */
    assert((s->frame_bits & 7) == 0);

    return s->frame_bits / 8;
}
1563
1564 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1565                                                 int n, int threshold)
1566 {
1567     static const char tab[64] = {
1568         3, 2, 2, 1, 1, 1, 1, 1,
1569         1, 1, 1, 1, 1, 1, 1, 1,
1570         1, 1, 1, 1, 1, 1, 1, 1,
1571         0, 0, 0, 0, 0, 0, 0, 0,
1572         0, 0, 0, 0, 0, 0, 0, 0,
1573         0, 0, 0, 0, 0, 0, 0, 0,
1574         0, 0, 0, 0, 0, 0, 0, 0,
1575         0, 0, 0, 0, 0, 0, 0, 0
1576     };
1577     int score = 0;
1578     int run = 0;
1579     int i;
1580     DCTELEM *block = s->block[n];
1581     const int last_index = s->block_last_index[n];
1582     int skip_dc;
1583
1584     if (threshold < 0) {
1585         skip_dc = 0;
1586         threshold = -threshold;
1587     } else
1588         skip_dc = 1;
1589
1590     /* Are all we could set to zero already zero? */
1591     if (last_index <= skip_dc - 1)
1592         return;
1593
1594     for (i = 0; i <= last_index; i++) {
1595         const int j = s->intra_scantable.permutated[i];
1596         const int level = FFABS(block[j]);
1597         if (level == 1) {
1598             if (skip_dc && i == 0)
1599                 continue;
1600             score += tab[run];
1601             run = 0;
1602         } else if (level > 1) {
1603             return;
1604         } else {
1605             run++;
1606         }
1607     }
1608     if (score >= threshold)
1609         return;
1610     for (i = skip_dc; i <= last_index; i++) {
1611         const int j = s->intra_scantable.permutated[i];
1612         block[j] = 0;
1613     }
1614     if (block[0])
1615         s->block_last_index[n] = 0;
1616     else
1617         s->block_last_index[n] = -1;
1618 }
1619
1620 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1621                                int last_index)
1622 {
1623     int i;
1624     const int maxlevel = s->max_qcoeff;
1625     const int minlevel = s->min_qcoeff;
1626     int overflow = 0;
1627
1628     if (s->mb_intra) {
1629         i = 1; // skip clipping of intra dc
1630     } else
1631         i = 0;
1632
1633     for (; i <= last_index; i++) {
1634         const int j = s->intra_scantable.permutated[i];
1635         int level = block[j];
1636
1637         if (level > maxlevel) {
1638             level = maxlevel;
1639             overflow++;
1640         } else if (level < minlevel) {
1641             level = minlevel;
1642             overflow++;
1643         }
1644
1645         block[j] = level;
1646     }
1647
1648     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1649         av_log(s->avctx, AV_LOG_INFO,
1650                "warning, clipping %d dct coefficients to %d..%d\n",
1651                overflow, minlevel, maxlevel);
1652 }
1653
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel variation.
 *
 * For every pixel the (up to) 3x3 neighbourhood, clipped to the 8x8 block,
 * is examined; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count,
 * where count is the number of neighbourhood pixels actually inside the block.
 *
 * @param weight output, 64 entries laid out as weight[x + 8 * y]
 * @param ptr    top-left pixel of the 8x8 source block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of pixels visited by the two loops below */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];
                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1677
1678 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1679                                                 int motion_x, int motion_y,
1680                                                 int mb_block_height,
1681                                                 int mb_block_count)
1682 {
1683     int16_t weight[8][64];
1684     DCTELEM orig[8][64];
1685     const int mb_x = s->mb_x;
1686     const int mb_y = s->mb_y;
1687     int i;
1688     int skip_dct[8];
1689     int dct_offset = s->linesize * 8; // default for progressive frames
1690     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1691     int wrap_y, wrap_c;
1692
1693     for (i = 0; i < mb_block_count; i++)
1694         skip_dct[i] = s->skipdct;
1695
1696     if (s->adaptive_quant) {
1697         const int last_qp = s->qscale;
1698         const int mb_xy = mb_x + mb_y * s->mb_stride;
1699
1700         s->lambda = s->lambda_table[mb_xy];
1701         update_qscale(s);
1702
1703         if (!(s->flags & CODEC_FLAG_QP_RD)) {
1704             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1705             s->dquant = s->qscale - last_qp;
1706
1707             if (s->out_format == FMT_H263) {
1708                 s->dquant = av_clip(s->dquant, -2, 2);
1709
1710                 if (s->codec_id == CODEC_ID_MPEG4) {
1711                     if (!s->mb_intra) {
1712                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1713                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1714                                 s->dquant = 0;
1715                         }
1716                         if (s->mv_type == MV_TYPE_8X8)
1717                             s->dquant = 0;
1718                     }
1719                 }
1720             }
1721         }
1722         ff_set_qscale(s, last_qp + s->dquant);
1723     } else if (s->flags & CODEC_FLAG_QP_RD)
1724         ff_set_qscale(s, s->qscale + s->dquant);
1725
1726     wrap_y = s->linesize;
1727     wrap_c = s->uvlinesize;
1728     ptr_y  = s->new_picture.f.data[0] +
1729              (mb_y * 16 * wrap_y)              + mb_x * 16;
1730     ptr_cb = s->new_picture.f.data[1] +
1731              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1732     ptr_cr = s->new_picture.f.data[2] +
1733              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1734
1735     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1736         uint8_t *ebuf = s->edge_emu_buffer + 32;
1737         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1738                                 mb_y * 16, s->width, s->height);
1739         ptr_y = ebuf;
1740         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1741                                 mb_block_height, mb_x * 8, mb_y * 8,
1742                                 s->width >> 1, s->height >> 1);
1743         ptr_cb = ebuf + 18 * wrap_y;
1744         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1745                                 mb_block_height, mb_x * 8, mb_y * 8,
1746                                 s->width >> 1, s->height >> 1);
1747         ptr_cr = ebuf + 18 * wrap_y + 8;
1748     }
1749
1750     if (s->mb_intra) {
1751         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1752             int progressive_score, interlaced_score;
1753
1754             s->interlaced_dct = 0;
1755             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1756                                                     NULL, wrap_y, 8) +
1757                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1758                                                     NULL, wrap_y, 8) - 400;
1759
1760             if (progressive_score > 0) {
1761                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1762                                                        NULL, wrap_y * 2, 8) +
1763                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1764                                                        NULL, wrap_y * 2, 8);
1765                 if (progressive_score > interlaced_score) {
1766                     s->interlaced_dct = 1;
1767
1768                     dct_offset = wrap_y;
1769                     wrap_y <<= 1;
1770                     if (s->chroma_format == CHROMA_422)
1771                         wrap_c <<= 1;
1772                 }
1773             }
1774         }
1775
1776         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1777         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1778         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1779         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1780
1781         if (s->flags & CODEC_FLAG_GRAY) {
1782             skip_dct[4] = 1;
1783             skip_dct[5] = 1;
1784         } else {
1785             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1786             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1787             if (!s->chroma_y_shift) { /* 422 */
1788                 s->dsp.get_pixels(s->block[6],
1789                                   ptr_cb + (dct_offset >> 1), wrap_c);
1790                 s->dsp.get_pixels(s->block[7],
1791                                   ptr_cr + (dct_offset >> 1), wrap_c);
1792             }
1793         }
1794     } else {
1795         op_pixels_func (*op_pix)[4];
1796         qpel_mc_func (*op_qpix)[16];
1797         uint8_t *dest_y, *dest_cb, *dest_cr;
1798
1799         dest_y  = s->dest[0];
1800         dest_cb = s->dest[1];
1801         dest_cr = s->dest[2];
1802
1803         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1804             op_pix  = s->dsp.put_pixels_tab;
1805             op_qpix = s->dsp.put_qpel_pixels_tab;
1806         } else {
1807             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1808             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1809         }
1810
1811         if (s->mv_dir & MV_DIR_FORWARD) {
1812             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1813                        op_pix, op_qpix);
1814             op_pix  = s->dsp.avg_pixels_tab;
1815             op_qpix = s->dsp.avg_qpel_pixels_tab;
1816         }
1817         if (s->mv_dir & MV_DIR_BACKWARD) {
1818             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1819                        op_pix, op_qpix);
1820         }
1821
1822         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1823             int progressive_score, interlaced_score;
1824
1825             s->interlaced_dct = 0;
1826             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1827                                                     ptr_y,              wrap_y,
1828                                                     8) +
1829                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1830                                                     ptr_y + wrap_y * 8, wrap_y,
1831                                                     8) - 400;
1832
1833             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1834                 progressive_score -= 400;
1835
1836             if (progressive_score > 0) {
1837                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1838                                                        ptr_y,
1839                                                        wrap_y * 2, 8) +
1840                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1841                                                        ptr_y + wrap_y,
1842                                                        wrap_y * 2, 8);
1843
1844                 if (progressive_score > interlaced_score) {
1845                     s->interlaced_dct = 1;
1846
1847                     dct_offset = wrap_y;
1848                     wrap_y <<= 1;
1849                     if (s->chroma_format == CHROMA_422)
1850                         wrap_c <<= 1;
1851                 }
1852             }
1853         }
1854
1855         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1856         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1857         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1858                            dest_y + dct_offset, wrap_y);
1859         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1860                            dest_y + dct_offset + 8, wrap_y);
1861
1862         if (s->flags & CODEC_FLAG_GRAY) {
1863             skip_dct[4] = 1;
1864             skip_dct[5] = 1;
1865         } else {
1866             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1867             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1868             if (!s->chroma_y_shift) { /* 422 */
1869                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1870                                    dest_cb + (dct_offset >> 1), wrap_c);
1871                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1872                                    dest_cr + (dct_offset >> 1), wrap_c);
1873             }
1874         }
1875         /* pre quantization */
1876         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1877                 2 * s->qscale * s->qscale) {
1878             // FIXME optimize
1879             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1880                               wrap_y, 8) < 20 * s->qscale)
1881                 skip_dct[0] = 1;
1882             if (s->dsp.sad[1](NULL, ptr_y + 8,
1883                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1884                 skip_dct[1] = 1;
1885             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1886                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1887                 skip_dct[2] = 1;
1888             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1889                               dest_y + dct_offset + 8,
1890                               wrap_y, 8) < 20 * s->qscale)
1891                 skip_dct[3] = 1;
1892             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1893                               wrap_c, 8) < 20 * s->qscale)
1894                 skip_dct[4] = 1;
1895             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1896                               wrap_c, 8) < 20 * s->qscale)
1897                 skip_dct[5] = 1;
1898             if (!s->chroma_y_shift) { /* 422 */
1899                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1900                                   dest_cb + (dct_offset >> 1),
1901                                   wrap_c, 8) < 20 * s->qscale)
1902                     skip_dct[6] = 1;
1903                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1904                                   dest_cr + (dct_offset >> 1),
1905                                   wrap_c, 8) < 20 * s->qscale)
1906                     skip_dct[7] = 1;
1907             }
1908         }
1909     }
1910
1911     if (s->avctx->quantizer_noise_shaping) {
1912         if (!skip_dct[0])
1913             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1914         if (!skip_dct[1])
1915             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1916         if (!skip_dct[2])
1917             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1918         if (!skip_dct[3])
1919             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1920         if (!skip_dct[4])
1921             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1922         if (!skip_dct[5])
1923             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1924         if (!s->chroma_y_shift) { /* 422 */
1925             if (!skip_dct[6])
1926                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1927                                   wrap_c);
1928             if (!skip_dct[7])
1929                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1930                                   wrap_c);
1931         }
1932         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1933     }
1934
1935     /* DCT & quantize */
1936     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1937     {
1938         for (i = 0; i < mb_block_count; i++) {
1939             if (!skip_dct[i]) {
1940                 int overflow;
1941                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1942                 // FIXME we could decide to change to quantizer instead of
1943                 // clipping
1944                 // JS: I don't think that would be a good idea it could lower
1945                 //     quality instead of improve it. Just INTRADC clipping
1946                 //     deserves changes in quantizer
1947                 if (overflow)
1948                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1949             } else
1950                 s->block_last_index[i] = -1;
1951         }
1952         if (s->avctx->quantizer_noise_shaping) {
1953             for (i = 0; i < mb_block_count; i++) {
1954                 if (!skip_dct[i]) {
1955                     s->block_last_index[i] =
1956                         dct_quantize_refine(s, s->block[i], weight[i],
1957                                             orig[i], i, s->qscale);
1958                 }
1959             }
1960         }
1961
1962         if (s->luma_elim_threshold && !s->mb_intra)
1963             for (i = 0; i < 4; i++)
1964                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
1965         if (s->chroma_elim_threshold && !s->mb_intra)
1966             for (i = 4; i < mb_block_count; i++)
1967                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
1968
1969         if (s->flags & CODEC_FLAG_CBP_RD) {
1970             for (i = 0; i < mb_block_count; i++) {
1971                 if (s->block_last_index[i] == -1)
1972                     s->coded_score[i] = INT_MAX / 256;
1973             }
1974         }
1975     }
1976
1977     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
1978         s->block_last_index[4] =
1979         s->block_last_index[5] = 0;
1980         s->block[4][0] =
1981         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
1982     }
1983
1984     // non c quantize code returns incorrect block_last_index FIXME
1985     if (s->alternate_scan && s->dct_quantize != dct_quantize_c) {
1986         for (i = 0; i < mb_block_count; i++) {
1987             int j;
1988             if (s->block_last_index[i] > 0) {
1989                 for (j = 63; j > 0; j--) {
1990                     if (s->block[i][s->intra_scantable.permutated[j]])
1991                         break;
1992                 }
1993                 s->block_last_index[i] = j;
1994             }
1995         }
1996     }
1997
1998     /* huffman encode */
1999     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2000     case CODEC_ID_MPEG1VIDEO:
2001     case CODEC_ID_MPEG2VIDEO:
2002         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2003             mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2004         break;
2005     case CODEC_ID_MPEG4:
2006         if (CONFIG_MPEG4_ENCODER)
2007             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2008         break;
2009     case CODEC_ID_MSMPEG4V2:
2010     case CODEC_ID_MSMPEG4V3:
2011     case CODEC_ID_WMV1:
2012         if (CONFIG_MSMPEG4_ENCODER)
2013             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2014         break;
2015     case CODEC_ID_WMV2:
2016         if (CONFIG_WMV2_ENCODER)
2017             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2018         break;
2019     case CODEC_ID_H261:
2020         if (CONFIG_H261_ENCODER)
2021             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2022         break;
2023     case CODEC_ID_H263:
2024     case CODEC_ID_H263P:
2025     case CODEC_ID_FLV1:
2026     case CODEC_ID_RV10:
2027     case CODEC_ID_RV20:
2028         if (CONFIG_H263_ENCODER)
2029             h263_encode_mb(s, s->block, motion_x, motion_y);
2030         break;
2031     case CODEC_ID_MJPEG:
2032     case CODEC_ID_AMV:
2033         if (CONFIG_MJPEG_ENCODER)
2034             ff_mjpeg_encode_mb(s, s->block);
2035         break;
2036     default:
2037         assert(0);
2038     }
2039 }
2040
2041 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2042 {
2043     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2044     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2045 }
2046
2047 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2048     int i;
2049
2050     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2051
2052     /* mpeg1 */
2053     d->mb_skip_run= s->mb_skip_run;
2054     for(i=0; i<3; i++)
2055         d->last_dc[i] = s->last_dc[i];
2056
2057     /* statistics */
2058     d->mv_bits= s->mv_bits;
2059     d->i_tex_bits= s->i_tex_bits;
2060     d->p_tex_bits= s->p_tex_bits;
2061     d->i_count= s->i_count;
2062     d->f_count= s->f_count;
2063     d->b_count= s->b_count;
2064     d->skip_count= s->skip_count;
2065     d->misc_bits= s->misc_bits;
2066     d->last_bits= 0;
2067
2068     d->mb_skipped= 0;
2069     d->qscale= s->qscale;
2070     d->dquant= s->dquant;
2071
2072     d->esc3_level_length= s->esc3_level_length;
2073 }
2074
2075 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2076     int i;
2077
2078     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2079     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2080
2081     /* mpeg1 */
2082     d->mb_skip_run= s->mb_skip_run;
2083     for(i=0; i<3; i++)
2084         d->last_dc[i] = s->last_dc[i];
2085
2086     /* statistics */
2087     d->mv_bits= s->mv_bits;
2088     d->i_tex_bits= s->i_tex_bits;
2089     d->p_tex_bits= s->p_tex_bits;
2090     d->i_count= s->i_count;
2091     d->f_count= s->f_count;
2092     d->b_count= s->b_count;
2093     d->skip_count= s->skip_count;
2094     d->misc_bits= s->misc_bits;
2095
2096     d->mb_intra= s->mb_intra;
2097     d->mb_skipped= s->mb_skipped;
2098     d->mv_type= s->mv_type;
2099     d->mv_dir= s->mv_dir;
2100     d->pb= s->pb;
2101     if(s->data_partitioning){
2102         d->pb2= s->pb2;
2103         d->tex_pb= s->tex_pb;
2104     }
2105     d->block= s->block;
2106     for(i=0; i<8; i++)
2107         d->block_last_index[i]= s->block_last_index[i];
2108     d->interlaced_dct= s->interlaced_dct;
2109     d->qscale= s->qscale;
2110
2111     d->esc3_level_length= s->esc3_level_length;
2112 }
2113
2114 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2115                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2116                            int *dmin, int *next_block, int motion_x, int motion_y)
2117 {
2118     int score;
2119     uint8_t *dest_backup[3];
2120
2121     copy_context_before_encode(s, backup, type);
2122
2123     s->block= s->blocks[*next_block];
2124     s->pb= pb[*next_block];
2125     if(s->data_partitioning){
2126         s->pb2   = pb2   [*next_block];
2127         s->tex_pb= tex_pb[*next_block];
2128     }
2129
2130     if(*next_block){
2131         memcpy(dest_backup, s->dest, sizeof(s->dest));
2132         s->dest[0] = s->rd_scratchpad;
2133         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2134         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2135         assert(s->linesize >= 32); //FIXME
2136     }
2137
2138     encode_mb(s, motion_x, motion_y);
2139
2140     score= put_bits_count(&s->pb);
2141     if(s->data_partitioning){
2142         score+= put_bits_count(&s->pb2);
2143         score+= put_bits_count(&s->tex_pb);
2144     }
2145
2146     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2147         MPV_decode_mb(s, s->block);
2148
2149         score *= s->lambda2;
2150         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2151     }
2152
2153     if(*next_block){
2154         memcpy(s->dest, dest_backup, sizeof(s->dest));
2155     }
2156
2157     if(score<*dmin){
2158         *dmin= score;
2159         *next_block^=1;
2160
2161         copy_context_after_encode(best, s, type);
2162     }
2163 }
2164
2165 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2166     uint32_t *sq = ff_squareTbl + 256;
2167     int acc=0;
2168     int x,y;
2169
2170     if(w==16 && h==16)
2171         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2172     else if(w==8 && h==8)
2173         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2174
2175     for(y=0; y<h; y++){
2176         for(x=0; x<w; x++){
2177             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2178         }
2179     }
2180
2181     assert(acc>=0);
2182
2183     return acc;
2184 }
2185
2186 static int sse_mb(MpegEncContext *s){
2187     int w= 16;
2188     int h= 16;
2189
2190     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2191     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2192
2193     if(w==16 && h==16)
2194       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2195         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2196                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2197                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2198       }else{
2199         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2200                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2201                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2202       }
2203     else
2204         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2205                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2206                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2207 }
2208
2209 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2210     MpegEncContext *s= *(void**)arg;
2211
2212
2213     s->me.pre_pass=1;
2214     s->me.dia_size= s->avctx->pre_dia_size;
2215     s->first_slice_line=1;
2216     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2217         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2218             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2219         }
2220         s->first_slice_line=0;
2221     }
2222
2223     s->me.pre_pass=0;
2224
2225     return 0;
2226 }
2227
2228 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2229     MpegEncContext *s= *(void**)arg;
2230
2231     ff_check_alignment();
2232
2233     s->me.dia_size= s->avctx->dia_size;
2234     s->first_slice_line=1;
2235     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2236         s->mb_x=0; //for block init below
2237         ff_init_block_index(s);
2238         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2239             s->block_index[0]+=2;
2240             s->block_index[1]+=2;
2241             s->block_index[2]+=2;
2242             s->block_index[3]+=2;
2243
2244             /* compute motion vector & mb_type and store in context */
2245             if(s->pict_type==AV_PICTURE_TYPE_B)
2246                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2247             else
2248                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2249         }
2250         s->first_slice_line=0;
2251     }
2252     return 0;
2253 }
2254
2255 static int mb_var_thread(AVCodecContext *c, void *arg){
2256     MpegEncContext *s= *(void**)arg;
2257     int mb_x, mb_y;
2258
2259     ff_check_alignment();
2260
2261     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2262         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2263             int xx = mb_x * 16;
2264             int yy = mb_y * 16;
2265             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2266             int varc;
2267             int sum = s->dsp.pix_sum(pix, s->linesize);
2268
2269             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2270
2271             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2272             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2273             s->me.mb_var_sum_temp    += varc;
2274         }
2275     }
2276     return 0;
2277 }
2278
2279 static void write_slice_end(MpegEncContext *s){
2280     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2281         if(s->partitioned_frame){
2282             ff_mpeg4_merge_partitions(s);
2283         }
2284
2285         ff_mpeg4_stuffing(&s->pb);
2286     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2287         ff_mjpeg_encode_stuffing(&s->pb);
2288     }
2289
2290     avpriv_align_put_bits(&s->pb);
2291     flush_put_bits(&s->pb);
2292
2293     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2294         s->misc_bits+= get_bits_diff(s);
2295 }
2296
2297 static int encode_thread(AVCodecContext *c, void *arg){
2298     MpegEncContext *s= *(void**)arg;
2299     int mb_x, mb_y, pdif = 0;
2300     int chr_h= 16>>s->chroma_y_shift;
2301     int i, j;
2302     MpegEncContext best_s, backup_s;
2303     uint8_t bit_buf[2][MAX_MB_BYTES];
2304     uint8_t bit_buf2[2][MAX_MB_BYTES];
2305     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2306     PutBitContext pb[2], pb2[2], tex_pb[2];
2307 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2308
2309     ff_check_alignment();
2310
2311     for(i=0; i<2; i++){
2312         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2313         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2314         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2315     }
2316
2317     s->last_bits= put_bits_count(&s->pb);
2318     s->mv_bits=0;
2319     s->misc_bits=0;
2320     s->i_tex_bits=0;
2321     s->p_tex_bits=0;
2322     s->i_count=0;
2323     s->f_count=0;
2324     s->b_count=0;
2325     s->skip_count=0;
2326
2327     for(i=0; i<3; i++){
2328         /* init last dc values */
2329         /* note: quant matrix value (8) is implied here */
2330         s->last_dc[i] = 128 << s->intra_dc_precision;
2331
2332         s->current_picture.f.error[i] = 0;
2333     }
2334     if(s->codec_id==CODEC_ID_AMV){
2335         s->last_dc[0] = 128*8/13;
2336         s->last_dc[1] = 128*8/14;
2337         s->last_dc[2] = 128*8/14;
2338     }
2339     s->mb_skip_run = 0;
2340     memset(s->last_mv, 0, sizeof(s->last_mv));
2341
2342     s->last_mv_dir = 0;
2343
2344     switch(s->codec_id){
2345     case CODEC_ID_H263:
2346     case CODEC_ID_H263P:
2347     case CODEC_ID_FLV1:
2348         if (CONFIG_H263_ENCODER)
2349             s->gob_index = ff_h263_get_gob_height(s);
2350         break;
2351     case CODEC_ID_MPEG4:
2352         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2353             ff_mpeg4_init_partitions(s);
2354         break;
2355     }
2356
2357     s->resync_mb_x=0;
2358     s->resync_mb_y=0;
2359     s->first_slice_line = 1;
2360     s->ptr_lastgob = s->pb.buf;
2361     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2362 //    printf("row %d at %X\n", s->mb_y, (int)s);
2363         s->mb_x=0;
2364         s->mb_y= mb_y;
2365
2366         ff_set_qscale(s, s->qscale);
2367         ff_init_block_index(s);
2368
2369         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2370             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2371             int mb_type= s->mb_type[xy];
2372 //            int d;
2373             int dmin= INT_MAX;
2374             int dir;
2375
2376             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2377                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2378                 return -1;
2379             }
2380             if(s->data_partitioning){
2381                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2382                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2383                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2384                     return -1;
2385                 }
2386             }
2387
2388             s->mb_x = mb_x;
2389             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2390             ff_update_block_index(s);
2391
2392             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2393                 ff_h261_reorder_mb_index(s);
2394                 xy= s->mb_y*s->mb_stride + s->mb_x;
2395                 mb_type= s->mb_type[xy];
2396             }
2397
2398             /* write gob / video packet header  */
2399             if(s->rtp_mode){
2400                 int current_packet_size, is_gob_start;
2401
2402                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2403
2404                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2405
2406                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2407
2408                 switch(s->codec_id){
2409                 case CODEC_ID_H263:
2410                 case CODEC_ID_H263P:
2411                     if(!s->h263_slice_structured)
2412                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2413                     break;
2414                 case CODEC_ID_MPEG2VIDEO:
2415                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2416                 case CODEC_ID_MPEG1VIDEO:
2417                     if(s->mb_skip_run) is_gob_start=0;
2418                     break;
2419                 }
2420
2421                 if(is_gob_start){
2422                     if(s->start_mb_y != mb_y || mb_x!=0){
2423                         write_slice_end(s);
2424
2425                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2426                             ff_mpeg4_init_partitions(s);
2427                         }
2428                     }
2429
2430                     assert((put_bits_count(&s->pb)&7) == 0);
2431                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2432
2433                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2434                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2435                         int d= 100 / s->avctx->error_rate;
2436                         if(r % d == 0){
2437                             current_packet_size=0;
2438                             s->pb.buf_ptr= s->ptr_lastgob;
2439                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2440                         }
2441                     }
2442
2443                     if (s->avctx->rtp_callback){
2444                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2445                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2446                     }
2447
2448                     switch(s->codec_id){
2449                     case CODEC_ID_MPEG4:
2450                         if (CONFIG_MPEG4_ENCODER) {
2451                             ff_mpeg4_encode_video_packet_header(s);
2452                             ff_mpeg4_clean_buffers(s);
2453                         }
2454                     break;
2455                     case CODEC_ID_MPEG1VIDEO:
2456                     case CODEC_ID_MPEG2VIDEO:
2457                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2458                             ff_mpeg1_encode_slice_header(s);
2459                             ff_mpeg1_clean_buffers(s);
2460                         }
2461                     break;
2462                     case CODEC_ID_H263:
2463                     case CODEC_ID_H263P:
2464                         if (CONFIG_H263_ENCODER)
2465                             h263_encode_gob_header(s, mb_y);
2466                     break;
2467                     }
2468
2469                     if(s->flags&CODEC_FLAG_PASS1){
2470                         int bits= put_bits_count(&s->pb);
2471                         s->misc_bits+= bits - s->last_bits;
2472                         s->last_bits= bits;
2473                     }
2474
2475                     s->ptr_lastgob += current_packet_size;
2476                     s->first_slice_line=1;
2477                     s->resync_mb_x=mb_x;
2478                     s->resync_mb_y=mb_y;
2479                 }
2480             }
2481
2482             if(  (s->resync_mb_x   == s->mb_x)
2483                && s->resync_mb_y+1 == s->mb_y){
2484                 s->first_slice_line=0;
2485             }
2486
2487             s->mb_skipped=0;
2488             s->dquant=0; //only for QP_RD
2489
2490             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
2491                 int next_block=0;
2492                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2493
2494                 copy_context_before_encode(&backup_s, s, -1);
2495                 backup_s.pb= s->pb;
2496                 best_s.data_partitioning= s->data_partitioning;
2497                 best_s.partitioned_frame= s->partitioned_frame;
2498                 if(s->data_partitioning){
2499                     backup_s.pb2= s->pb2;
2500                     backup_s.tex_pb= s->tex_pb;
2501                 }
2502
2503                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2504                     s->mv_dir = MV_DIR_FORWARD;
2505                     s->mv_type = MV_TYPE_16X16;
2506                     s->mb_intra= 0;
2507                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2508                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2509                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2510                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2511                 }
2512                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2513                     s->mv_dir = MV_DIR_FORWARD;
2514                     s->mv_type = MV_TYPE_FIELD;
2515                     s->mb_intra= 0;
2516                     for(i=0; i<2; i++){
2517                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2518                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2519                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2520                     }
2521                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2522                                  &dmin, &next_block, 0, 0);
2523                 }
2524                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2525                     s->mv_dir = MV_DIR_FORWARD;
2526                     s->mv_type = MV_TYPE_16X16;
2527                     s->mb_intra= 0;
2528                     s->mv[0][0][0] = 0;
2529                     s->mv[0][0][1] = 0;
2530                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2531                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2532                 }
2533                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2534                     s->mv_dir = MV_DIR_FORWARD;
2535                     s->mv_type = MV_TYPE_8X8;
2536                     s->mb_intra= 0;
2537                     for(i=0; i<4; i++){
2538                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2539                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2540                     }
2541                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2542                                  &dmin, &next_block, 0, 0);
2543                 }
2544                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2545                     s->mv_dir = MV_DIR_FORWARD;
2546                     s->mv_type = MV_TYPE_16X16;
2547                     s->mb_intra= 0;
2548                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2549                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2550                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2551                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2552                 }
2553                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2554                     s->mv_dir = MV_DIR_BACKWARD;
2555                     s->mv_type = MV_TYPE_16X16;
2556                     s->mb_intra= 0;
2557                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2558                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2559                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2560                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2561                 }
2562                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2563                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2564                     s->mv_type = MV_TYPE_16X16;
2565                     s->mb_intra= 0;
2566                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2567                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2568                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2569                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2570                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2571                                  &dmin, &next_block, 0, 0);
2572                 }
2573                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2574                     s->mv_dir = MV_DIR_FORWARD;
2575                     s->mv_type = MV_TYPE_FIELD;
2576                     s->mb_intra= 0;
2577                     for(i=0; i<2; i++){
2578                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2579                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2580                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2581                     }
2582                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2583                                  &dmin, &next_block, 0, 0);
2584                 }
2585                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2586                     s->mv_dir = MV_DIR_BACKWARD;
2587                     s->mv_type = MV_TYPE_FIELD;
2588                     s->mb_intra= 0;
2589                     for(i=0; i<2; i++){
2590                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2591                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2592                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2593                     }
2594                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2595                                  &dmin, &next_block, 0, 0);
2596                 }
2597                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2598                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2599                     s->mv_type = MV_TYPE_FIELD;
2600                     s->mb_intra= 0;
2601                     for(dir=0; dir<2; dir++){
2602                         for(i=0; i<2; i++){
2603                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2604                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2605                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2606                         }
2607                     }
2608                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2609                                  &dmin, &next_block, 0, 0);
2610                 }
2611                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2612                     s->mv_dir = 0;
2613                     s->mv_type = MV_TYPE_16X16;
2614                     s->mb_intra= 1;
2615                     s->mv[0][0][0] = 0;
2616                     s->mv[0][0][1] = 0;
2617                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2618                                  &dmin, &next_block, 0, 0);
2619                     if(s->h263_pred || s->h263_aic){
2620                         if(best_s.mb_intra)
2621                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2622                         else
2623                             ff_clean_intra_table_entries(s); //old mode?
2624                     }
2625                 }
2626
2627                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
2628                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2629                         const int last_qp= backup_s.qscale;
2630                         int qpi, qp, dc[6];
2631                         DCTELEM ac[6][16];
2632                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2633                         static const int dquant_tab[4]={-1,1,-2,2};
2634
2635                         assert(backup_s.dquant == 0);
2636
2637                         //FIXME intra
2638                         s->mv_dir= best_s.mv_dir;
2639                         s->mv_type = MV_TYPE_16X16;
2640                         s->mb_intra= best_s.mb_intra;
2641                         s->mv[0][0][0] = best_s.mv[0][0][0];
2642                         s->mv[0][0][1] = best_s.mv[0][0][1];
2643                         s->mv[1][0][0] = best_s.mv[1][0][0];
2644                         s->mv[1][0][1] = best_s.mv[1][0][1];
2645
2646                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2647                         for(; qpi<4; qpi++){
2648                             int dquant= dquant_tab[qpi];
2649                             qp= last_qp + dquant;
2650                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2651                                 continue;
2652                             backup_s.dquant= dquant;
2653                             if(s->mb_intra && s->dc_val[0]){
2654                                 for(i=0; i<6; i++){
2655                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2656                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2657                                 }
2658                             }
2659
2660                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2661                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2662                             if(best_s.qscale != qp){
2663                                 if(s->mb_intra && s->dc_val[0]){
2664                                     for(i=0; i<6; i++){
2665                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2666                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2667                                     }
2668                                 }
2669                             }
2670                         }
2671                     }
2672                 }
2673                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2674                     int mx= s->b_direct_mv_table[xy][0];
2675                     int my= s->b_direct_mv_table[xy][1];
2676
2677                     backup_s.dquant = 0;
2678                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2679                     s->mb_intra= 0;
2680                     ff_mpeg4_set_direct_mv(s, mx, my);
2681                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2682                                  &dmin, &next_block, mx, my);
2683                 }
2684                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2685                     backup_s.dquant = 0;
2686                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2687                     s->mb_intra= 0;
2688                     ff_mpeg4_set_direct_mv(s, 0, 0);
2689                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2690                                  &dmin, &next_block, 0, 0);
2691                 }
2692                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
2693                     int coded=0;
2694                     for(i=0; i<6; i++)
2695                         coded |= s->block_last_index[i];
2696                     if(coded){
2697                         int mx,my;
2698                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2699                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2700                             mx=my=0; //FIXME find the one we actually used
2701                             ff_mpeg4_set_direct_mv(s, mx, my);
2702                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2703                             mx= s->mv[1][0][0];
2704                             my= s->mv[1][0][1];
2705                         }else{
2706                             mx= s->mv[0][0][0];
2707                             my= s->mv[0][0][1];
2708                         }
2709
2710                         s->mv_dir= best_s.mv_dir;
2711                         s->mv_type = best_s.mv_type;
2712                         s->mb_intra= 0;
2713 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2714                         s->mv[0][0][1] = best_s.mv[0][0][1];
2715                         s->mv[1][0][0] = best_s.mv[1][0][0];
2716                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2717                         backup_s.dquant= 0;
2718                         s->skipdct=1;
2719                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2720                                         &dmin, &next_block, mx, my);
2721                         s->skipdct=0;
2722                     }
2723                 }
2724
2725                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2726
2727                 copy_context_after_encode(s, &best_s, -1);
2728
2729                 pb_bits_count= put_bits_count(&s->pb);
2730                 flush_put_bits(&s->pb);
2731                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2732                 s->pb= backup_s.pb;
2733
2734                 if(s->data_partitioning){
2735                     pb2_bits_count= put_bits_count(&s->pb2);
2736                     flush_put_bits(&s->pb2);
2737                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2738                     s->pb2= backup_s.pb2;
2739
2740                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2741                     flush_put_bits(&s->tex_pb);
2742                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2743                     s->tex_pb= backup_s.tex_pb;
2744                 }
2745                 s->last_bits= put_bits_count(&s->pb);
2746
2747                 if (CONFIG_H263_ENCODER &&
2748                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2749                     ff_h263_update_motion_val(s);
2750
2751                 if(next_block==0){ //FIXME 16 vs linesize16
2752                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2753                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2754                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2755                 }
2756
2757                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2758                     MPV_decode_mb(s, s->block);
2759             } else {
2760                 int motion_x = 0, motion_y = 0;
2761                 s->mv_type=MV_TYPE_16X16;
2762                 // only one MB-Type possible
2763
2764                 switch(mb_type){
2765                 case CANDIDATE_MB_TYPE_INTRA:
2766                     s->mv_dir = 0;
2767                     s->mb_intra= 1;
2768                     motion_x= s->mv[0][0][0] = 0;
2769                     motion_y= s->mv[0][0][1] = 0;
2770                     break;
2771                 case CANDIDATE_MB_TYPE_INTER:
2772                     s->mv_dir = MV_DIR_FORWARD;
2773                     s->mb_intra= 0;
2774                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2775                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2776                     break;
2777                 case CANDIDATE_MB_TYPE_INTER_I:
2778                     s->mv_dir = MV_DIR_FORWARD;
2779                     s->mv_type = MV_TYPE_FIELD;
2780                     s->mb_intra= 0;
2781                     for(i=0; i<2; i++){
2782                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2783                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2784                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2785                     }
2786                     break;
2787                 case CANDIDATE_MB_TYPE_INTER4V:
2788                     s->mv_dir = MV_DIR_FORWARD;
2789                     s->mv_type = MV_TYPE_8X8;
2790                     s->mb_intra= 0;
2791                     for(i=0; i<4; i++){
2792                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2793                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2794                     }
2795                     break;
2796                 case CANDIDATE_MB_TYPE_DIRECT:
2797                     if (CONFIG_MPEG4_ENCODER) {
2798                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2799                         s->mb_intra= 0;
2800                         motion_x=s->b_direct_mv_table[xy][0];
2801                         motion_y=s->b_direct_mv_table[xy][1];
2802                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2803                     }
2804                     break;
2805                 case CANDIDATE_MB_TYPE_DIRECT0:
2806                     if (CONFIG_MPEG4_ENCODER) {
2807                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2808                         s->mb_intra= 0;
2809                         ff_mpeg4_set_direct_mv(s, 0, 0);
2810                     }
2811                     break;
2812                 case CANDIDATE_MB_TYPE_BIDIR:
2813                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2814                     s->mb_intra= 0;
2815                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2816                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2817                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2818                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2819                     break;
2820                 case CANDIDATE_MB_TYPE_BACKWARD:
2821                     s->mv_dir = MV_DIR_BACKWARD;
2822                     s->mb_intra= 0;
2823                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2824                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2825                     break;
2826                 case CANDIDATE_MB_TYPE_FORWARD:
2827                     s->mv_dir = MV_DIR_FORWARD;
2828                     s->mb_intra= 0;
2829                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2830                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2831 //                    printf(" %d %d ", motion_x, motion_y);
2832                     break;
2833                 case CANDIDATE_MB_TYPE_FORWARD_I:
2834                     s->mv_dir = MV_DIR_FORWARD;
2835                     s->mv_type = MV_TYPE_FIELD;
2836                     s->mb_intra= 0;
2837                     for(i=0; i<2; i++){
2838                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2839                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2840                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2841                     }
2842                     break;
2843                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2844                     s->mv_dir = MV_DIR_BACKWARD;
2845                     s->mv_type = MV_TYPE_FIELD;
2846                     s->mb_intra= 0;
2847                     for(i=0; i<2; i++){
2848                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2849                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2850                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2851                     }
2852                     break;
2853                 case CANDIDATE_MB_TYPE_BIDIR_I:
2854                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2855                     s->mv_type = MV_TYPE_FIELD;
2856                     s->mb_intra= 0;
2857                     for(dir=0; dir<2; dir++){
2858                         for(i=0; i<2; i++){
2859                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2860                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2861                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2862                         }
2863                     }
2864                     break;
2865                 default:
2866                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2867                 }
2868
2869                 encode_mb(s, motion_x, motion_y);
2870
2871                 // RAL: Update last macroblock type
2872                 s->last_mv_dir = s->mv_dir;
2873
2874                 if (CONFIG_H263_ENCODER &&
2875                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2876                     ff_h263_update_motion_val(s);
2877
2878                 MPV_decode_mb(s, s->block);
2879             }
2880
2881             /* clean the MV table in IPS frames for direct mode in B frames */
2882             if(s->mb_intra /* && I,P,S_TYPE */){
2883                 s->p_mv_table[xy][0]=0;
2884                 s->p_mv_table[xy][1]=0;
2885             }
2886
2887             if(s->flags&CODEC_FLAG_PSNR){
2888                 int w= 16;
2889                 int h= 16;
2890
2891                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2892                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2893
2894                 s->current_picture.f.error[0] += sse(
2895                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2896                     s->dest[0], w, h, s->linesize);
2897                 s->current_picture.f.error[1] += sse(
2898                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2899                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2900                 s->current_picture.f.error[2] += sse(
2901                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2902                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2903             }
2904             if(s->loop_filter){
2905                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2906                     ff_h263_loop_filter(s);
2907             }
2908 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2909         }
2910     }
2911
2912     //not beautiful here but we must write it before flushing so it has to be here
2913     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2914         msmpeg4_encode_ext_header(s);
2915
2916     write_slice_end(s);
2917
2918     /* Send the last GOB if RTP */
2919     if (s->avctx->rtp_callback) {
2920         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2921         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2922         /* Call the RTP callback to send the last GOB */
2923         emms_c();
2924         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2925     }
2926
2927     return 0;
2928 }
2929
2930 #define MERGE(field) dst->field += src->field; src->field=0
2931 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2932     MERGE(me.scene_change_score);
2933     MERGE(me.mc_mb_var_sum_temp);
2934     MERGE(me.mb_var_sum_temp);
2935 }
2936
2937 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2938     int i;
2939
2940     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2941     MERGE(dct_count[1]);
2942     MERGE(mv_bits);
2943     MERGE(i_tex_bits);
2944     MERGE(p_tex_bits);
2945     MERGE(i_count);
2946     MERGE(f_count);
2947     MERGE(b_count);
2948     MERGE(skip_count);
2949     MERGE(misc_bits);
2950     MERGE(error_count);
2951     MERGE(padding_bug_score);
2952     MERGE(current_picture.f.error[0]);
2953     MERGE(current_picture.f.error[1]);
2954     MERGE(current_picture.f.error[2]);
2955
2956     if(dst->avctx->noise_reduction){
2957         for(i=0; i<64; i++){
2958             MERGE(dct_error_sum[0][i]);
2959             MERGE(dct_error_sum[1][i]);
2960         }
2961     }
2962
2963     assert(put_bits_count(&src->pb) % 8 ==0);
2964     assert(put_bits_count(&dst->pb) % 8 ==0);
2965     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
2966     flush_put_bits(&dst->pb);
2967 }
2968
2969 static int estimate_qp(MpegEncContext *s, int dry_run){
2970     if (s->next_lambda){
2971         s->current_picture_ptr->f.quality =
2972         s->current_picture.f.quality = s->next_lambda;
2973         if(!dry_run) s->next_lambda= 0;
2974     } else if (!s->fixed_qscale) {
2975         s->current_picture_ptr->f.quality =
2976         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
2977         if (s->current_picture.f.quality < 0)
2978             return -1;
2979     }
2980
2981     if(s->adaptive_quant){
2982         switch(s->codec_id){
2983         case CODEC_ID_MPEG4:
2984             if (CONFIG_MPEG4_ENCODER)
2985                 ff_clean_mpeg4_qscales(s);
2986             break;
2987         case CODEC_ID_H263:
2988         case CODEC_ID_H263P:
2989         case CODEC_ID_FLV1:
2990             if (CONFIG_H263_ENCODER)
2991                 ff_clean_h263_qscales(s);
2992             break;
2993         default:
2994             ff_init_qscale_tab(s);
2995         }
2996
2997         s->lambda= s->lambda_table[0];
2998         //FIXME broken
2999     }else
3000         s->lambda = s->current_picture.f.quality;
3001 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3002     update_qscale(s);
3003     return 0;
3004 }
3005
3006 /* must be called before writing the header */
3007 static void set_frame_distances(MpegEncContext * s){
3008     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3009     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3010
3011     if(s->pict_type==AV_PICTURE_TYPE_B){
3012         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3013         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3014     }else{
3015         s->pp_time= s->time - s->last_non_b_time;
3016         s->last_non_b_time= s->time;
3017         assert(s->picture_number==0 || s->pp_time > 0);
3018     }
3019 }
3020
/**
 * Encode the current picture of s into its bitstream writer (s->pb).
 *
 * Steps, in the order performed below:
 *  - reset per-picture motion estimation statistics and set up timing,
 *  - choose a preliminary lambda and run motion estimation (or, for I
 *    pictures, mark every macroblock intra and optionally measure variance)
 *    on all slice contexts through avctx->execute(),
 *  - possibly turn a P picture into an I picture on a scene change,
 *  - select f_code/b_code and post-process the motion vector tables,
 *  - run estimate_qp() for real and set up codec specific quant matrices,
 *  - write the picture header for s->out_format,
 *  - run encode_thread on all slice contexts and merge their output.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3021 static int encode_picture(MpegEncContext *s, int picture_number)
3022 {
3023     int i;
3024     int bits;
3025     int context_count = s->slice_context_count;
3026
3027     s->picture_number = picture_number;
3028
3029     /* Reset the average MB variance */
3030     s->me.mb_var_sum_temp    =
3031     s->me.mc_mb_var_sum_temp = 0;
3032
3033     /* we need to initialize some time vars before we can encode b-frames */
3034     // RAL: Condition added for MPEG1VIDEO
3035     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3036         set_frame_distances(s);
3037     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3038         ff_set_mpeg4_time(s);
3039
3040     s->me.scene_change_score=0;
3041
3042 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3043
         /* Rounding control: I pictures set no_rounding to a fixed value
          * (1 for msmpeg4_version >= 3, else 0); other non-B pictures toggle it
          * when flipflop_rounding is set or the codec is H263P or MPEG4. */
3044     if(s->pict_type==AV_PICTURE_TYPE_I){
3045         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3046         else                        s->no_rounding=0;
3047     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3048         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3049             s->no_rounding ^= 1;
3050     }
3051
         /* Preliminary lambda for motion estimation: a dry-run estimate_qp()
          * in the second pass of 2-pass encoding, otherwise (unless
          * CODEC_FLAG_QSCALE is set) the lambda remembered in last_lambda_for[]. */
3052     if(s->flags & CODEC_FLAG_PASS2){
3053         if (estimate_qp(s,1) < 0)
3054             return -1;
3055         ff_get_2pass_fcode(s);
3056     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3057         if(s->pict_type==AV_PICTURE_TYPE_B)
3058             s->lambda= s->last_lambda_for[s->pict_type];
3059         else
3060             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3061         update_qscale(s);
3062     }
3063
         /* For everything but AMV the chroma intra quant matrices alias the
          * luma ones; chroma matrices that are distinct pointers are freed
          * before being replaced. */
3064     if(s->codec_id != CODEC_ID_AMV){
3065         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3066         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3067         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3068         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3069     }
3070
3071     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring the additional slice contexts (index 1 and up) in sync with s */
3072     for(i=1; i<context_count; i++){
3073         ff_update_duplicate_context(s->thread_context[i], s);
3074     }
3075
3076     if(ff_init_me(s)<0)
3077         return -1;
3078
3079     /* Estimate motion for every MB */
3080     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3081         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3082         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* optional pre-pass: P pictures only, me_threshold == 0, and either
              * pre_me == 2 or pre_me set with the last non-B picture being an I picture */
3083         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3084             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3085                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3086             }
3087         }
3088
3089         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3090     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3091         /* I-Frame */
3092         for(i=0; i<s->mb_stride*s->mb_height; i++)
3093             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3094
3095         if(!s->fixed_qscale){
3096             /* finding spatial complexity for I-frame rate control */
3097             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3098         }
3099     }
         /* collect the statistics gathered by the other slice contexts */
3100     for(i=1; i<context_count; i++){
3101         merge_context_after_me(s, s->thread_context[i]);
3102     }
3103     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3104     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3105     emms_c();
3106
         /* scene change: a P picture whose score exceeds the threshold is
          * re-typed as I and every macroblock becomes an intra candidate */
3107     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3108         s->pict_type= AV_PICTURE_TYPE_I;
3109         for(i=0; i<s->mb_stride*s->mb_height; i++)
3110             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3111 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3112     }
3113
         /* Unless umvplus is set, pick f_code (and b_code for B pictures) with
          * ff_get_best_fcode() and pass every motion vector table through
          * ff_fix_long_mvs() with the chosen code.
          * NOTE(review): presumably this handles vectors too long for the
          * chosen f_code/b_code - confirm in ff_fix_long_mvs(). */
3114     if(!s->umvplus){
3115         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3116             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3117
3118             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3119                 int a,b;
3120                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3121                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3122                 s->f_code= FFMAX3(s->f_code, a, b);
3123             }
3124
3125             ff_fix_long_p_mvs(s);
3126             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3127             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3128                 int j;
3129                 for(i=0; i<2; i++){
3130                     for(j=0; j<2; j++)
3131                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3132                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3133                 }
3134             }
3135         }
3136
3137         if(s->pict_type==AV_PICTURE_TYPE_B){
3138             int a, b;
3139
                 /* f_code covers the forward tables, b_code the backward ones;
                  * each is the larger of the plain and the bidirectional result */
3140             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3141             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3142             s->f_code = FFMAX(a, b);
3143
3144             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3145             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3146             s->b_code = FFMAX(a, b);
3147
3148             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3149             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3150             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3151             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3152             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3153                 int dir, j;
3154                 for(dir=0; dir<2; dir++){
3155                     for(i=0; i<2; i++){
3156                         for(j=0; j<2; j++){
3157                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3158                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3159                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3160                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3161                         }
3162                     }
3163                 }
3164             }
3165         }
3166     }
3167
         /* final (non dry-run) quality / qscale decision for this picture */
3168     if (estimate_qp(s, 0) < 0)
3169         return -1;
3170
3171     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3172         s->qscale= 3; //reduce clipping problems
3173
3174     if (s->out_format == FMT_MJPEG) {
3175         /* for mjpeg, we do include qscale in the matrix */
3176         for(i=1;i<64;i++){
3177             int j= s->dsp.idct_permutation[i];
3178
3179             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3180         }
3181         s->y_dc_scale_table=
3182         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3183         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3184         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3185                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so it is pinned to 8 */
3186         s->qscale= 8;
3187     }
3188     if(s->codec_id == CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 for luma, 14 for chroma) and
              * quant matrices taken from sp5x_quant_table rows 5*2+0 and 5*2+1 */
3189         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3190         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3191         for(i=1;i<64;i++){
3192             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3193
3194             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3195             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3196         }
3197         s->y_dc_scale_table= y;
3198         s->c_dc_scale_table= c;
3199         s->intra_matrix[0] = 13;
3200         s->chroma_intra_matrix[0] = 14;
3201         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3202                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3203         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3204                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3205         s->qscale= 8;
3206     }
3207
3208     //FIXME var duplication
3209     s->current_picture_ptr->f.key_frame =
3210     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3211     s->current_picture_ptr->f.pict_type =
3212     s->current_picture.f.pict_type = s->pict_type;
3213
3214     if (s->current_picture.f.key_frame)
3215         s->picture_in_gop_number=0;
3216
         /* write the picture header; its size in bits ends up in s->header_bits */
3217     s->last_bits= put_bits_count(&s->pb);
3218     switch(s->out_format) {
3219     case FMT_MJPEG:
3220         if (CONFIG_MJPEG_ENCODER)
3221             ff_mjpeg_encode_picture_header(s);
3222         break;
3223     case FMT_H261:
3224         if (CONFIG_H261_ENCODER)
3225             ff_h261_encode_picture_header(s, picture_number);
3226         break;
3227     case FMT_H263:
3228         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3229             ff_wmv2_encode_picture_header(s, picture_number);
3230         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3231             msmpeg4_encode_picture_header(s, picture_number);
3232         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3233             mpeg4_encode_picture_header(s, picture_number);
3234         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3235             rv10_encode_picture_header(s, picture_number);
3236         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3237             rv20_encode_picture_header(s, picture_number);
3238         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3239             ff_flv_encode_picture_header(s, picture_number);
3240         else if (CONFIG_H263_ENCODER)
3241             h263_encode_picture_header(s, picture_number);
3242         break;
3243     case FMT_MPEG1:
3244         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3245             mpeg1_encode_picture_header(s, picture_number);
3246         break;
3247     case FMT_H264:
3248         break;
3249     default:
3250         assert(0);
3251     }
3252     bits= put_bits_count(&s->pb);
3253     s->header_bits= bits - s->last_bits;
3254
         /* refresh the other slice contexts, encode all slices, then merge the
          * statistics and bitstreams of contexts 1 and up back into s */
3255     for(i=1; i<context_count; i++){
3256         update_duplicate_context_after_me(s->thread_context[i], s);
3257     }
3258     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3259     for(i=1; i<context_count; i++){
3260         merge_context_after_encode(s, s->thread_context[i]);
3261     }
3262     emms_c();
3263     return 0;
3264 }
3265
3266 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3267     const int intra= s->mb_intra;
3268     int i;
3269
3270     s->dct_count[intra]++;
3271
3272     for(i=0; i<64; i++){
3273         int level= block[i];
3274
3275         if(level){
3276             if(level>0){
3277                 s->dct_error_sum[intra][i] += level;
3278                 level -= s->dct_offset[intra][i];
3279                 if(level<0) level=0;
3280             }else{
3281                 s->dct_error_sum[intra][i] -= level;
3282                 level += s->dct_offset[intra][i];
3283                 if(level>0) level=0;
3284             }
3285             block[i]= level;
3286         }
3287     }
3288 }
3289
/**
 * Transform and quantize one 8x8 block, choosing the quantized levels with a
 * rate-distortion (trellis) search.
 *
 * The block is passed through s->dsp.fdct and, if s->dct_error_sum is set,
 * through s->denoise_dct. Every coefficient up to the last one that does not
 * quantize to zero gets one or two candidate levels. A dynamic programming
 * pass over the scan positions then picks the run/level sequence with the
 * lowest distortion + lambda * bits, where the bit cost comes from the
 * intra/inter AC VLC length tables, or from s->ac_esc_length for levels
 * outside [-64, 63].
 *
 * @param s        encoder context; s->coded_score[n] is written unless the
 *                 block is found empty before the search starts
 * @param block    in: data handed to s->dsp.fdct; out: quantized levels,
 *                 written at s->intra_scantable.permutated positions
 * @param n        block index; n < 4 selects the luma DC scale and intra
 *                 matrix, other values the chroma ones
 * @param qscale   quantizer scale
 * @param overflow set to nonzero when the OR of all quantized magnitudes
 *                 exceeds s->max_qcoeff (overflow might have happened)
 * @return scan index of the last nonzero coefficient that is kept; a value
 *         below the first searched index (1 for intra, 0 for inter blocks)
 *         means none is kept, and -1 is returned when the single-coefficient
 *         special case decides to drop the coefficient
 */
3290 static int dct_quantize_trellis_c(MpegEncContext *s,
3291                                   DCTELEM *block, int n,
3292                                   int qscale, int *overflow){
3293     const int *qmat;
3294     const uint8_t *scantable= s->intra_scantable.scantable;
3295     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3296     int max=0;
3297     unsigned int threshold1, threshold2;
3298     int bias=0;
3299     int run_tab[65];
3300     int level_tab[65];
3301     int score_tab[65];
3302     int survivor[65];
3303     int survivor_count;
3304     int last_run=0;
3305     int last_level=0;
3306     int last_score= 0;
3307     int last_i;
3308     int coeff[2][64];
3309     int coeff_count[64];
3310     int qmul, qadd, start_i, last_non_zero, i, dc;
3311     const int esc_length= s->ac_esc_length;
3312     uint8_t * length;
3313     uint8_t * last_length;
3314     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3315
3316     s->dsp.fdct (block);
3317
3318     if(s->dct_error_sum)
3319         s->denoise_dct(s, block);
         /* dequantization constants for the FMT_H263 path below
          * (unquant = |level|*qmul + qadd), i.e. 2*qscale and (qscale-1)|1
          * each scaled by 8 */
3320     qmul= qscale*16;
3321     qadd= ((qscale-1)|1)*8;
3322
3323     if (s->mb_intra) {
3324         int q;
3325         if (!s->h263_aic) {
3326             if (n < 4)
3327                 q = s->y_dc_scale;
3328             else
3329                 q = s->c_dc_scale;
3330             q = q << 3;
3331         } else{
3332             /* For AIC we skip quant/dequant of INTRADC */
3333             q = 1 << 3;
3334             qadd=0;
3335         }
3336
3337         /* note: block[0] is assumed to be positive */
3338         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is done; the search only covers positions 1..63 */
3339         start_i = 1;
3340         last_non_zero = 0;
3341         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3342         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3343             bias= 1<<(QMAT_SHIFT-1);
3344         length     = s->intra_ac_vlc_length;
3345         last_length= s->intra_ac_vlc_last_length;
3346     } else {
3347         start_i = 0;
3348         last_non_zero = -1;
3349         qmat = s->q_inter_matrix[qscale];
3350         length     = s->inter_ac_vlc_length;
3351         last_length= s->inter_ac_vlc_last_length;
3352     }
3353     last_i= start_i;
3354
         /* (unsigned)(level+threshold1) > threshold2 is a single-compare test
          * for level outside [-threshold1, threshold1]; inside that range
          * (bias + |level|) >> QMAT_SHIFT is 0 */
3355     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3356     threshold2= (threshold1<<1);
3357
         /* scan backwards for the last coefficient that does not quantize to zero */
3358     for(i=63; i>=start_i; i--) {
3359         const int j = scantable[i];
3360         int level = block[j] * qmat[j];
3361
3362         if(((unsigned)(level+threshold1))>threshold2){
3363             last_non_zero = i;
3364             break;
3365         }
3366     }
3367
         /* Build the candidate levels per scan position: coeff[0][i] is the
          * quantized level and coeff[1][i] the level with magnitude one
          * smaller; coeff_count[i] = FFMIN(level, 2) leaves a single candidate
          * when the magnitude is 1. */
3368     for(i=start_i; i<=last_non_zero; i++) {
3369         const int j = scantable[i];
3370         int level = block[j] * qmat[j];
3371
3372 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3373 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3374         if(((unsigned)(level+threshold1))>threshold2){
3375             if(level>0){
3376                 level= (bias + level)>>QMAT_SHIFT;
3377                 coeff[0][i]= level;
3378                 coeff[1][i]= level-1;
3379 //                coeff[2][k]= level-2;
3380             }else{
3381                 level= (bias - level)>>QMAT_SHIFT;
3382                 coeff[0][i]= -level;
3383                 coeff[1][i]= -level+1;
3384 //                coeff[2][k]= -level+2;
3385             }
3386             coeff_count[i]= FFMIN(level, 2);
3387             assert(coeff_count[i]);
3388             max |=level;
3389         }else{
                 /* would quantize to zero: offer +1 or -1 with the sign of the
                  * product (assumes 32-bit int and an arithmetic right shift) */
3390             coeff[0][i]= (level>>31)|1;
3391             coeff_count[i]= 1;
3392         }
3393     }
3394
3395     *overflow= s->max_qcoeff < max; //overflow might have happened
3396
         /* nothing survives quantization: clear the searched range and return */
3397     if(last_non_zero < start_i){
3398         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3399         return last_non_zero;
3400     }
3401
         /* score_tab[k] is the best score of a path whose last coded coefficient
          * sits at scan position k-1 (score_tab[start_i] = 0 is the empty path);
          * survivor[] lists the values of k still considered as predecessors. */
3402     score_tab[start_i]= 0;
3403     survivor[0]= start_i;
3404     survivor_count= 1;
3405
3406     for(i=start_i; i<=last_non_zero; i++){
3407         int level_index, j, zero_distortion;
3408         int dct_coeff= FFABS(block[ scantable[i] ]);
3409         int best_score=256*256*256*120;
3410
             /* with these fdct implementations the coefficient is rescaled by
              * ff_inv_aanscales before distortion is measured */
3411         if (   s->dsp.fdct == fdct_ifast
3412 #ifndef FAAN_POSTSCALE
3413             || s->dsp.fdct == ff_faandct
3414 #endif
3415            )
3416             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3417         zero_distortion= dct_coeff*dct_coeff;
3418
3419         for(level_index=0; level_index < coeff_count[i]; level_index++){
3420             int distortion;
3421             int level= coeff[level_index][i];
3422             const int alevel= FFABS(level);
3423             int unquant_coeff;
3424
3425             assert(level);
3426
                 /* reconstruct the magnitude the way the selected format dequantizes it */
3427             if(s->out_format == FMT_H263){
3428                 unquant_coeff= alevel*qmul + qadd;
3429             }else{ //MPEG1
3430                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3431                 if(s->mb_intra){
3432                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3433                         unquant_coeff =   (unquant_coeff - 1) | 1;
3434                 }else{
3435                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3436                         unquant_coeff =   (unquant_coeff - 1) | 1;
3437                 }
3438                 unquant_coeff<<= 3;
3439             }
3440
                 /* distortion relative to coding this coefficient as zero */
3441             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* after the +64 offset, levels in [-64, 63] have no bits set
                  * outside the low 7 and are looked up in the VLC length tables;
                  * all other levels are charged esc_length */
3442             level+=64;
3443             if((level&(~127)) == 0){
3444                 for(j=survivor_count-1; j>=0; j--){
3445                     int run= i - survivor[j];
3446                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3447                     score += score_tab[i-run];
3448
3449                     if(score < best_score){
3450                         best_score= score;
3451                         run_tab[i+1]= run;
3452                         level_tab[i+1]= level-64;
3453                     }
3454                 }
3455
                     /* for FMT_H263 the final coefficient is costed with the
                      * separate last_length table, so the best end point is
                      * tracked here */
3456                 if(s->out_format == FMT_H263){
3457                     for(j=survivor_count-1; j>=0; j--){
3458                         int run= i - survivor[j];
3459                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3460                         score += score_tab[i-run];
3461                         if(score < last_score){
3462                             last_score= score;
3463                             last_run= run;
3464                             last_level= level-64;
3465                             last_i= i+1;
3466                         }
3467                     }
3468                 }
3469             }else{
3470                 distortion += esc_length*lambda;
3471                 for(j=survivor_count-1; j>=0; j--){
3472                     int run= i - survivor[j];
3473                     int score= distortion + score_tab[i-run];
3474
3475                     if(score < best_score){
3476                         best_score= score;
3477                         run_tab[i+1]= run;
3478                         level_tab[i+1]= level-64;
3479                     }
3480                 }
3481
3482                 if(s->out_format == FMT_H263){
3483                   for(j=survivor_count-1; j>=0; j--){
3484                         int run= i - survivor[j];
3485                         int score= distortion + score_tab[i-run];
3486                         if(score < last_score){
3487                             last_score= score;
3488                             last_run= run;
3489                             last_level= level-64;
3490                             last_i= i+1;
3491                         }
3492                     }
3493                 }
3494             }
3495         }
3496
3497         score_tab[i+1]= best_score;
3498
             /* drop trailing survivors whose score is worse than the new best
              * (with a margin of lambda when last_non_zero > 27) */
3499         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3500         if(last_non_zero <= 27){
3501             for(; survivor_count; survivor_count--){
3502                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3503                     break;
3504             }
3505         }else{
3506             for(; survivor_count; survivor_count--){
3507                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3508                     break;
3509             }
3510         }
3511
3512         survivor[ survivor_count++ ]= i+1;
3513     }
3514
         /* formats other than FMT_H263: pick the end point afterwards, adding
          * lambda*2 to every candidate with i != 0 */
3515     if(s->out_format != FMT_H263){
3516         last_score= 256*256*256*120;
3517         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3518             int score= score_tab[i];
3519             if(i) score += lambda*2; //FIXME exacter?
3520
3521             if(score < last_score){
3522                 last_score= score;
3523                 last_i= i;
3524                 last_level= level_tab[i];
3525                 last_run= run_tab[i];
3526             }
3527         }
3528     }
3529
3530     s->coded_score[n] = last_score;
3531
         /* remember |block[0]| for the special case below, then clear the
          * searched range before the chosen levels are written back */
3532     dc= FFABS(block[0]);
3533     last_non_zero= last_i - 1;
3534     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3535
3536     if(last_non_zero < start_i)
3537         return last_non_zero;
3538
         /* special case (start_i == 0, i.e. the non-intra path): only the
          * coefficient at scan position 0 is left, so its candidates are
          * compared against dropping it altogether (score dc*dc) */
3539     if(last_non_zero == 0 && start_i == 0){
3540         int best_level= 0;
3541         int best_score= dc * dc;
3542
3543         for(i=0; i<coeff_count[0]; i++){
3544             int level= coeff[i][0];
3545             int alevel= FFABS(level);
3546             int unquant_coeff, score, distortion;
3547
3548             if(s->out_format == FMT_H263){
3549                     unquant_coeff= (alevel*qmul + qadd)>>3;
3550             }else{ //MPEG1
3551                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3552                     unquant_coeff =   (unquant_coeff - 1) | 1;
3553             }
3554             unquant_coeff = (unquant_coeff + 4) >> 3;
3555             unquant_coeff<<= 3 + 3;
3556
3557             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3558             level+=64;
3559             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3560             else                    score= distortion + esc_length*lambda;
3561
3562             if(score < best_score){
3563                 best_score= score;
3564                 best_level= level - 64;
3565             }
3566         }
3567         block[0]= best_level;
3568         s->coded_score[n] = best_score - dc*dc;
3569         if(best_level == 0) return -1;
3570         else                return last_non_zero;
3571     }
3572
         /* write the chosen path back: the final coefficient first, then walk
          * towards start_i using the stored runs and levels */
3573     i= last_i;
3574     assert(last_level);
3575
3576     block[ perm_scantable[last_non_zero] ]= last_level;
3577     i -= last_run + 1;
3578
3579     for(; i>start_i; i -= run_tab[i] + 1){
3580         block[ perm_scantable[i-1] ]= level_tab[i];
3581     }
3582
3583     return last_non_zero;
3584 }
3585
3586 //#define REFINE_STATS 1
3587 static int16_t basis[64][64];
3588
3589 static void build_basis(uint8_t *perm){
3590     int i, j, x, y;
3591     emms_c();
3592     for(i=0; i<8; i++){
3593         for(j=0; j<8; j++){
3594             for(y=0; y<8; y++){
3595                 for(x=0; x<8; x++){
3596                     double s= 0.25*(1<<BASIS_SHIFT);
3597                     int index= 8*i + j;
3598                     int perm_index= perm[index];
3599                     if(i==0) s*= sqrt(0.5);
3600                     if(j==0) s*= sqrt(0.5);
3601                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3602                 }
3603             }
3604         }
3605     }
3606 }
3607
3608 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3609                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3610                         int n, int qscale){
3611     int16_t rem[64];
3612     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3613     const uint8_t *scantable= s->intra_scantable.scantable;
3614     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3615 //    unsigned int threshold1, threshold2;
3616 //    int bias=0;
3617     int run_tab[65];
3618     int prev_run=0;
3619     int prev_level=0;
3620     int qmul, qadd, start_i, last_non_zero, i, dc;
3621     uint8_t * length;
3622     uint8_t * last_length;
3623     int lambda;
3624     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3625 #ifdef REFINE_STATS
3626 static int count=0;
3627 static int after_last=0;
3628 static int to_zero=0;
3629 static int from_zero=0;
3630 static int raise=0;
3631 static int lower=0;
3632 static int messed_sign=0;
3633 #endif
3634
3635     if(basis[0][0] == 0)
3636         build_basis(s->dsp.idct_permutation);
3637
3638     qmul= qscale*2;
3639     qadd= (qscale-1)|1;
3640     if (s->mb_intra) {
3641         if (!s->h263_aic) {
3642             if (n < 4)
3643                 q = s->y_dc_scale;
3644             else
3645                 q = s->c_dc_scale;
3646         } else{
3647             /* For AIC we skip quant/dequant of INTRADC */
3648             q = 1;
3649             qadd=0;
3650         }
3651         q <<= RECON_SHIFT-3;
3652         /* note: block[0] is assumed to be positive */
3653         dc= block[0]*q;
3654 //        block[0] = (block[0] + (q >> 1)) / q;
3655         start_i = 1;
3656 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3657 //            bias= 1<<(QMAT_SHIFT-1);
3658         length     = s->intra_ac_vlc_length;
3659         last_length= s->intra_ac_vlc_last_length;
3660     } else {
3661         dc= 0;
3662         start_i = 0;
3663         length     = s->inter_ac_vlc_length;
3664         last_length= s->inter_ac_vlc_last_length;
3665     }
3666     last_non_zero = s->block_last_index[n];
3667
3668 #ifdef REFINE_STATS
3669 {START_TIMER
3670 #endif
3671     dc += (1<<(RECON_SHIFT-1));
3672     for(i=0; i<64; i++){
3673         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3674     }
3675 #ifdef REFINE_STATS
3676 STOP_TIMER("memset rem[]")}
3677 #endif
3678     sum=0;
3679     for(i=0; i<64; i++){
3680         int one= 36;
3681         int qns=4;
3682         int w;
3683
3684         w= FFABS(weight[i]) + qns*one;
3685         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3686
3687         weight[i] = w;
3688 //        w=weight[i] = (63*qns + (w/2)) / w;
3689
3690         assert(w>0);
3691         assert(w<(1<<6));
3692         sum += w*w;
3693     }
3694     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3695 #ifdef REFINE_STATS
3696 {START_TIMER
3697 #endif
3698     run=0;
3699     rle_index=0;
3700     for(i=start_i; i<=last_non_zero; i++){
3701         int j= perm_scantable[i];
3702         const int level= block[j];
3703         int coeff;
3704
3705         if(level){
3706             if(level<0) coeff= qmul*level - qadd;
3707             else        coeff= qmul*level + qadd;
3708             run_tab[rle_index++]=run;
3709             run=0;
3710
3711             s->dsp.add_8x8basis(rem, basis[j], coeff);
3712         }else{
3713             run++;
3714         }
3715     }
3716 #ifdef REFINE_STATS
3717 if(last_non_zero>0){
3718 STOP_TIMER("init rem[]")
3719 }
3720 }
3721
3722 {START_TIMER
3723 #endif
3724     for(;;){
3725         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3726         int best_coeff=0;
3727         int best_change=0;
3728         int run2, best_unquant_change=0, analyze_gradient;
3729 #ifdef REFINE_STATS
3730 {START_TIMER
3731 #endif
3732         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
3733
3734         if(analyze_gradient){
3735 #ifdef REFINE_STATS
3736 {START_TIMER
3737 #endif
3738             for(i=0; i<64; i++){
3739                 int w= weight[i];
3740
3741                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3742             }
3743 #ifdef REFINE_STATS
3744 STOP_TIMER("rem*w*w")}
3745 {START_TIMER
3746 #endif
3747             s->dsp.fdct(d1);
3748 #ifdef REFINE_STATS
3749 STOP_TIMER("dct")}
3750 #endif
3751         }
3752
3753         if(start_i){
3754             const int level= block[0];
3755             int change, old_coeff;
3756
3757             assert(s->mb_intra);
3758
3759             old_coeff= q*level;
3760
3761             for(change=-1; change<=1; change+=2){
3762                 int new_level= level + change;
3763                 int score, new_coeff;
3764
3765                 new_coeff= q*new_level;
3766                 if(new_coeff >= 2048 || new_coeff < 0)
3767                     continue;
3768
3769                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3770                 if(score<best_score){
3771                     best_score= score;
3772                     best_coeff= 0;
3773                     best_change= change;
3774                     best_unquant_change= new_coeff - old_coeff;
3775                 }
3776             }
3777         }
3778
3779         run=0;
3780         rle_index=0;
3781         run2= run_tab[rle_index++];
3782         prev_level=0;
3783         prev_run=0;
3784
3785         for(i=start_i; i<64; i++){
3786             int j= perm_scantable[i];
3787             const int level= block[j];
3788             int change, old_coeff;
3789
3790             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3791                 break;
3792
3793             if(level){
3794                 if(level<0) old_coeff= qmul*level - qadd;
3795                 else        old_coeff= qmul*level + qadd;
3796                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3797             }else{
3798                 old_coeff=0;
3799                 run2--;
3800                 assert(run2>=0 || i >= last_non_zero );
3801             }
3802
3803             for(change=-1; change<=1; change+=2){
3804                 int new_level= level + change;
3805                 int score, new_coeff, unquant_change;
3806
3807                 score=0;
3808                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3809                    continue;
3810
3811                 if(new_level){
3812                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3813                     else            new_coeff= qmul*new_level + qadd;
3814                     if(new_coeff >= 2048 || new_coeff <= -2048)
3815                         continue;
3816                     //FIXME check for overflow
3817
3818                     if(level){
3819                         if(level < 63 && level > -63){
3820                             if(i < last_non_zero)
3821                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3822                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3823                             else
3824                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3825                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3826                         }
3827                     }else{
3828                         assert(FFABS(new_level)==1);
3829
3830                         if(analyze_gradient){
3831                             int g= d1[ scantable[i] ];
3832                             if(g && (g^new_level) >= 0)
3833                                 continue;
3834                         }
3835
3836                         if(i < last_non_zero){
3837                             int next_i= i + run2 + 1;
3838                             int next_level= block[ perm_scantable[next_i] ] + 64;
3839
3840                             if(next_level&(~127))
3841                                 next_level= 0;
3842
3843                             if(next_i < last_non_zero)
3844                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3845                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3846                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3847                             else
3848                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3849                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3850                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3851                         }else{
3852                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3853                             if(prev_level){
3854                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3855                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3856                             }
3857                         }
3858                     }
3859                 }else{
3860                     new_coeff=0;
3861                     assert(FFABS(level)==1);
3862
3863                     if(i < last_non_zero){
3864                         int next_i= i + run2 + 1;
3865                         int next_level= block[ perm_scantable[next_i] ] + 64;
3866
3867                         if(next_level&(~127))
3868                             next_level= 0;
3869
3870                         if(next_i < last_non_zero)
3871                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3872                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3873                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3874                         else
3875                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3876                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3877                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3878                     }else{
3879                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3880                         if(prev_level){
3881                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3882                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3883                         }
3884                     }
3885                 }
3886
3887                 score *= lambda;
3888
3889                 unquant_change= new_coeff - old_coeff;
3890                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3891
3892                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3893                 if(score<best_score){
3894                     best_score= score;
3895                     best_coeff= i;
3896                     best_change= change;
3897                     best_unquant_change= unquant_change;
3898                 }
3899             }
3900             if(level){
3901                 prev_level= level + 64;
3902                 if(prev_level&(~127))
3903                     prev_level= 0;
3904                 prev_run= run;
3905                 run=0;
3906             }else{
3907                 run++;
3908             }
3909         }
3910 #ifdef REFINE_STATS
3911 STOP_TIMER("iterative step")}
3912 #endif
3913
3914         if(best_change){
3915             int j= perm_scantable[ best_coeff ];
3916
3917             block[j] += best_change;
3918
3919             if(best_coeff > last_non_zero){
3920                 last_non_zero= best_coeff;
3921                 assert(block[j]);
3922 #ifdef REFINE_STATS
3923 after_last++;
3924 #endif
3925             }else{
3926 #ifdef REFINE_STATS
3927 if(block[j]){
3928     if(block[j] - best_change){
3929         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3930             raise++;
3931         }else{
3932             lower++;
3933         }
3934     }else{
3935         from_zero++;
3936     }
3937 }else{
3938     to_zero++;
3939 }
3940 #endif
3941                 for(; last_non_zero>=start_i; last_non_zero--){
3942                     if(block[perm_scantable[last_non_zero]])
3943                         break;
3944                 }
3945             }
3946 #ifdef REFINE_STATS
3947 count++;
3948 if(256*256*256*64 % count == 0){
3949     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3950 }
3951 #endif
3952             run=0;
3953             rle_index=0;
3954             for(i=start_i; i<=last_non_zero; i++){
3955                 int j= perm_scantable[i];
3956                 const int level= block[j];
3957
3958                  if(level){
3959                      run_tab[rle_index++]=run;
3960                      run=0;
3961                  }else{
3962                      run++;
3963                  }
3964             }
3965
3966             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3967         }else{
3968             break;
3969         }
3970     }
3971 #ifdef REFINE_STATS
3972 if(last_non_zero>0){
3973 STOP_TIMER("iterative search")
3974 }
3975 }
3976 #endif
3977
3978     return last_non_zero;
3979 }
3980
3981 int dct_quantize_c(MpegEncContext *s,
3982                         DCTELEM *block, int n,
3983                         int qscale, int *overflow)
3984 {
3985     int i, j, level, last_non_zero, q, start_i;
3986     const int *qmat;
3987     const uint8_t *scantable= s->intra_scantable.scantable;
3988     int bias;
3989     int max=0;
3990     unsigned int threshold1, threshold2;
3991
3992     s->dsp.fdct (block);
3993
3994     if(s->dct_error_sum)
3995         s->denoise_dct(s, block);
3996
3997     if (s->mb_intra) {
3998         if (!s->h263_aic) {
3999             if (n < 4)
4000                 q = s->y_dc_scale;
4001             else
4002                 q = s->c_dc_scale;
4003             q = q << 3;
4004         } else
4005             /* For AIC we skip quant/dequant of INTRADC */
4006             q = 1 << 3;
4007
4008         /* note: block[0] is assumed to be positive */
4009         block[0] = (block[0] + (q >> 1)) / q;
4010         start_i = 1;
4011         last_non_zero = 0;
4012         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4013         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4014     } else {
4015         start_i = 0;
4016         last_non_zero = -1;
4017         qmat = s->q_inter_matrix[qscale];
4018         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4019     }
4020     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4021     threshold2= (threshold1<<1);
4022     for(i=63;i>=start_i;i--) {
4023         j = scantable[i];
4024         level = block[j] * qmat[j];
4025
4026         if(((unsigned)(level+threshold1))>threshold2){
4027             last_non_zero = i;
4028             break;
4029         }else{
4030             block[j]=0;
4031         }
4032     }
4033     for(i=start_i; i<=last_non_zero; i++) {
4034         j = scantable[i];
4035         level = block[j] * qmat[j];
4036
4037 //        if(   bias+level >= (1<<QMAT_SHIFT)
4038 //           || bias-level >= (1<<QMAT_SHIFT)){
4039         if(((unsigned)(level+threshold1))>threshold2){
4040             if(level>0){
4041                 level= (bias + level)>>QMAT_SHIFT;
4042                 block[j]= level;
4043             }else{
4044                 level= (bias - level)>>QMAT_SHIFT;
4045                 block[j]= -level;
4046             }
4047             max |=level;
4048         }else{
4049             block[j]=0;
4050         }
4051     }
4052     *overflow= s->max_qcoeff < max; //overflow might have happened
4053
4054     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4055     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4056         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4057
4058     return last_non_zero;
4059 }
4060
/* Byte offset of field x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by every option below: a video parameter that applies to
 * encoding. Parenthesized so the '|' cannot re-associate with operators
 * that surround the macro at its point of use. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4063 static const AVOption h263_options[] = {
4064     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4065     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4066     { NULL },
4067 };
4068
4069 static const AVClass h263_class = {
4070     .class_name = "H.263 encoder",
4071     .item_name  = av_default_item_name,
4072     .option     = h263_options,
4073     .version    = LIBAVUTIL_VERSION_INT,
4074 };
4075
4076 AVCodec ff_h263_encoder = {
4077     .name           = "h263",
4078     .type           = AVMEDIA_TYPE_VIDEO,
4079     .id             = CODEC_ID_H263,
4080     .priv_data_size = sizeof(MpegEncContext),
4081     .init           = MPV_encode_init,
4082     .encode         = MPV_encode_picture,
4083     .close          = MPV_encode_end,
4084     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4085     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4086     .priv_class     = &h263_class,
4087 };
4088
4089 static const AVOption h263p_options[] = {
4090     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4091     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4092     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4093     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4094     { NULL },
4095 };
4096 static const AVClass h263p_class = {
4097     .class_name = "H.263p encoder",
4098     .item_name  = av_default_item_name,
4099     .option     = h263p_options,
4100     .version    = LIBAVUTIL_VERSION_INT,
4101 };
4102
4103 AVCodec ff_h263p_encoder = {
4104     .name           = "h263p",
4105     .type           = AVMEDIA_TYPE_VIDEO,
4106     .id             = CODEC_ID_H263P,
4107     .priv_data_size = sizeof(MpegEncContext),
4108     .init           = MPV_encode_init,
4109     .encode         = MPV_encode_picture,
4110     .close          = MPV_encode_end,
4111     .capabilities = CODEC_CAP_SLICE_THREADS,
4112     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4113     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4114     .priv_class     = &h263p_class,
4115 };
4116
4117 AVCodec ff_msmpeg4v2_encoder = {
4118     .name           = "msmpeg4v2",
4119     .type           = AVMEDIA_TYPE_VIDEO,
4120     .id             = CODEC_ID_MSMPEG4V2,
4121     .priv_data_size = sizeof(MpegEncContext),
4122     .init           = MPV_encode_init,
4123     .encode         = MPV_encode_picture,
4124     .close          = MPV_encode_end,
4125     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4126     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4127 };
4128
4129 AVCodec ff_msmpeg4v3_encoder = {
4130     .name           = "msmpeg4",
4131     .type           = AVMEDIA_TYPE_VIDEO,
4132     .id             = CODEC_ID_MSMPEG4V3,
4133     .priv_data_size = sizeof(MpegEncContext),
4134     .init           = MPV_encode_init,
4135     .encode         = MPV_encode_picture,
4136     .close          = MPV_encode_end,
4137     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4138     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4139 };
4140
4141 AVCodec ff_wmv1_encoder = {
4142     .name           = "wmv1",
4143     .type           = AVMEDIA_TYPE_VIDEO,
4144     .id             = CODEC_ID_WMV1,
4145     .priv_data_size = sizeof(MpegEncContext),
4146     .init           = MPV_encode_init,
4147     .encode         = MPV_encode_picture,
4148     .close          = MPV_encode_end,
4149     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4150     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4151 };