]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
ratecontrol: correct predictor in case of stuffing
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "h263.h"
37 #include "mjpegenc.h"
38 #include "msmpeg4.h"
39 #include "faandct.h"
40 #include "thread.h"
41 #include "aandcttab.h"
42 #include "flv.h"
43 #include "mpeg4video.h"
44 #include "internal.h"
45 #include "bytestream.h"
46 #include <limits.h>
47 #include "sp5x.h"
48
49 //#undef NDEBUG
50 //#include <assert.h>
51
52 static int encode_picture(MpegEncContext *s, int picture_number);
53 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
54 static int sse_mb(MpegEncContext *s);
55 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
56 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
57
58 /* enable all paranoid tests for rounding, overflows, etc... */
59 //#define PARANOID
60
61 //#define DEBUG
62
/* Shared default tables; MPV_encode_defaults() points s->me.mv_penalty and
 * s->fcode_tab at these. Both are zero-initialised static storage. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table made up of the common mpegvideo encoder options
 * (FF_MPV_COMMON_OPTS) followed by the NULL terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
70
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
83             dsp->fdct == ff_faandct) {
84             for (i = 0; i < 64; i++) {
85                 const int j = dsp->idct_permutation[i];
86                 /* 16 <= qscale * quant_matrix[i] <= 7905
87                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
88                  *             19952 <=              x  <= 249205026
89                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
90                  *           3444240 >= (1 << 36) / (x) >= 275 */
91
92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
93                                         (qscale * quant_matrix[j]));
94             }
95         } else if (dsp->fdct == ff_fdct_ifast) {
96             for (i = 0; i < 64; i++) {
97                 const int j = dsp->idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
105                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.f.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void copy_picture_attributes(MpegEncContext *s,
188                                     AVFrame *dst,
189                                     AVFrame *src)
190 {
191     int i;
192
193     dst->pict_type              = src->pict_type;
194     dst->quality                = src->quality;
195     dst->coded_picture_number   = src->coded_picture_number;
196     dst->display_picture_number = src->display_picture_number;
197     //dst->reference              = src->reference;
198     dst->pts                    = src->pts;
199     dst->interlaced_frame       = src->interlaced_frame;
200     dst->top_field_first        = src->top_field_first;
201
202     if (s->avctx->me_threshold) {
203         if (!src->motion_val[0])
204             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
205         if (!src->mb_type)
206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
207         if (!src->ref_index[0])
208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
209         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
210             av_log(s->avctx, AV_LOG_ERROR,
211                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
212                    src->motion_subsample_log2, dst->motion_subsample_log2);
213
214         memcpy(dst->mb_type, src->mb_type,
215                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
216
217         for (i = 0; i < 2; i++) {
218             int stride = ((16 * s->mb_width ) >>
219                           src->motion_subsample_log2) + 1;
220             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
221
222             if (src->motion_val[i] &&
223                 src->motion_val[i] != dst->motion_val[i]) {
224                 memcpy(dst->motion_val[i], src->motion_val[i],
225                        2 * stride * height * sizeof(int16_t));
226             }
227             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
228                 memcpy(dst->ref_index[i], src->ref_index[i],
229                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
230             }
231         }
232     }
233 }
234
235 static void update_duplicate_context_after_me(MpegEncContext *dst,
236                                               MpegEncContext *src)
237 {
238 #define COPY(a) dst->a= src->a
239     COPY(pict_type);
240     COPY(current_picture);
241     COPY(f_code);
242     COPY(b_code);
243     COPY(qscale);
244     COPY(lambda);
245     COPY(lambda2);
246     COPY(picture_in_gop_number);
247     COPY(gop_picture_number);
248     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
249     COPY(progressive_frame);    // FIXME don't set in encode_header
250     COPY(partitioned_frame);    // FIXME don't set in encode_header
251 #undef COPY
252 }
253
254 /**
255  * Set the given MpegEncContext to defaults for encoding.
256  * the changed fields will not depend upon the prior state of the MpegEncContext.
257  */
258 static void MPV_encode_defaults(MpegEncContext *s)
259 {
260     int i;
261     ff_MPV_common_defaults(s);
262
263     for (i = -16; i < 16; i++) {
264         default_fcode_tab[i + MAX_MV] = 1;
265     }
266     s->me.mv_penalty = default_mv_penalty;
267     s->fcode_tab     = default_fcode_tab;
268 }
269
270 /* init video encoder */
271 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
272 {
273     MpegEncContext *s = avctx->priv_data;
274     int i;
275     int chroma_h_shift, chroma_v_shift;
276
277     MPV_encode_defaults(s);
278
279     switch (avctx->codec_id) {
280     case AV_CODEC_ID_MPEG2VIDEO:
281         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
282             avctx->pix_fmt != PIX_FMT_YUV422P) {
283             av_log(avctx, AV_LOG_ERROR,
284                    "only YUV420 and YUV422 are supported\n");
285             return -1;
286         }
287         break;
288     case AV_CODEC_ID_LJPEG:
289         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
290             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
292             avctx->pix_fmt != PIX_FMT_BGR0     &&
293             avctx->pix_fmt != PIX_FMT_BGRA     &&
294             avctx->pix_fmt != PIX_FMT_BGR24    &&
295             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
296               avctx->pix_fmt != PIX_FMT_YUV422P &&
297               avctx->pix_fmt != PIX_FMT_YUV444P) ||
298              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
299             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
300             return -1;
301         }
302         break;
303     case AV_CODEC_ID_MJPEG:
304     case AV_CODEC_ID_AMV:
305         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
306             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
307             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
308               avctx->pix_fmt != PIX_FMT_YUV422P) ||
309              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
310             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
311             return -1;
312         }
313         break;
314     default:
315         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
316             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
317             return -1;
318         }
319     }
320
321     switch (avctx->pix_fmt) {
322     case PIX_FMT_YUVJ422P:
323     case PIX_FMT_YUV422P:
324         s->chroma_format = CHROMA_422;
325         break;
326     case PIX_FMT_YUVJ420P:
327     case PIX_FMT_YUV420P:
328     default:
329         s->chroma_format = CHROMA_420;
330         break;
331     }
332
333     s->bit_rate = avctx->bit_rate;
334     s->width    = avctx->width;
335     s->height   = avctx->height;
336     if (avctx->gop_size > 600 &&
337         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
338         av_log(avctx, AV_LOG_WARNING,
339                "keyframe interval too large!, reducing it from %d to %d\n",
340                avctx->gop_size, 600);
341         avctx->gop_size = 600;
342     }
343     s->gop_size     = avctx->gop_size;
344     s->avctx        = avctx;
345     s->flags        = avctx->flags;
346     s->flags2       = avctx->flags2;
347     s->max_b_frames = avctx->max_b_frames;
348     s->codec_id     = avctx->codec->id;
349 #if FF_API_MPV_GLOBAL_OPTS
350     if (avctx->luma_elim_threshold)
351         s->luma_elim_threshold   = avctx->luma_elim_threshold;
352     if (avctx->chroma_elim_threshold)
353         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
354 #endif
355     s->strict_std_compliance = avctx->strict_std_compliance;
356     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
357     s->mpeg_quant         = avctx->mpeg_quant;
358     s->rtp_mode           = !!avctx->rtp_payload_size;
359     s->intra_dc_precision = avctx->intra_dc_precision;
360     s->user_specified_pts = AV_NOPTS_VALUE;
361
362     if (s->gop_size <= 1) {
363         s->intra_only = 1;
364         s->gop_size   = 12;
365     } else {
366         s->intra_only = 0;
367     }
368
369     s->me_method = avctx->me_method;
370
371     /* Fixed QSCALE */
372     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
373
374 #if FF_API_MPV_GLOBAL_OPTS
375     if (s->flags & CODEC_FLAG_QP_RD)
376         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
377 #endif
378
379     s->adaptive_quant = (s->avctx->lumi_masking ||
380                          s->avctx->dark_masking ||
381                          s->avctx->temporal_cplx_masking ||
382                          s->avctx->spatial_cplx_masking  ||
383                          s->avctx->p_masking      ||
384                          s->avctx->border_masking ||
385                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
386                         !s->fixed_qscale;
387
388     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
389
390     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
391         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
392         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
393             return -1;
394     }
395
396     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
397         av_log(avctx, AV_LOG_INFO,
398                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
399     }
400
401     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
402         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
403         return -1;
404     }
405
406     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
407         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
408         return -1;
409     }
410
411     if (avctx->rc_max_rate &&
412         avctx->rc_max_rate == avctx->bit_rate &&
413         avctx->rc_max_rate != avctx->rc_min_rate) {
414         av_log(avctx, AV_LOG_INFO,
415                "impossible bitrate constraints, this will fail\n");
416     }
417
418     if (avctx->rc_buffer_size &&
419         avctx->bit_rate * (int64_t)avctx->time_base.num >
420             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
421         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
422         return -1;
423     }
424
425     if (!s->fixed_qscale &&
426         avctx->bit_rate * av_q2d(avctx->time_base) >
427             avctx->bit_rate_tolerance) {
428         av_log(avctx, AV_LOG_ERROR,
429                "bitrate tolerance too small for bitrate\n");
430         return -1;
431     }
432
433     if (s->avctx->rc_max_rate &&
434         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
435         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
436          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
437         90000LL * (avctx->rc_buffer_size - 1) >
438             s->avctx->rc_max_rate * 0xFFFFLL) {
439         av_log(avctx, AV_LOG_INFO,
440                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
441                "specified vbv buffer is too large for the given bitrate!\n");
442     }
443
444     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
445         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
446         s->codec_id != AV_CODEC_ID_FLV1) {
447         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
448         return -1;
449     }
450
451     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
452         av_log(avctx, AV_LOG_ERROR,
453                "OBMC is only supported with simple mb decision\n");
454         return -1;
455     }
456
457     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
458         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
459         return -1;
460     }
461
462     if (s->max_b_frames                    &&
463         s->codec_id != AV_CODEC_ID_MPEG4      &&
464         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
465         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
466         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
467         return -1;
468     }
469
470     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
471          s->codec_id == AV_CODEC_ID_H263  ||
472          s->codec_id == AV_CODEC_ID_H263P) &&
473         (avctx->sample_aspect_ratio.num > 255 ||
474          avctx->sample_aspect_ratio.den > 255)) {
475         av_log(avctx, AV_LOG_WARNING,
476                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
477                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
478         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
479                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
480     }
481
482     if ((s->codec_id == AV_CODEC_ID_H263  ||
483          s->codec_id == AV_CODEC_ID_H263P) &&
484         (avctx->width  > 2048 ||
485          avctx->height > 1152 )) {
486         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
487         return -1;
488     }
489     if ((s->codec_id == AV_CODEC_ID_H263  ||
490          s->codec_id == AV_CODEC_ID_H263P) &&
491         ((avctx->width &3) ||
492          (avctx->height&3) )) {
493         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
494         return -1;
495     }
496
497     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
498         (avctx->width  > 4095 ||
499          avctx->height > 4095 )) {
500         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
501         return -1;
502     }
503
504     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
505         (avctx->width  > 16383 ||
506          avctx->height > 16383 )) {
507         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
508         return -1;
509     }
510
511     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
512          s->codec_id == AV_CODEC_ID_WMV2) &&
513          avctx->width & 1) {
514          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
515          return -1;
516     }
517
518     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
519         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
520         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
521         return -1;
522     }
523
524     // FIXME mpeg2 uses that too
525     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
526         av_log(avctx, AV_LOG_ERROR,
527                "mpeg2 style quantization not supported by codec\n");
528         return -1;
529     }
530
531 #if FF_API_MPV_GLOBAL_OPTS
532     if (s->flags & CODEC_FLAG_CBP_RD)
533         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
534 #endif
535
536     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
537         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
538         return -1;
539     }
540
541     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
542         s->avctx->mb_decision != FF_MB_DECISION_RD) {
543         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
544         return -1;
545     }
546
547     if (s->avctx->scenechange_threshold < 1000000000 &&
548         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
549         av_log(avctx, AV_LOG_ERROR,
550                "closed gop with scene change detection are not supported yet, "
551                "set threshold to 1000000000\n");
552         return -1;
553     }
554
555     if (s->flags & CODEC_FLAG_LOW_DELAY) {
556         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
557             av_log(avctx, AV_LOG_ERROR,
558                   "low delay forcing is only available for mpeg2\n");
559             return -1;
560         }
561         if (s->max_b_frames != 0) {
562             av_log(avctx, AV_LOG_ERROR,
563                    "b frames cannot be used with low delay\n");
564             return -1;
565         }
566     }
567
568     if (s->q_scale_type == 1) {
569         if (avctx->qmax > 12) {
570             av_log(avctx, AV_LOG_ERROR,
571                    "non linear quant only supports qmax <= 12 currently\n");
572             return -1;
573         }
574     }
575
576     if (s->avctx->thread_count > 1         &&
577         s->codec_id != AV_CODEC_ID_MPEG4      &&
578         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
579         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
580         s->codec_id != AV_CODEC_ID_MJPEG      &&
581         (s->codec_id != AV_CODEC_ID_H263P)) {
582         av_log(avctx, AV_LOG_ERROR,
583                "multi threaded encoding not supported by codec\n");
584         return -1;
585     }
586
587     if (s->avctx->thread_count < 1) {
588         av_log(avctx, AV_LOG_ERROR,
589                "automatic thread number detection not supported by codec, "
590                "patch welcome\n");
591         return -1;
592     }
593
594     if (s->avctx->thread_count > 1)
595         s->rtp_mode = 1;
596
597     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
598         s->h263_slice_structured = 1;
599
600     if (!avctx->time_base.den || !avctx->time_base.num) {
601         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
602         return -1;
603     }
604
605     i = (INT_MAX / 2 + 128) >> 8;
606     if (avctx->me_threshold >= i) {
607         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
608                i - 1);
609         return -1;
610     }
611     if (avctx->mb_threshold >= i) {
612         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
613                i - 1);
614         return -1;
615     }
616
617     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
618         av_log(avctx, AV_LOG_INFO,
619                "notice: b_frame_strategy only affects the first pass\n");
620         avctx->b_frame_strategy = 0;
621     }
622
623     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
624     if (i > 1) {
625         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
626         avctx->time_base.den /= i;
627         avctx->time_base.num /= i;
628         //return -1;
629     }
630
631     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
632         // (a + x * 3 / 8) / x
633         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
634         s->inter_quant_bias = 0;
635     } else {
636         s->intra_quant_bias = 0;
637         // (a - x / 4) / x
638         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
639     }
640
641     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
642         s->intra_quant_bias = avctx->intra_quant_bias;
643     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
644         s->inter_quant_bias = avctx->inter_quant_bias;
645
646     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
647
648     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
649                                   &chroma_v_shift);
650
651     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
652         s->avctx->time_base.den > (1 << 16) - 1) {
653         av_log(avctx, AV_LOG_ERROR,
654                "timebase %d/%d not supported by MPEG 4 standard, "
655                "the maximum admitted value for the timebase denominator "
656                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
657                (1 << 16) - 1);
658         return -1;
659     }
660     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
661
662 #if FF_API_MPV_GLOBAL_OPTS
663     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
664         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
665     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
666         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
667     if (avctx->quantizer_noise_shaping)
668         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
669 #endif
670
671     switch (avctx->codec->id) {
672     case AV_CODEC_ID_MPEG1VIDEO:
673         s->out_format = FMT_MPEG1;
674         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
675         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
676         break;
677     case AV_CODEC_ID_MPEG2VIDEO:
678         s->out_format = FMT_MPEG1;
679         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
680         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
681         s->rtp_mode   = 1;
682         break;
683     case AV_CODEC_ID_LJPEG:
684     case AV_CODEC_ID_MJPEG:
685     case AV_CODEC_ID_AMV:
686         s->out_format = FMT_MJPEG;
687         s->intra_only = 1; /* force intra only for jpeg */
688         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
689             (avctx->pix_fmt == PIX_FMT_BGR0
690              || s->avctx->pix_fmt == PIX_FMT_BGRA
691              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
692             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
693             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
694             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
695         } else {
696             s->mjpeg_vsample[0] = 2;
697             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
698             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
699             s->mjpeg_hsample[0] = 2;
700             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
701             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
702         }
703         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
704             ff_mjpeg_encode_init(s) < 0)
705             return -1;
706         avctx->delay = 0;
707         s->low_delay = 1;
708         break;
709     case AV_CODEC_ID_H261:
710         if (!CONFIG_H261_ENCODER)
711             return -1;
712         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
713             av_log(avctx, AV_LOG_ERROR,
714                    "The specified picture size of %dx%d is not valid for the "
715                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
716                     s->width, s->height);
717             return -1;
718         }
719         s->out_format = FMT_H261;
720         avctx->delay  = 0;
721         s->low_delay  = 1;
722         break;
723     case AV_CODEC_ID_H263:
724         if (!CONFIG_H263_ENCODER)
725             return -1;
726         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
727                              s->width, s->height) == 8) {
728             av_log(avctx, AV_LOG_ERROR,
729                    "The specified picture size of %dx%d is not valid for "
730                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
731                    "352x288, 704x576, and 1408x1152. "
732                    "Try H.263+.\n", s->width, s->height);
733             return -1;
734         }
735         s->out_format = FMT_H263;
736         avctx->delay  = 0;
737         s->low_delay  = 1;
738         break;
739     case AV_CODEC_ID_H263P:
740         s->out_format = FMT_H263;
741         s->h263_plus  = 1;
742         /* Fx */
743         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
744         s->modified_quant  = s->h263_aic;
745         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
746         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
747
748         /* /Fx */
749         /* These are just to be sure */
750         avctx->delay = 0;
751         s->low_delay = 1;
752         break;
753     case AV_CODEC_ID_FLV1:
754         s->out_format      = FMT_H263;
755         s->h263_flv        = 2; /* format = 1; 11-bit codes */
756         s->unrestricted_mv = 1;
757         s->rtp_mode  = 0; /* don't allow GOB */
758         avctx->delay = 0;
759         s->low_delay = 1;
760         break;
761     case AV_CODEC_ID_RV10:
762         s->out_format = FMT_H263;
763         avctx->delay  = 0;
764         s->low_delay  = 1;
765         break;
766     case AV_CODEC_ID_RV20:
767         s->out_format      = FMT_H263;
768         avctx->delay       = 0;
769         s->low_delay       = 1;
770         s->modified_quant  = 1;
771         s->h263_aic        = 1;
772         s->h263_plus       = 1;
773         s->loop_filter     = 1;
774         s->unrestricted_mv = 0;
775         break;
776     case AV_CODEC_ID_MPEG4:
777         s->out_format      = FMT_H263;
778         s->h263_pred       = 1;
779         s->unrestricted_mv = 1;
780         s->low_delay       = s->max_b_frames ? 0 : 1;
781         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
782         break;
783     case AV_CODEC_ID_MSMPEG4V2:
784         s->out_format      = FMT_H263;
785         s->h263_pred       = 1;
786         s->unrestricted_mv = 1;
787         s->msmpeg4_version = 2;
788         avctx->delay       = 0;
789         s->low_delay       = 1;
790         break;
791     case AV_CODEC_ID_MSMPEG4V3:
792         s->out_format        = FMT_H263;
793         s->h263_pred         = 1;
794         s->unrestricted_mv   = 1;
795         s->msmpeg4_version   = 3;
796         s->flipflop_rounding = 1;
797         avctx->delay         = 0;
798         s->low_delay         = 1;
799         break;
800     case AV_CODEC_ID_WMV1:
801         s->out_format        = FMT_H263;
802         s->h263_pred         = 1;
803         s->unrestricted_mv   = 1;
804         s->msmpeg4_version   = 4;
805         s->flipflop_rounding = 1;
806         avctx->delay         = 0;
807         s->low_delay         = 1;
808         break;
809     case AV_CODEC_ID_WMV2:
810         s->out_format        = FMT_H263;
811         s->h263_pred         = 1;
812         s->unrestricted_mv   = 1;
813         s->msmpeg4_version   = 5;
814         s->flipflop_rounding = 1;
815         avctx->delay         = 0;
816         s->low_delay         = 1;
817         break;
818     default:
819         return -1;
820     }
821
822     avctx->has_b_frames = !s->low_delay;
823
824     s->encoding = 1;
825
826     s->progressive_frame    =
827     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
828                                                 CODEC_FLAG_INTERLACED_ME) ||
829                                 s->alternate_scan);
830
831     /* init */
832     if (ff_MPV_common_init(s) < 0)
833         return -1;
834
835     if (ARCH_X86)
836         ff_MPV_encode_init_x86(s);
837
838     if (!s->dct_quantize)
839         s->dct_quantize = ff_dct_quantize_c;
840     if (!s->denoise_dct)
841         s->denoise_dct  = denoise_dct_c;
842     s->fast_dct_quantize = s->dct_quantize;
843     if (avctx->trellis)
844         s->dct_quantize  = dct_quantize_trellis_c;
845
846     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
847         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
848
849     s->quant_precision = 5;
850
851     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
852     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
853
854     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
855         ff_h261_encode_init(s);
856     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
857         ff_h263_encode_init(s);
858     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
859         ff_msmpeg4_encode_init(s);
860     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
861         && s->out_format == FMT_MPEG1)
862         ff_mpeg1_encode_init(s);
863
864     /* init q matrix */
865     for (i = 0; i < 64; i++) {
866         int j = s->dsp.idct_permutation[i];
867         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
868             s->mpeg_quant) {
869             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
870             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
871         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
872             s->intra_matrix[j] =
873             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
874         } else {
875             /* mpeg1/2 */
876             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
877             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
878         }
879         if (s->avctx->intra_matrix)
880             s->intra_matrix[j] = s->avctx->intra_matrix[i];
881         if (s->avctx->inter_matrix)
882             s->inter_matrix[j] = s->avctx->inter_matrix[i];
883     }
884
885     /* precompute matrix */
886     /* for mjpeg, we do include qscale in the matrix */
887     if (s->out_format != FMT_MJPEG) {
888         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
889                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
890                           31, 1);
891         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
892                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
893                           31, 0);
894     }
895
896     if (ff_rate_control_init(s) < 0)
897         return -1;
898
899     return 0;
900 }
901
902 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
903 {
904     MpegEncContext *s = avctx->priv_data;
905
906     ff_rate_control_uninit(s);
907
908     ff_MPV_common_end(s);
909     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
910         s->out_format == FMT_MJPEG)
911         ff_mjpeg_encode_close(s);
912
913     av_freep(&avctx->extradata);
914
915     return 0;
916 }
917
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the 16x16 block
 * @param ref    constant every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int sum = 0;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            sum += diff >= 0 ? diff : -diff;
        }
    }

    return sum;
}
931
932 static int get_intra_count(MpegEncContext *s, uint8_t *src,
933                            uint8_t *ref, int stride)
934 {
935     int x, y, w, h;
936     int acc = 0;
937
938     w = s->width  & ~15;
939     h = s->height & ~15;
940
941     for (y = 0; y < h; y += 16) {
942         for (x = 0; x < w; x += 16) {
943             int offset = x + y * stride;
944             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
945                                      16);
946             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
947             int sae  = get_sae(src + offset, mean, stride);
948
949             acc += sae + 500 < sad;
950         }
951     }
952     return acc;
953 }
954
955
956 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
957 {
958     AVFrame *pic = NULL;
959     int64_t pts;
960     int i;
961     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
962                                                  (s->low_delay ? 0 : 1);
963     int direct = 1;
964
965     if (pic_arg) {
966         pts = pic_arg->pts;
967         pic_arg->display_picture_number = s->input_picture_number++;
968
969         if (pts != AV_NOPTS_VALUE) {
970             if (s->user_specified_pts != AV_NOPTS_VALUE) {
971                 int64_t time = pts;
972                 int64_t last = s->user_specified_pts;
973
974                 if (time <= last) {
975                     av_log(s->avctx, AV_LOG_ERROR,
976                            "Error, Invalid timestamp=%"PRId64", "
977                            "last=%"PRId64"\n", pts, s->user_specified_pts);
978                     return -1;
979                 }
980
981                 if (!s->low_delay && pic_arg->display_picture_number == 1)
982                     s->dts_delta = time - last;
983             }
984             s->user_specified_pts = pts;
985         } else {
986             if (s->user_specified_pts != AV_NOPTS_VALUE) {
987                 s->user_specified_pts =
988                 pts = s->user_specified_pts + 1;
989                 av_log(s->avctx, AV_LOG_INFO,
990                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
991                        pts);
992             } else {
993                 pts = pic_arg->display_picture_number;
994             }
995         }
996     }
997
998   if (pic_arg) {
999     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
1000         direct = 0;
1001     if (pic_arg->linesize[0] != s->linesize)
1002         direct = 0;
1003     if (pic_arg->linesize[1] != s->uvlinesize)
1004         direct = 0;
1005     if (pic_arg->linesize[2] != s->uvlinesize)
1006         direct = 0;
1007
1008     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
1009     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
1010
1011     if (direct) {
1012         i = ff_find_unused_picture(s, 1);
1013         if (i < 0)
1014             return i;
1015
1016         pic = &s->picture[i].f;
1017         pic->reference = 3;
1018
1019         for (i = 0; i < 4; i++) {
1020             pic->data[i]     = pic_arg->data[i];
1021             pic->linesize[i] = pic_arg->linesize[i];
1022         }
1023         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1024             return -1;
1025         }
1026     } else {
1027         i = ff_find_unused_picture(s, 0);
1028         if (i < 0)
1029             return i;
1030
1031         pic = &s->picture[i].f;
1032         pic->reference = 3;
1033
1034         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1035             return -1;
1036         }
1037
1038         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1039             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1040             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1041             // empty
1042         } else {
1043             int h_chroma_shift, v_chroma_shift;
1044             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1045                                           &v_chroma_shift);
1046
1047             for (i = 0; i < 3; i++) {
1048                 int src_stride = pic_arg->linesize[i];
1049                 int dst_stride = i ? s->uvlinesize : s->linesize;
1050                 int h_shift = i ? h_chroma_shift : 0;
1051                 int v_shift = i ? v_chroma_shift : 0;
1052                 int w = s->width  >> h_shift;
1053                 int h = s->height >> v_shift;
1054                 uint8_t *src = pic_arg->data[i];
1055                 uint8_t *dst = pic->data[i];
1056
1057                 if(s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1058                     h= ((s->height+15)/16*16)>>v_shift;
1059                 }
1060
1061                 if (!s->avctx->rc_buffer_size)
1062                     dst += INPLACE_OFFSET;
1063
1064                 if (src_stride == dst_stride)
1065                     memcpy(dst, src, src_stride * h);
1066                 else {
1067                     while (h--) {
1068                         memcpy(dst, src, w);
1069                         dst += dst_stride;
1070                         src += src_stride;
1071                     }
1072                 }
1073             }
1074         }
1075     }
1076     copy_picture_attributes(s, pic, pic_arg);
1077     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1078   }
1079
1080     /* shift buffer entries */
1081     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1082         s->input_picture[i - 1] = s->input_picture[i];
1083
1084     s->input_picture[encoding_delay] = (Picture*) pic;
1085
1086     return 0;
1087 }
1088
1089 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1090 {
1091     int x, y, plane;
1092     int score = 0;
1093     int64_t score64 = 0;
1094
1095     for (plane = 0; plane < 3; plane++) {
1096         const int stride = p->f.linesize[plane];
1097         const int bw = plane ? 1 : 2;
1098         for (y = 0; y < s->mb_height * bw; y++) {
1099             for (x = 0; x < s->mb_width * bw; x++) {
1100                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1101                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1102                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1103                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1104
1105                 switch (s->avctx->frame_skip_exp) {
1106                 case 0: score    =  FFMAX(score, v);          break;
1107                 case 1: score   += FFABS(v);                  break;
1108                 case 2: score   += v * v;                     break;
1109                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1110                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1111                 }
1112             }
1113         }
1114     }
1115
1116     if (score)
1117         score64 = score;
1118
1119     if (score64 < s->avctx->frame_skip_threshold)
1120         return 1;
1121     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1122         return 1;
1123     return 0;
1124 }
1125
1126 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1127 {
1128     AVPacket pkt = { 0 };
1129     int ret, got_output;
1130
1131     av_init_packet(&pkt);
1132     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1133     if (ret < 0)
1134         return ret;
1135
1136     ret = pkt.size;
1137     av_free_packet(&pkt);
1138     return ret;
1139 }
1140
1141 static int estimate_best_b_count(MpegEncContext *s)
1142 {
1143     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1144     AVCodecContext *c = avcodec_alloc_context3(NULL);
1145     AVFrame input[FF_MAX_B_FRAMES + 2];
1146     const int scale = s->avctx->brd_scale;
1147     int i, j, out_size, p_lambda, b_lambda, lambda2;
1148     int64_t best_rd  = INT64_MAX;
1149     int best_b_count = -1;
1150
1151     av_assert0(scale >= 0 && scale <= 3);
1152
1153     //emms_c();
1154     //s->next_picture_ptr->quality;
1155     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1156     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1157     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1158     if (!b_lambda) // FIXME we should do this somewhere else
1159         b_lambda = p_lambda;
1160     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1161                FF_LAMBDA_SHIFT;
1162
1163     c->width        = s->width  >> scale;
1164     c->height       = s->height >> scale;
1165     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1166                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1167     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1168     c->mb_decision  = s->avctx->mb_decision;
1169     c->me_cmp       = s->avctx->me_cmp;
1170     c->mb_cmp       = s->avctx->mb_cmp;
1171     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1172     c->pix_fmt      = PIX_FMT_YUV420P;
1173     c->time_base    = s->avctx->time_base;
1174     c->max_b_frames = s->max_b_frames;
1175
1176     if (avcodec_open2(c, codec, NULL) < 0)
1177         return -1;
1178
1179     for (i = 0; i < s->max_b_frames + 2; i++) {
1180         int ysize = c->width * c->height;
1181         int csize = (c->width / 2) * (c->height / 2);
1182         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1183                                                 s->next_picture_ptr;
1184
1185         avcodec_get_frame_defaults(&input[i]);
1186         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1187         input[i].data[1]     = input[i].data[0] + ysize;
1188         input[i].data[2]     = input[i].data[1] + csize;
1189         input[i].linesize[0] = c->width;
1190         input[i].linesize[1] =
1191         input[i].linesize[2] = c->width / 2;
1192
1193         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1194             pre_input = *pre_input_ptr;
1195
1196             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1197                 pre_input.f.data[0] += INPLACE_OFFSET;
1198                 pre_input.f.data[1] += INPLACE_OFFSET;
1199                 pre_input.f.data[2] += INPLACE_OFFSET;
1200             }
1201
1202             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1203                                  pre_input.f.data[0], pre_input.f.linesize[0],
1204                                  c->width,      c->height);
1205             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1206                                  pre_input.f.data[1], pre_input.f.linesize[1],
1207                                  c->width >> 1, c->height >> 1);
1208             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1209                                  pre_input.f.data[2], pre_input.f.linesize[2],
1210                                  c->width >> 1, c->height >> 1);
1211         }
1212     }
1213
1214     for (j = 0; j < s->max_b_frames + 1; j++) {
1215         int64_t rd = 0;
1216
1217         if (!s->input_picture[j])
1218             break;
1219
1220         c->error[0] = c->error[1] = c->error[2] = 0;
1221
1222         input[0].pict_type = AV_PICTURE_TYPE_I;
1223         input[0].quality   = 1 * FF_QP2LAMBDA;
1224
1225         out_size = encode_frame(c, &input[0]);
1226
1227         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1228
1229         for (i = 0; i < s->max_b_frames + 1; i++) {
1230             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1231
1232             input[i + 1].pict_type = is_p ?
1233                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1234             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1235
1236             out_size = encode_frame(c, &input[i + 1]);
1237
1238             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1239         }
1240
1241         /* get the delayed frames */
1242         while (out_size) {
1243             out_size = encode_frame(c, NULL);
1244             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1245         }
1246
1247         rd += c->error[0] + c->error[1] + c->error[2];
1248
1249         if (rd < best_rd) {
1250             best_rd = rd;
1251             best_b_count = j;
1252         }
1253     }
1254
1255     avcodec_close(c);
1256     av_freep(&c);
1257
1258     for (i = 0; i < s->max_b_frames + 2; i++) {
1259         av_freep(&input[i].data[0]);
1260     }
1261
1262     return best_b_count;
1263 }
1264
1265 static int select_input_picture(MpegEncContext *s)
1266 {
1267     int i;
1268
1269     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1270         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1271     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1272
1273     /* set next picture type & ordering */
1274     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1275         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1276             s->next_picture_ptr == NULL || s->intra_only) {
1277             s->reordered_input_picture[0] = s->input_picture[0];
1278             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1279             s->reordered_input_picture[0]->f.coded_picture_number =
1280                 s->coded_picture_number++;
1281         } else {
1282             int b_frames;
1283
1284             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1285                 if (s->picture_in_gop_number < s->gop_size &&
1286                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1287                     // FIXME check that te gop check above is +-1 correct
1288                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1289                     //       s->input_picture[0]->f.data[0],
1290                     //       s->input_picture[0]->pts);
1291
1292                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1293                         for (i = 0; i < 4; i++)
1294                             s->input_picture[0]->f.data[i] = NULL;
1295                         s->input_picture[0]->f.type = 0;
1296                     } else {
1297                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1298                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1299
1300                         s->avctx->release_buffer(s->avctx,
1301                                                  &s->input_picture[0]->f);
1302                     }
1303
1304                     emms_c();
1305                     ff_vbv_update(s, 0);
1306
1307                     goto no_output_pic;
1308                 }
1309             }
1310
1311             if (s->flags & CODEC_FLAG_PASS2) {
1312                 for (i = 0; i < s->max_b_frames + 1; i++) {
1313                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1314
1315                     if (pict_num >= s->rc_context.num_entries)
1316                         break;
1317                     if (!s->input_picture[i]) {
1318                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1319                         break;
1320                     }
1321
1322                     s->input_picture[i]->f.pict_type =
1323                         s->rc_context.entry[pict_num].new_pict_type;
1324                 }
1325             }
1326
1327             if (s->avctx->b_frame_strategy == 0) {
1328                 b_frames = s->max_b_frames;
1329                 while (b_frames && !s->input_picture[b_frames])
1330                     b_frames--;
1331             } else if (s->avctx->b_frame_strategy == 1) {
1332                 for (i = 1; i < s->max_b_frames + 1; i++) {
1333                     if (s->input_picture[i] &&
1334                         s->input_picture[i]->b_frame_score == 0) {
1335                         s->input_picture[i]->b_frame_score =
1336                             get_intra_count(s,
1337                                             s->input_picture[i    ]->f.data[0],
1338                                             s->input_picture[i - 1]->f.data[0],
1339                                             s->linesize) + 1;
1340                     }
1341                 }
1342                 for (i = 0; i < s->max_b_frames + 1; i++) {
1343                     if (s->input_picture[i] == NULL ||
1344                         s->input_picture[i]->b_frame_score - 1 >
1345                             s->mb_num / s->avctx->b_sensitivity)
1346                         break;
1347                 }
1348
1349                 b_frames = FFMAX(0, i - 1);
1350
1351                 /* reset scores */
1352                 for (i = 0; i < b_frames + 1; i++) {
1353                     s->input_picture[i]->b_frame_score = 0;
1354                 }
1355             } else if (s->avctx->b_frame_strategy == 2) {
1356                 b_frames = estimate_best_b_count(s);
1357             } else {
1358                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1359                 b_frames = 0;
1360             }
1361
1362             emms_c();
1363             //static int b_count = 0;
1364             //b_count += b_frames;
1365             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1366
1367             for (i = b_frames - 1; i >= 0; i--) {
1368                 int type = s->input_picture[i]->f.pict_type;
1369                 if (type && type != AV_PICTURE_TYPE_B)
1370                     b_frames = i;
1371             }
1372             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1373                 b_frames == s->max_b_frames) {
1374                 av_log(s->avctx, AV_LOG_ERROR,
1375                        "warning, too many b frames in a row\n");
1376             }
1377
1378             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1379                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1380                     s->gop_size > s->picture_in_gop_number) {
1381                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1382                 } else {
1383                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1384                         b_frames = 0;
1385                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1386                 }
1387             }
1388
1389             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1390                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1391                 b_frames--;
1392
1393             s->reordered_input_picture[0] = s->input_picture[b_frames];
1394             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1395                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1396             s->reordered_input_picture[0]->f.coded_picture_number =
1397                 s->coded_picture_number++;
1398             for (i = 0; i < b_frames; i++) {
1399                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1400                 s->reordered_input_picture[i + 1]->f.pict_type =
1401                     AV_PICTURE_TYPE_B;
1402                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1403                     s->coded_picture_number++;
1404             }
1405         }
1406     }
1407 no_output_pic:
1408     if (s->reordered_input_picture[0]) {
1409         s->reordered_input_picture[0]->f.reference =
1410            s->reordered_input_picture[0]->f.pict_type !=
1411                AV_PICTURE_TYPE_B ? 3 : 0;
1412
1413         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1414
1415         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1416             s->avctx->rc_buffer_size) {
1417             // input is a shared pix, so we can't modifiy it -> alloc a new
1418             // one & ensure that the shared one is reuseable
1419
1420             Picture *pic;
1421             int i = ff_find_unused_picture(s, 0);
1422             if (i < 0)
1423                 return i;
1424             pic = &s->picture[i];
1425
1426             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1427             if (ff_alloc_picture(s, pic, 0) < 0) {
1428                 return -1;
1429             }
1430
1431             /* mark us unused / free shared pic */
1432             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1433                 s->avctx->release_buffer(s->avctx,
1434                                          &s->reordered_input_picture[0]->f);
1435             for (i = 0; i < 4; i++)
1436                 s->reordered_input_picture[0]->f.data[i] = NULL;
1437             s->reordered_input_picture[0]->f.type = 0;
1438
1439             copy_picture_attributes(s, &pic->f,
1440                                     &s->reordered_input_picture[0]->f);
1441
1442             s->current_picture_ptr = pic;
1443         } else {
1444             // input is not a shared pix -> reuse buffer for current_pix
1445
1446             assert(s->reordered_input_picture[0]->f.type ==
1447                        FF_BUFFER_TYPE_USER ||
1448                    s->reordered_input_picture[0]->f.type ==
1449                        FF_BUFFER_TYPE_INTERNAL);
1450
1451             s->current_picture_ptr = s->reordered_input_picture[0];
1452             for (i = 0; i < 4; i++) {
1453                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1454             }
1455         }
1456         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1457
1458         s->picture_number = s->new_picture.f.display_picture_number;
1459         //printf("dpn:%d\n", s->picture_number);
1460     } else {
1461         memset(&s->new_picture, 0, sizeof(Picture));
1462     }
1463     return 0;
1464 }
1465
1466 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1467                           AVFrame *pic_arg, int *got_packet)
1468 {
1469     MpegEncContext *s = avctx->priv_data;
1470     int i, stuffing_count, ret;
1471     int context_count = s->slice_context_count;
1472
1473     s->picture_in_gop_number++;
1474
1475     if (load_input_picture(s, pic_arg) < 0)
1476         return -1;
1477
1478     if (select_input_picture(s) < 0) {
1479         return -1;
1480     }
1481
1482     /* output? */
1483     if (s->new_picture.f.data[0]) {
1484         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1485             return ret;
1486         if (s->mb_info) {
1487             s->mb_info_ptr = av_packet_new_side_data(pkt,
1488                                  AV_PKT_DATA_H263_MB_INFO,
1489                                  s->mb_width*s->mb_height*12);
1490             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1491         }
1492
1493         for (i = 0; i < context_count; i++) {
1494             int start_y = s->thread_context[i]->start_mb_y;
1495             int   end_y = s->thread_context[i]->  end_mb_y;
1496             int h       = s->mb_height;
1497             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1498             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1499
1500             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1501         }
1502
1503         s->pict_type = s->new_picture.f.pict_type;
1504         //emms_c();
1505         //printf("qs:%f %f %d\n", s->new_picture.quality,
1506         //       s->current_picture.quality, s->qscale);
1507         ff_MPV_frame_start(s, avctx);
1508 vbv_retry:
1509         if (encode_picture(s, s->picture_number) < 0)
1510             return -1;
1511
1512         avctx->header_bits = s->header_bits;
1513         avctx->mv_bits     = s->mv_bits;
1514         avctx->misc_bits   = s->misc_bits;
1515         avctx->i_tex_bits  = s->i_tex_bits;
1516         avctx->p_tex_bits  = s->p_tex_bits;
1517         avctx->i_count     = s->i_count;
1518         // FIXME f/b_count in avctx
1519         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1520         avctx->skip_count  = s->skip_count;
1521
1522         ff_MPV_frame_end(s);
1523
1524         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1525             ff_mjpeg_encode_picture_trailer(s);
1526
1527         if (avctx->rc_buffer_size) {
1528             RateControlContext *rcc = &s->rc_context;
1529             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1530
1531             if (put_bits_count(&s->pb) > max_size &&
1532                 s->lambda < s->avctx->lmax) {
1533                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1534                                        (s->qscale + 1) / s->qscale);
1535                 if (s->adaptive_quant) {
1536                     int i;
1537                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1538                         s->lambda_table[i] =
1539                             FFMAX(s->lambda_table[i] + 1,
1540                                   s->lambda_table[i] * (s->qscale + 1) /
1541                                   s->qscale);
1542                 }
1543                 s->mb_skipped = 0;        // done in MPV_frame_start()
1544                 // done in encode_picture() so we must undo it
1545                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1546                     if (s->flipflop_rounding          ||
1547                         s->codec_id == AV_CODEC_ID_H263P ||
1548                         s->codec_id == AV_CODEC_ID_MPEG4)
1549                         s->no_rounding ^= 1;
1550                 }
1551                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1552                     s->time_base       = s->last_time_base;
1553                     s->last_non_b_time = s->time - s->pp_time;
1554                 }
1555                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1556                 for (i = 0; i < context_count; i++) {
1557                     PutBitContext *pb = &s->thread_context[i]->pb;
1558                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1559                 }
1560                 goto vbv_retry;
1561             }
1562
1563             assert(s->avctx->rc_max_rate);
1564         }
1565
1566         if (s->flags & CODEC_FLAG_PASS1)
1567             ff_write_pass1_stats(s);
1568
1569         for (i = 0; i < 4; i++) {
1570             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1571             avctx->error[i] += s->current_picture_ptr->f.error[i];
1572         }
1573
1574         if (s->flags & CODEC_FLAG_PASS1)
1575             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1576                    avctx->i_tex_bits + avctx->p_tex_bits ==
1577                        put_bits_count(&s->pb));
1578         flush_put_bits(&s->pb);
1579         s->frame_bits  = put_bits_count(&s->pb);
1580
1581         stuffing_count = ff_vbv_update(s, s->frame_bits);
1582         s->stuffing_bits = 8*stuffing_count;
1583         if (stuffing_count) {
1584             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1585                     stuffing_count + 50) {
1586                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1587                 return -1;
1588             }
1589
1590             switch (s->codec_id) {
1591             case AV_CODEC_ID_MPEG1VIDEO:
1592             case AV_CODEC_ID_MPEG2VIDEO:
1593                 while (stuffing_count--) {
1594                     put_bits(&s->pb, 8, 0);
1595                 }
1596             break;
1597             case AV_CODEC_ID_MPEG4:
1598                 put_bits(&s->pb, 16, 0);
1599                 put_bits(&s->pb, 16, 0x1C3);
1600                 stuffing_count -= 4;
1601                 while (stuffing_count--) {
1602                     put_bits(&s->pb, 8, 0xFF);
1603                 }
1604             break;
1605             default:
1606                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1607             }
1608             flush_put_bits(&s->pb);
1609             s->frame_bits  = put_bits_count(&s->pb);
1610         }
1611
1612         /* update mpeg1/2 vbv_delay for CBR */
1613         if (s->avctx->rc_max_rate                          &&
1614             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1615             s->out_format == FMT_MPEG1                     &&
1616             90000LL * (avctx->rc_buffer_size - 1) <=
1617                 s->avctx->rc_max_rate * 0xFFFFLL) {
1618             int vbv_delay, min_delay;
1619             double inbits  = s->avctx->rc_max_rate *
1620                              av_q2d(s->avctx->time_base);
1621             int    minbits = s->frame_bits - 8 *
1622                              (s->vbv_delay_ptr - s->pb.buf - 1);
1623             double bits    = s->rc_context.buffer_index + minbits - inbits;
1624
1625             if (bits < 0)
1626                 av_log(s->avctx, AV_LOG_ERROR,
1627                        "Internal error, negative bits\n");
1628
1629             assert(s->repeat_first_field == 0);
1630
1631             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1632             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1633                         s->avctx->rc_max_rate;
1634
1635             vbv_delay = FFMAX(vbv_delay, min_delay);
1636
1637             av_assert0(vbv_delay < 0xFFFF);
1638
1639             s->vbv_delay_ptr[0] &= 0xF8;
1640             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1641             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1642             s->vbv_delay_ptr[2] &= 0x07;
1643             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1644             avctx->vbv_delay     = vbv_delay * 300;
1645         }
1646         s->total_bits     += s->frame_bits;
1647         avctx->frame_bits  = s->frame_bits;
1648
1649         pkt->pts = s->current_picture.f.pts;
1650         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1651             if (!s->current_picture.f.coded_picture_number)
1652                 pkt->dts = pkt->pts - s->dts_delta;
1653             else
1654                 pkt->dts = s->reordered_pts;
1655             s->reordered_pts = pkt->pts;
1656         } else
1657             pkt->dts = pkt->pts;
1658         if (s->current_picture.f.key_frame)
1659             pkt->flags |= AV_PKT_FLAG_KEY;
1660         if (s->mb_info)
1661             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1662     } else {
1663         s->frame_bits = 0;
1664     }
1665     assert((s->frame_bits & 7) == 0);
1666
1667     pkt->size = s->frame_bits / 8;
1668     *got_packet = !!pkt->size;
1669     return 0;
1670 }
1671
1672 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1673                                                 int n, int threshold)
1674 {
1675     static const char tab[64] = {
1676         3, 2, 2, 1, 1, 1, 1, 1,
1677         1, 1, 1, 1, 1, 1, 1, 1,
1678         1, 1, 1, 1, 1, 1, 1, 1,
1679         0, 0, 0, 0, 0, 0, 0, 0,
1680         0, 0, 0, 0, 0, 0, 0, 0,
1681         0, 0, 0, 0, 0, 0, 0, 0,
1682         0, 0, 0, 0, 0, 0, 0, 0,
1683         0, 0, 0, 0, 0, 0, 0, 0
1684     };
1685     int score = 0;
1686     int run = 0;
1687     int i;
1688     DCTELEM *block = s->block[n];
1689     const int last_index = s->block_last_index[n];
1690     int skip_dc;
1691
1692     if (threshold < 0) {
1693         skip_dc = 0;
1694         threshold = -threshold;
1695     } else
1696         skip_dc = 1;
1697
1698     /* Are all we could set to zero already zero? */
1699     if (last_index <= skip_dc - 1)
1700         return;
1701
1702     for (i = 0; i <= last_index; i++) {
1703         const int j = s->intra_scantable.permutated[i];
1704         const int level = FFABS(block[j]);
1705         if (level == 1) {
1706             if (skip_dc && i == 0)
1707                 continue;
1708             score += tab[run];
1709             run = 0;
1710         } else if (level > 1) {
1711             return;
1712         } else {
1713             run++;
1714         }
1715     }
1716     if (score >= threshold)
1717         return;
1718     for (i = skip_dc; i <= last_index; i++) {
1719         const int j = s->intra_scantable.permutated[i];
1720         block[j] = 0;
1721     }
1722     if (block[0])
1723         s->block_last_index[n] = 0;
1724     else
1725         s->block_last_index[n] = -1;
1726 }
1727
1728 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1729                                int last_index)
1730 {
1731     int i;
1732     const int maxlevel = s->max_qcoeff;
1733     const int minlevel = s->min_qcoeff;
1734     int overflow = 0;
1735
1736     if (s->mb_intra) {
1737         i = 1; // skip clipping of intra dc
1738     } else
1739         i = 0;
1740
1741     for (; i <= last_index; i++) {
1742         const int j = s->intra_scantable.permutated[i];
1743         int level = block[j];
1744
1745         if (level > maxlevel) {
1746             level = maxlevel;
1747             overflow++;
1748         } else if (level < minlevel) {
1749             level = minlevel;
1750             overflow++;
1751         }
1752
1753         block[j] = level;
1754     }
1755
1756     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1757         av_log(s->avctx, AV_LOG_INFO,
1758                "warning, clipping %d dct coefficients to %d..%d\n",
1759                overflow, minlevel, maxlevel);
1760 }
1761
/**
 * Compute a weight for each pixel of an 8x8 block from the local pixel
 * statistics of its neighbourhood.
 *
 * For every position the pixels of the surrounding 3x3 window, cropped to
 * the 8x8 block, are summed up; the weight is
 * 36 * sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries in raster order
 * @param ptr    top left pixel of the 8x8 block
 * @param stride distance in bytes between two lines of the block
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int bx, by;

    // FIXME optimize
    for (by = 0; by < 8; by++) {
        const int y_first = FFMAX(by - 1, 0);
        const int y_end   = FFMIN(8, by + 2);

        for (bx = 0; bx < 8; bx++) {
            const int x_first = FFMAX(bx - 1, 0);
            const int x_end   = FFMIN(8, bx + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int nx, ny;

            for (ny = y_first; ny < y_end; ny++) {
                const int line = ny * stride;

                for (nx = x_first; nx < x_end; nx++) {
                    const int v = ptr[nx + line];

                    total    += v;
                    total_sq += v * v;
                    samples++;
                }
            }
            weight[bx + 8 * by] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1785
1786 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1787                                                 int motion_x, int motion_y,
1788                                                 int mb_block_height,
1789                                                 int mb_block_count)
1790 {
1791     int16_t weight[8][64];
1792     DCTELEM orig[8][64];
1793     const int mb_x = s->mb_x;
1794     const int mb_y = s->mb_y;
1795     int i;
1796     int skip_dct[8];
1797     int dct_offset = s->linesize * 8; // default for progressive frames
1798     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1799     int wrap_y, wrap_c;
1800
1801     for (i = 0; i < mb_block_count; i++)
1802         skip_dct[i] = s->skipdct;
1803
1804     if (s->adaptive_quant) {
1805         const int last_qp = s->qscale;
1806         const int mb_xy = mb_x + mb_y * s->mb_stride;
1807
1808         s->lambda = s->lambda_table[mb_xy];
1809         update_qscale(s);
1810
1811         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1812             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1813             s->dquant = s->qscale - last_qp;
1814
1815             if (s->out_format == FMT_H263) {
1816                 s->dquant = av_clip(s->dquant, -2, 2);
1817
1818                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1819                     if (!s->mb_intra) {
1820                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1821                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1822                                 s->dquant = 0;
1823                         }
1824                         if (s->mv_type == MV_TYPE_8X8)
1825                             s->dquant = 0;
1826                     }
1827                 }
1828             }
1829         }
1830         ff_set_qscale(s, last_qp + s->dquant);
1831     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1832         ff_set_qscale(s, s->qscale + s->dquant);
1833
1834     wrap_y = s->linesize;
1835     wrap_c = s->uvlinesize;
1836     ptr_y  = s->new_picture.f.data[0] +
1837              (mb_y * 16 * wrap_y)              + mb_x * 16;
1838     ptr_cb = s->new_picture.f.data[1] +
1839              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1840     ptr_cr = s->new_picture.f.data[2] +
1841              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1842
1843     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1844         uint8_t *ebuf = s->edge_emu_buffer + 32;
1845         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1846                                 mb_y * 16, s->width, s->height);
1847         ptr_y = ebuf;
1848         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1849                                 mb_block_height, mb_x * 8, mb_y * 8,
1850                                 (s->width+1) >> 1, (s->height+1) >> 1);
1851         ptr_cb = ebuf + 18 * wrap_y;
1852         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1853                                 mb_block_height, mb_x * 8, mb_y * 8,
1854                                 (s->width+1) >> 1, (s->height+1) >> 1);
1855         ptr_cr = ebuf + 18 * wrap_y + 8;
1856     }
1857
1858     if (s->mb_intra) {
1859         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1860             int progressive_score, interlaced_score;
1861
1862             s->interlaced_dct = 0;
1863             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1864                                                     NULL, wrap_y, 8) +
1865                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1866                                                     NULL, wrap_y, 8) - 400;
1867
1868             if (progressive_score > 0) {
1869                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1870                                                        NULL, wrap_y * 2, 8) +
1871                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1872                                                        NULL, wrap_y * 2, 8);
1873                 if (progressive_score > interlaced_score) {
1874                     s->interlaced_dct = 1;
1875
1876                     dct_offset = wrap_y;
1877                     wrap_y <<= 1;
1878                     if (s->chroma_format == CHROMA_422)
1879                         wrap_c <<= 1;
1880                 }
1881             }
1882         }
1883
1884         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1885         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1886         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1887         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1888
1889         if (s->flags & CODEC_FLAG_GRAY) {
1890             skip_dct[4] = 1;
1891             skip_dct[5] = 1;
1892         } else {
1893             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1894             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1895             if (!s->chroma_y_shift) { /* 422 */
1896                 s->dsp.get_pixels(s->block[6],
1897                                   ptr_cb + (dct_offset >> 1), wrap_c);
1898                 s->dsp.get_pixels(s->block[7],
1899                                   ptr_cr + (dct_offset >> 1), wrap_c);
1900             }
1901         }
1902     } else {
1903         op_pixels_func (*op_pix)[4];
1904         qpel_mc_func (*op_qpix)[16];
1905         uint8_t *dest_y, *dest_cb, *dest_cr;
1906
1907         dest_y  = s->dest[0];
1908         dest_cb = s->dest[1];
1909         dest_cr = s->dest[2];
1910
1911         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1912             op_pix  = s->dsp.put_pixels_tab;
1913             op_qpix = s->dsp.put_qpel_pixels_tab;
1914         } else {
1915             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1916             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1917         }
1918
1919         if (s->mv_dir & MV_DIR_FORWARD) {
1920             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1921                           s->last_picture.f.data,
1922                           op_pix, op_qpix);
1923             op_pix  = s->dsp.avg_pixels_tab;
1924             op_qpix = s->dsp.avg_qpel_pixels_tab;
1925         }
1926         if (s->mv_dir & MV_DIR_BACKWARD) {
1927             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1928                           s->next_picture.f.data,
1929                           op_pix, op_qpix);
1930         }
1931
1932         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1933             int progressive_score, interlaced_score;
1934
1935             s->interlaced_dct = 0;
1936             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1937                                                     ptr_y,              wrap_y,
1938                                                     8) +
1939                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1940                                                     ptr_y + wrap_y * 8, wrap_y,
1941                                                     8) - 400;
1942
1943             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1944                 progressive_score -= 400;
1945
1946             if (progressive_score > 0) {
1947                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1948                                                        ptr_y,
1949                                                        wrap_y * 2, 8) +
1950                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1951                                                        ptr_y + wrap_y,
1952                                                        wrap_y * 2, 8);
1953
1954                 if (progressive_score > interlaced_score) {
1955                     s->interlaced_dct = 1;
1956
1957                     dct_offset = wrap_y;
1958                     wrap_y <<= 1;
1959                     if (s->chroma_format == CHROMA_422)
1960                         wrap_c <<= 1;
1961                 }
1962             }
1963         }
1964
1965         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1966         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1967         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1968                            dest_y + dct_offset, wrap_y);
1969         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1970                            dest_y + dct_offset + 8, wrap_y);
1971
1972         if (s->flags & CODEC_FLAG_GRAY) {
1973             skip_dct[4] = 1;
1974             skip_dct[5] = 1;
1975         } else {
1976             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1977             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1978             if (!s->chroma_y_shift) { /* 422 */
1979                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1980                                    dest_cb + (dct_offset >> 1), wrap_c);
1981                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1982                                    dest_cr + (dct_offset >> 1), wrap_c);
1983             }
1984         }
1985         /* pre quantization */
1986         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1987                 2 * s->qscale * s->qscale) {
1988             // FIXME optimize
1989             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1990                               wrap_y, 8) < 20 * s->qscale)
1991                 skip_dct[0] = 1;
1992             if (s->dsp.sad[1](NULL, ptr_y + 8,
1993                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1994                 skip_dct[1] = 1;
1995             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1996                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1997                 skip_dct[2] = 1;
1998             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1999                               dest_y + dct_offset + 8,
2000                               wrap_y, 8) < 20 * s->qscale)
2001                 skip_dct[3] = 1;
2002             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2003                               wrap_c, 8) < 20 * s->qscale)
2004                 skip_dct[4] = 1;
2005             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2006                               wrap_c, 8) < 20 * s->qscale)
2007                 skip_dct[5] = 1;
2008             if (!s->chroma_y_shift) { /* 422 */
2009                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2010                                   dest_cb + (dct_offset >> 1),
2011                                   wrap_c, 8) < 20 * s->qscale)
2012                     skip_dct[6] = 1;
2013                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2014                                   dest_cr + (dct_offset >> 1),
2015                                   wrap_c, 8) < 20 * s->qscale)
2016                     skip_dct[7] = 1;
2017             }
2018         }
2019     }
2020
2021     if (s->quantizer_noise_shaping) {
2022         if (!skip_dct[0])
2023             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2024         if (!skip_dct[1])
2025             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2026         if (!skip_dct[2])
2027             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2028         if (!skip_dct[3])
2029             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2030         if (!skip_dct[4])
2031             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2032         if (!skip_dct[5])
2033             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2034         if (!s->chroma_y_shift) { /* 422 */
2035             if (!skip_dct[6])
2036                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2037                                   wrap_c);
2038             if (!skip_dct[7])
2039                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2040                                   wrap_c);
2041         }
2042         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
2043     }
2044
2045     /* DCT & quantize */
2046     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2047     {
2048         for (i = 0; i < mb_block_count; i++) {
2049             if (!skip_dct[i]) {
2050                 int overflow;
2051                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2052                 // FIXME we could decide to change to quantizer instead of
2053                 // clipping
2054                 // JS: I don't think that would be a good idea it could lower
2055                 //     quality instead of improve it. Just INTRADC clipping
2056                 //     deserves changes in quantizer
2057                 if (overflow)
2058                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2059             } else
2060                 s->block_last_index[i] = -1;
2061         }
2062         if (s->quantizer_noise_shaping) {
2063             for (i = 0; i < mb_block_count; i++) {
2064                 if (!skip_dct[i]) {
2065                     s->block_last_index[i] =
2066                         dct_quantize_refine(s, s->block[i], weight[i],
2067                                             orig[i], i, s->qscale);
2068                 }
2069             }
2070         }
2071
2072         if (s->luma_elim_threshold && !s->mb_intra)
2073             for (i = 0; i < 4; i++)
2074                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2075         if (s->chroma_elim_threshold && !s->mb_intra)
2076             for (i = 4; i < mb_block_count; i++)
2077                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2078
2079         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2080             for (i = 0; i < mb_block_count; i++) {
2081                 if (s->block_last_index[i] == -1)
2082                     s->coded_score[i] = INT_MAX / 256;
2083             }
2084         }
2085     }
2086
2087     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2088         s->block_last_index[4] =
2089         s->block_last_index[5] = 0;
2090         s->block[4][0] =
2091         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2092     }
2093
2094     // non c quantize code returns incorrect block_last_index FIXME
2095     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2096         for (i = 0; i < mb_block_count; i++) {
2097             int j;
2098             if (s->block_last_index[i] > 0) {
2099                 for (j = 63; j > 0; j--) {
2100                     if (s->block[i][s->intra_scantable.permutated[j]])
2101                         break;
2102                 }
2103                 s->block_last_index[i] = j;
2104             }
2105         }
2106     }
2107
2108     /* huffman encode */
2109     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2110     case AV_CODEC_ID_MPEG1VIDEO:
2111     case AV_CODEC_ID_MPEG2VIDEO:
2112         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2113             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2114         break;
2115     case AV_CODEC_ID_MPEG4:
2116         if (CONFIG_MPEG4_ENCODER)
2117             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2118         break;
2119     case AV_CODEC_ID_MSMPEG4V2:
2120     case AV_CODEC_ID_MSMPEG4V3:
2121     case AV_CODEC_ID_WMV1:
2122         if (CONFIG_MSMPEG4_ENCODER)
2123             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2124         break;
2125     case AV_CODEC_ID_WMV2:
2126         if (CONFIG_WMV2_ENCODER)
2127             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2128         break;
2129     case AV_CODEC_ID_H261:
2130         if (CONFIG_H261_ENCODER)
2131             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2132         break;
2133     case AV_CODEC_ID_H263:
2134     case AV_CODEC_ID_H263P:
2135     case AV_CODEC_ID_FLV1:
2136     case AV_CODEC_ID_RV10:
2137     case AV_CODEC_ID_RV20:
2138         if (CONFIG_H263_ENCODER)
2139             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2140         break;
2141     case AV_CODEC_ID_MJPEG:
2142     case AV_CODEC_ID_AMV:
2143         if (CONFIG_MJPEG_ENCODER)
2144             ff_mjpeg_encode_mb(s, s->block);
2145         break;
2146     default:
2147         av_assert1(0);
2148     }
2149 }
2150
2151 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2152 {
2153     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2154     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2155 }
2156
2157 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2158     int i;
2159
2160     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2161
2162     /* mpeg1 */
2163     d->mb_skip_run= s->mb_skip_run;
2164     for(i=0; i<3; i++)
2165         d->last_dc[i] = s->last_dc[i];
2166
2167     /* statistics */
2168     d->mv_bits= s->mv_bits;
2169     d->i_tex_bits= s->i_tex_bits;
2170     d->p_tex_bits= s->p_tex_bits;
2171     d->i_count= s->i_count;
2172     d->f_count= s->f_count;
2173     d->b_count= s->b_count;
2174     d->skip_count= s->skip_count;
2175     d->misc_bits= s->misc_bits;
2176     d->last_bits= 0;
2177
2178     d->mb_skipped= 0;
2179     d->qscale= s->qscale;
2180     d->dquant= s->dquant;
2181
2182     d->esc3_level_length= s->esc3_level_length;
2183 }
2184
2185 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2186     int i;
2187
2188     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2189     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2190
2191     /* mpeg1 */
2192     d->mb_skip_run= s->mb_skip_run;
2193     for(i=0; i<3; i++)
2194         d->last_dc[i] = s->last_dc[i];
2195
2196     /* statistics */
2197     d->mv_bits= s->mv_bits;
2198     d->i_tex_bits= s->i_tex_bits;
2199     d->p_tex_bits= s->p_tex_bits;
2200     d->i_count= s->i_count;
2201     d->f_count= s->f_count;
2202     d->b_count= s->b_count;
2203     d->skip_count= s->skip_count;
2204     d->misc_bits= s->misc_bits;
2205
2206     d->mb_intra= s->mb_intra;
2207     d->mb_skipped= s->mb_skipped;
2208     d->mv_type= s->mv_type;
2209     d->mv_dir= s->mv_dir;
2210     d->pb= s->pb;
2211     if(s->data_partitioning){
2212         d->pb2= s->pb2;
2213         d->tex_pb= s->tex_pb;
2214     }
2215     d->block= s->block;
2216     for(i=0; i<8; i++)
2217         d->block_last_index[i]= s->block_last_index[i];
2218     d->interlaced_dct= s->interlaced_dct;
2219     d->qscale= s->qscale;
2220
2221     d->esc3_level_length= s->esc3_level_length;
2222 }
2223
2224 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2225                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2226                            int *dmin, int *next_block, int motion_x, int motion_y)
2227 {
2228     int score;
2229     uint8_t *dest_backup[3];
2230
2231     copy_context_before_encode(s, backup, type);
2232
2233     s->block= s->blocks[*next_block];
2234     s->pb= pb[*next_block];
2235     if(s->data_partitioning){
2236         s->pb2   = pb2   [*next_block];
2237         s->tex_pb= tex_pb[*next_block];
2238     }
2239
2240     if(*next_block){
2241         memcpy(dest_backup, s->dest, sizeof(s->dest));
2242         s->dest[0] = s->rd_scratchpad;
2243         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2244         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2245         assert(s->linesize >= 32); //FIXME
2246     }
2247
2248     encode_mb(s, motion_x, motion_y);
2249
2250     score= put_bits_count(&s->pb);
2251     if(s->data_partitioning){
2252         score+= put_bits_count(&s->pb2);
2253         score+= put_bits_count(&s->tex_pb);
2254     }
2255
2256     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2257         ff_MPV_decode_mb(s, s->block);
2258
2259         score *= s->lambda2;
2260         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2261     }
2262
2263     if(*next_block){
2264         memcpy(s->dest, dest_backup, sizeof(s->dest));
2265     }
2266
2267     if(score<*dmin){
2268         *dmin= score;
2269         *next_block^=1;
2270
2271         copy_context_after_encode(best, s, type);
2272     }
2273 }
2274
2275 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2276     uint32_t *sq = ff_squareTbl + 256;
2277     int acc=0;
2278     int x,y;
2279
2280     if(w==16 && h==16)
2281         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2282     else if(w==8 && h==8)
2283         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2284
2285     for(y=0; y<h; y++){
2286         for(x=0; x<w; x++){
2287             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2288         }
2289     }
2290
2291     av_assert2(acc>=0);
2292
2293     return acc;
2294 }
2295
2296 static int sse_mb(MpegEncContext *s){
2297     int w= 16;
2298     int h= 16;
2299
2300     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2301     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2302
2303     if(w==16 && h==16)
2304       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2305         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2306                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2307                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2308       }else{
2309         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2310                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2311                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2312       }
2313     else
2314         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2315                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2316                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2317 }
2318
2319 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2320     MpegEncContext *s= *(void**)arg;
2321
2322
2323     s->me.pre_pass=1;
2324     s->me.dia_size= s->avctx->pre_dia_size;
2325     s->first_slice_line=1;
2326     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2327         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2328             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2329         }
2330         s->first_slice_line=0;
2331     }
2332
2333     s->me.pre_pass=0;
2334
2335     return 0;
2336 }
2337
2338 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2339     MpegEncContext *s= *(void**)arg;
2340
2341     ff_check_alignment();
2342
2343     s->me.dia_size= s->avctx->dia_size;
2344     s->first_slice_line=1;
2345     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2346         s->mb_x=0; //for block init below
2347         ff_init_block_index(s);
2348         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2349             s->block_index[0]+=2;
2350             s->block_index[1]+=2;
2351             s->block_index[2]+=2;
2352             s->block_index[3]+=2;
2353
2354             /* compute motion vector & mb_type and store in context */
2355             if(s->pict_type==AV_PICTURE_TYPE_B)
2356                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2357             else
2358                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2359         }
2360         s->first_slice_line=0;
2361     }
2362     return 0;
2363 }
2364
2365 static int mb_var_thread(AVCodecContext *c, void *arg){
2366     MpegEncContext *s= *(void**)arg;
2367     int mb_x, mb_y;
2368
2369     ff_check_alignment();
2370
2371     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2372         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2373             int xx = mb_x * 16;
2374             int yy = mb_y * 16;
2375             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2376             int varc;
2377             int sum = s->dsp.pix_sum(pix, s->linesize);
2378
2379             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2380
2381             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2382             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2383             s->me.mb_var_sum_temp    += varc;
2384         }
2385     }
2386     return 0;
2387 }
2388
2389 static void write_slice_end(MpegEncContext *s){
2390     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2391         if(s->partitioned_frame){
2392             ff_mpeg4_merge_partitions(s);
2393         }
2394
2395         ff_mpeg4_stuffing(&s->pb);
2396     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2397         ff_mjpeg_encode_stuffing(s);
2398     }
2399
2400     avpriv_align_put_bits(&s->pb);
2401     flush_put_bits(&s->pb);
2402
2403     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2404         s->misc_bits+= get_bits_diff(s);
2405 }
2406
2407 static void write_mb_info(MpegEncContext *s)
2408 {
2409     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2410     int offset = put_bits_count(&s->pb);
2411     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2412     int gobn = s->mb_y / s->gob_index;
2413     int pred_x, pred_y;
2414     if (CONFIG_H263_ENCODER)
2415         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2416     bytestream_put_le32(&ptr, offset);
2417     bytestream_put_byte(&ptr, s->qscale);
2418     bytestream_put_byte(&ptr, gobn);
2419     bytestream_put_le16(&ptr, mba);
2420     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2421     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2422     /* 4MV not implemented */
2423     bytestream_put_byte(&ptr, 0); /* hmv2 */
2424     bytestream_put_byte(&ptr, 0); /* vmv2 */
2425 }
2426
2427 static void update_mb_info(MpegEncContext *s, int startcode)
2428 {
2429     if (!s->mb_info)
2430         return;
2431     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2432         s->mb_info_size += 12;
2433         s->prev_mb_info = s->last_mb_info;
2434     }
2435     if (startcode) {
2436         s->prev_mb_info = put_bits_count(&s->pb)/8;
2437         /* This might have incremented mb_info_size above, and we return without
2438          * actually writing any info into that slot yet. But in that case,
2439          * this will be called again at the start of the after writing the
2440          * start code, actually writing the mb info. */
2441         return;
2442     }
2443
2444     s->last_mb_info = put_bits_count(&s->pb)/8;
2445     if (!s->mb_info_size)
2446         s->mb_info_size += 12;
2447     write_mb_info(s);
2448 }
2449
2450 static int encode_thread(AVCodecContext *c, void *arg){
2451     MpegEncContext *s= *(void**)arg;
2452     int mb_x, mb_y, pdif = 0;
2453     int chr_h= 16>>s->chroma_y_shift;
2454     int i, j;
2455     MpegEncContext best_s, backup_s;
2456     uint8_t bit_buf[2][MAX_MB_BYTES];
2457     uint8_t bit_buf2[2][MAX_MB_BYTES];
2458     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2459     PutBitContext pb[2], pb2[2], tex_pb[2];
2460 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2461
2462     ff_check_alignment();
2463
2464     for(i=0; i<2; i++){
2465         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2466         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2467         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2468     }
2469
2470     s->last_bits= put_bits_count(&s->pb);
2471     s->mv_bits=0;
2472     s->misc_bits=0;
2473     s->i_tex_bits=0;
2474     s->p_tex_bits=0;
2475     s->i_count=0;
2476     s->f_count=0;
2477     s->b_count=0;
2478     s->skip_count=0;
2479
2480     for(i=0; i<3; i++){
2481         /* init last dc values */
2482         /* note: quant matrix value (8) is implied here */
2483         s->last_dc[i] = 128 << s->intra_dc_precision;
2484
2485         s->current_picture.f.error[i] = 0;
2486     }
2487     if(s->codec_id==AV_CODEC_ID_AMV){
2488         s->last_dc[0] = 128*8/13;
2489         s->last_dc[1] = 128*8/14;
2490         s->last_dc[2] = 128*8/14;
2491     }
2492     s->mb_skip_run = 0;
2493     memset(s->last_mv, 0, sizeof(s->last_mv));
2494
2495     s->last_mv_dir = 0;
2496
2497     switch(s->codec_id){
2498     case AV_CODEC_ID_H263:
2499     case AV_CODEC_ID_H263P:
2500     case AV_CODEC_ID_FLV1:
2501         if (CONFIG_H263_ENCODER)
2502             s->gob_index = ff_h263_get_gob_height(s);
2503         break;
2504     case AV_CODEC_ID_MPEG4:
2505         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2506             ff_mpeg4_init_partitions(s);
2507         break;
2508     }
2509
2510     s->resync_mb_x=0;
2511     s->resync_mb_y=0;
2512     s->first_slice_line = 1;
2513     s->ptr_lastgob = s->pb.buf;
2514     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2515 //    printf("row %d at %X\n", s->mb_y, (int)s);
2516         s->mb_x=0;
2517         s->mb_y= mb_y;
2518
2519         ff_set_qscale(s, s->qscale);
2520         ff_init_block_index(s);
2521
2522         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2523             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2524             int mb_type= s->mb_type[xy];
2525 //            int d;
2526             int dmin= INT_MAX;
2527             int dir;
2528
2529             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2530                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2531                 return -1;
2532             }
2533             if(s->data_partitioning){
2534                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2535                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2536                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2537                     return -1;
2538                 }
2539             }
2540
2541             s->mb_x = mb_x;
2542             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2543             ff_update_block_index(s);
2544
2545             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2546                 ff_h261_reorder_mb_index(s);
2547                 xy= s->mb_y*s->mb_stride + s->mb_x;
2548                 mb_type= s->mb_type[xy];
2549             }
2550
2551             /* write gob / video packet header  */
2552             if(s->rtp_mode){
2553                 int current_packet_size, is_gob_start;
2554
2555                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2556
2557                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2558
2559                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2560
2561                 switch(s->codec_id){
2562                 case AV_CODEC_ID_H263:
2563                 case AV_CODEC_ID_H263P:
2564                     if(!s->h263_slice_structured)
2565                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2566                     break;
2567                 case AV_CODEC_ID_MPEG2VIDEO:
2568                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2569                 case AV_CODEC_ID_MPEG1VIDEO:
2570                     if(s->mb_skip_run) is_gob_start=0;
2571                     break;
2572                 case AV_CODEC_ID_MJPEG:
2573                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2574                     break;
2575                 }
2576
2577                 if(is_gob_start){
2578                     if(s->start_mb_y != mb_y || mb_x!=0){
2579                         write_slice_end(s);
2580                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2581                             ff_mpeg4_init_partitions(s);
2582                         }
2583                     }
2584
2585                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2586                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2587
2588                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2589                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2590                         int d= 100 / s->avctx->error_rate;
2591                         if(r % d == 0){
2592                             current_packet_size=0;
2593                             s->pb.buf_ptr= s->ptr_lastgob;
2594                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2595                         }
2596                     }
2597
2598                     if (s->avctx->rtp_callback){
2599                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2600                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2601                     }
2602                     update_mb_info(s, 1);
2603
2604                     switch(s->codec_id){
2605                     case AV_CODEC_ID_MPEG4:
2606                         if (CONFIG_MPEG4_ENCODER) {
2607                             ff_mpeg4_encode_video_packet_header(s);
2608                             ff_mpeg4_clean_buffers(s);
2609                         }
2610                     break;
2611                     case AV_CODEC_ID_MPEG1VIDEO:
2612                     case AV_CODEC_ID_MPEG2VIDEO:
2613                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2614                             ff_mpeg1_encode_slice_header(s);
2615                             ff_mpeg1_clean_buffers(s);
2616                         }
2617                     break;
2618                     case AV_CODEC_ID_H263:
2619                     case AV_CODEC_ID_H263P:
2620                         if (CONFIG_H263_ENCODER)
2621                             ff_h263_encode_gob_header(s, mb_y);
2622                     break;
2623                     }
2624
2625                     if(s->flags&CODEC_FLAG_PASS1){
2626                         int bits= put_bits_count(&s->pb);
2627                         s->misc_bits+= bits - s->last_bits;
2628                         s->last_bits= bits;
2629                     }
2630
2631                     s->ptr_lastgob += current_packet_size;
2632                     s->first_slice_line=1;
2633                     s->resync_mb_x=mb_x;
2634                     s->resync_mb_y=mb_y;
2635                 }
2636             }
2637
2638             if(  (s->resync_mb_x   == s->mb_x)
2639                && s->resync_mb_y+1 == s->mb_y){
2640                 s->first_slice_line=0;
2641             }
2642
2643             s->mb_skipped=0;
2644             s->dquant=0; //only for QP_RD
2645
2646             update_mb_info(s, 0);
2647
2648             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2649                 int next_block=0;
2650                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2651
2652                 copy_context_before_encode(&backup_s, s, -1);
2653                 backup_s.pb= s->pb;
2654                 best_s.data_partitioning= s->data_partitioning;
2655                 best_s.partitioned_frame= s->partitioned_frame;
2656                 if(s->data_partitioning){
2657                     backup_s.pb2= s->pb2;
2658                     backup_s.tex_pb= s->tex_pb;
2659                 }
2660
2661                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2662                     s->mv_dir = MV_DIR_FORWARD;
2663                     s->mv_type = MV_TYPE_16X16;
2664                     s->mb_intra= 0;
2665                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2666                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2667                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2668                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2669                 }
2670                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2671                     s->mv_dir = MV_DIR_FORWARD;
2672                     s->mv_type = MV_TYPE_FIELD;
2673                     s->mb_intra= 0;
2674                     for(i=0; i<2; i++){
2675                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2676                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2677                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2678                     }
2679                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2680                                  &dmin, &next_block, 0, 0);
2681                 }
2682                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2683                     s->mv_dir = MV_DIR_FORWARD;
2684                     s->mv_type = MV_TYPE_16X16;
2685                     s->mb_intra= 0;
2686                     s->mv[0][0][0] = 0;
2687                     s->mv[0][0][1] = 0;
2688                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2689                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2690                 }
2691                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2692                     s->mv_dir = MV_DIR_FORWARD;
2693                     s->mv_type = MV_TYPE_8X8;
2694                     s->mb_intra= 0;
2695                     for(i=0; i<4; i++){
2696                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2697                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2698                     }
2699                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2700                                  &dmin, &next_block, 0, 0);
2701                 }
2702                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2703                     s->mv_dir = MV_DIR_FORWARD;
2704                     s->mv_type = MV_TYPE_16X16;
2705                     s->mb_intra= 0;
2706                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2707                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2708                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2709                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2710                 }
2711                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2712                     s->mv_dir = MV_DIR_BACKWARD;
2713                     s->mv_type = MV_TYPE_16X16;
2714                     s->mb_intra= 0;
2715                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2716                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2717                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2718                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2719                 }
2720                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2721                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2722                     s->mv_type = MV_TYPE_16X16;
2723                     s->mb_intra= 0;
2724                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2725                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2726                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2727                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2728                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2729                                  &dmin, &next_block, 0, 0);
2730                 }
2731                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2732                     s->mv_dir = MV_DIR_FORWARD;
2733                     s->mv_type = MV_TYPE_FIELD;
2734                     s->mb_intra= 0;
2735                     for(i=0; i<2; i++){
2736                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2737                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2738                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2739                     }
2740                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2741                                  &dmin, &next_block, 0, 0);
2742                 }
2743                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2744                     s->mv_dir = MV_DIR_BACKWARD;
2745                     s->mv_type = MV_TYPE_FIELD;
2746                     s->mb_intra= 0;
2747                     for(i=0; i<2; i++){
2748                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2749                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2750                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2751                     }
2752                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2753                                  &dmin, &next_block, 0, 0);
2754                 }
2755                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2756                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2757                     s->mv_type = MV_TYPE_FIELD;
2758                     s->mb_intra= 0;
2759                     for(dir=0; dir<2; dir++){
2760                         for(i=0; i<2; i++){
2761                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2762                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2763                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2764                         }
2765                     }
2766                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2767                                  &dmin, &next_block, 0, 0);
2768                 }
2769                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2770                     s->mv_dir = 0;
2771                     s->mv_type = MV_TYPE_16X16;
2772                     s->mb_intra= 1;
2773                     s->mv[0][0][0] = 0;
2774                     s->mv[0][0][1] = 0;
2775                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2776                                  &dmin, &next_block, 0, 0);
2777                     if(s->h263_pred || s->h263_aic){
2778                         if(best_s.mb_intra)
2779                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2780                         else
2781                             ff_clean_intra_table_entries(s); //old mode?
2782                     }
2783                 }
2784
2785                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2786                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2787                         const int last_qp= backup_s.qscale;
2788                         int qpi, qp, dc[6];
2789                         DCTELEM ac[6][16];
2790                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2791                         static const int dquant_tab[4]={-1,1,-2,2};
2792
2793                         av_assert2(backup_s.dquant == 0);
2794
2795                         //FIXME intra
2796                         s->mv_dir= best_s.mv_dir;
2797                         s->mv_type = MV_TYPE_16X16;
2798                         s->mb_intra= best_s.mb_intra;
2799                         s->mv[0][0][0] = best_s.mv[0][0][0];
2800                         s->mv[0][0][1] = best_s.mv[0][0][1];
2801                         s->mv[1][0][0] = best_s.mv[1][0][0];
2802                         s->mv[1][0][1] = best_s.mv[1][0][1];
2803
2804                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2805                         for(; qpi<4; qpi++){
2806                             int dquant= dquant_tab[qpi];
2807                             qp= last_qp + dquant;
2808                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2809                                 continue;
2810                             backup_s.dquant= dquant;
2811                             if(s->mb_intra && s->dc_val[0]){
2812                                 for(i=0; i<6; i++){
2813                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2814                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2815                                 }
2816                             }
2817
2818                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2819                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2820                             if(best_s.qscale != qp){
2821                                 if(s->mb_intra && s->dc_val[0]){
2822                                     for(i=0; i<6; i++){
2823                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2824                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2825                                     }
2826                                 }
2827                             }
2828                         }
2829                     }
2830                 }
2831                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2832                     int mx= s->b_direct_mv_table[xy][0];
2833                     int my= s->b_direct_mv_table[xy][1];
2834
2835                     backup_s.dquant = 0;
2836                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2837                     s->mb_intra= 0;
2838                     ff_mpeg4_set_direct_mv(s, mx, my);
2839                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2840                                  &dmin, &next_block, mx, my);
2841                 }
2842                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2843                     backup_s.dquant = 0;
2844                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2845                     s->mb_intra= 0;
2846                     ff_mpeg4_set_direct_mv(s, 0, 0);
2847                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2848                                  &dmin, &next_block, 0, 0);
2849                 }
2850                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2851                     int coded=0;
2852                     for(i=0; i<6; i++)
2853                         coded |= s->block_last_index[i];
2854                     if(coded){
2855                         int mx,my;
2856                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2857                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2858                             mx=my=0; //FIXME find the one we actually used
2859                             ff_mpeg4_set_direct_mv(s, mx, my);
2860                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2861                             mx= s->mv[1][0][0];
2862                             my= s->mv[1][0][1];
2863                         }else{
2864                             mx= s->mv[0][0][0];
2865                             my= s->mv[0][0][1];
2866                         }
2867
2868                         s->mv_dir= best_s.mv_dir;
2869                         s->mv_type = best_s.mv_type;
2870                         s->mb_intra= 0;
2871 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2872                         s->mv[0][0][1] = best_s.mv[0][0][1];
2873                         s->mv[1][0][0] = best_s.mv[1][0][0];
2874                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2875                         backup_s.dquant= 0;
2876                         s->skipdct=1;
2877                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2878                                         &dmin, &next_block, mx, my);
2879                         s->skipdct=0;
2880                     }
2881                 }
2882
2883                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2884
2885                 copy_context_after_encode(s, &best_s, -1);
2886
2887                 pb_bits_count= put_bits_count(&s->pb);
2888                 flush_put_bits(&s->pb);
2889                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2890                 s->pb= backup_s.pb;
2891
2892                 if(s->data_partitioning){
2893                     pb2_bits_count= put_bits_count(&s->pb2);
2894                     flush_put_bits(&s->pb2);
2895                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2896                     s->pb2= backup_s.pb2;
2897
2898                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2899                     flush_put_bits(&s->tex_pb);
2900                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2901                     s->tex_pb= backup_s.tex_pb;
2902                 }
2903                 s->last_bits= put_bits_count(&s->pb);
2904
2905                 if (CONFIG_H263_ENCODER &&
2906                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2907                     ff_h263_update_motion_val(s);
2908
2909                 if(next_block==0){ //FIXME 16 vs linesize16
2910                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2911                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2912                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2913                 }
2914
2915                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2916                     ff_MPV_decode_mb(s, s->block);
2917             } else {
2918                 int motion_x = 0, motion_y = 0;
2919                 s->mv_type=MV_TYPE_16X16;
2920                 // only one MB-Type possible
2921
2922                 switch(mb_type){
2923                 case CANDIDATE_MB_TYPE_INTRA:
2924                     s->mv_dir = 0;
2925                     s->mb_intra= 1;
2926                     motion_x= s->mv[0][0][0] = 0;
2927                     motion_y= s->mv[0][0][1] = 0;
2928                     break;
2929                 case CANDIDATE_MB_TYPE_INTER:
2930                     s->mv_dir = MV_DIR_FORWARD;
2931                     s->mb_intra= 0;
2932                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2933                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2934                     break;
2935                 case CANDIDATE_MB_TYPE_INTER_I:
2936                     s->mv_dir = MV_DIR_FORWARD;
2937                     s->mv_type = MV_TYPE_FIELD;
2938                     s->mb_intra= 0;
2939                     for(i=0; i<2; i++){
2940                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2941                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2942                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2943                     }
2944                     break;
2945                 case CANDIDATE_MB_TYPE_INTER4V:
2946                     s->mv_dir = MV_DIR_FORWARD;
2947                     s->mv_type = MV_TYPE_8X8;
2948                     s->mb_intra= 0;
2949                     for(i=0; i<4; i++){
2950                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2951                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2952                     }
2953                     break;
2954                 case CANDIDATE_MB_TYPE_DIRECT:
2955                     if (CONFIG_MPEG4_ENCODER) {
2956                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2957                         s->mb_intra= 0;
2958                         motion_x=s->b_direct_mv_table[xy][0];
2959                         motion_y=s->b_direct_mv_table[xy][1];
2960                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2961                     }
2962                     break;
2963                 case CANDIDATE_MB_TYPE_DIRECT0:
2964                     if (CONFIG_MPEG4_ENCODER) {
2965                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2966                         s->mb_intra= 0;
2967                         ff_mpeg4_set_direct_mv(s, 0, 0);
2968                     }
2969                     break;
2970                 case CANDIDATE_MB_TYPE_BIDIR:
2971                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2972                     s->mb_intra= 0;
2973                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2974                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2975                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2976                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2977                     break;
2978                 case CANDIDATE_MB_TYPE_BACKWARD:
2979                     s->mv_dir = MV_DIR_BACKWARD;
2980                     s->mb_intra= 0;
2981                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2982                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2983                     break;
2984                 case CANDIDATE_MB_TYPE_FORWARD:
2985                     s->mv_dir = MV_DIR_FORWARD;
2986                     s->mb_intra= 0;
2987                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2988                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2989 //                    printf(" %d %d ", motion_x, motion_y);
2990                     break;
2991                 case CANDIDATE_MB_TYPE_FORWARD_I:
2992                     s->mv_dir = MV_DIR_FORWARD;
2993                     s->mv_type = MV_TYPE_FIELD;
2994                     s->mb_intra= 0;
2995                     for(i=0; i<2; i++){
2996                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2997                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2998                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2999                     }
3000                     break;
3001                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3002                     s->mv_dir = MV_DIR_BACKWARD;
3003                     s->mv_type = MV_TYPE_FIELD;
3004                     s->mb_intra= 0;
3005                     for(i=0; i<2; i++){
3006                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3007                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3008                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3009                     }
3010                     break;
3011                 case CANDIDATE_MB_TYPE_BIDIR_I:
3012                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3013                     s->mv_type = MV_TYPE_FIELD;
3014                     s->mb_intra= 0;
3015                     for(dir=0; dir<2; dir++){
3016                         for(i=0; i<2; i++){
3017                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3018                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3019                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3020                         }
3021                     }
3022                     break;
3023                 default:
3024                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3025                 }
3026
3027                 encode_mb(s, motion_x, motion_y);
3028
3029                 // RAL: Update last macroblock type
3030                 s->last_mv_dir = s->mv_dir;
3031
3032                 if (CONFIG_H263_ENCODER &&
3033                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3034                     ff_h263_update_motion_val(s);
3035
3036                 ff_MPV_decode_mb(s, s->block);
3037             }
3038
3039             /* clean the MV table in IPS frames for direct mode in B frames */
3040             if(s->mb_intra /* && I,P,S_TYPE */){
3041                 s->p_mv_table[xy][0]=0;
3042                 s->p_mv_table[xy][1]=0;
3043             }
3044
3045             if(s->flags&CODEC_FLAG_PSNR){
3046                 int w= 16;
3047                 int h= 16;
3048
3049                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3050                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3051
3052                 s->current_picture.f.error[0] += sse(
3053                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3054                     s->dest[0], w, h, s->linesize);
3055                 s->current_picture.f.error[1] += sse(
3056                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3057                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3058                 s->current_picture.f.error[2] += sse(
3059                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3060                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3061             }
3062             if(s->loop_filter){
3063                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3064                     ff_h263_loop_filter(s);
3065             }
3066 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3067         }
3068     }
3069
3070     //not beautiful here but we must write it before flushing so it has to be here
3071     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3072         ff_msmpeg4_encode_ext_header(s);
3073
3074     write_slice_end(s);
3075
3076     /* Send the last GOB if RTP */
3077     if (s->avctx->rtp_callback) {
3078         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3079         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3080         /* Call the RTP callback to send the last GOB */
3081         emms_c();
3082         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3083     }
3084
3085     return 0;
3086 }
3087
/**
 * Add src->field to dst->field and reset src->field to zero.
 *
 * Relies on pointer variables named `dst` and `src` being in scope at the
 * point of use. The body is wrapped in do { } while (0) so that the macro
 * expands to exactly one statement; without the wrapper the reset of
 * src->field would escape an unbraced if/else/for around the macro call.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3089 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3090     MERGE(me.scene_change_score);
3091     MERGE(me.mc_mb_var_sum_temp);
3092     MERGE(me.mb_var_sum_temp);
3093 }
3094
3095 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3096     int i;
3097
3098     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3099     MERGE(dct_count[1]);
3100     MERGE(mv_bits);
3101     MERGE(i_tex_bits);
3102     MERGE(p_tex_bits);
3103     MERGE(i_count);
3104     MERGE(f_count);
3105     MERGE(b_count);
3106     MERGE(skip_count);
3107     MERGE(misc_bits);
3108     MERGE(error_count);
3109     MERGE(padding_bug_score);
3110     MERGE(current_picture.f.error[0]);
3111     MERGE(current_picture.f.error[1]);
3112     MERGE(current_picture.f.error[2]);
3113
3114     if(dst->avctx->noise_reduction){
3115         for(i=0; i<64; i++){
3116             MERGE(dct_error_sum[0][i]);
3117             MERGE(dct_error_sum[1][i]);
3118         }
3119     }
3120
3121     assert(put_bits_count(&src->pb) % 8 ==0);
3122     assert(put_bits_count(&dst->pb) % 8 ==0);
3123     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3124     flush_put_bits(&dst->pb);
3125 }
3126
3127 static int estimate_qp(MpegEncContext *s, int dry_run){
3128     if (s->next_lambda){
3129         s->current_picture_ptr->f.quality =
3130         s->current_picture.f.quality = s->next_lambda;
3131         if(!dry_run) s->next_lambda= 0;
3132     } else if (!s->fixed_qscale) {
3133         s->current_picture_ptr->f.quality =
3134         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3135         if (s->current_picture.f.quality < 0)
3136             return -1;
3137     }
3138
3139     if(s->adaptive_quant){
3140         switch(s->codec_id){
3141         case AV_CODEC_ID_MPEG4:
3142             if (CONFIG_MPEG4_ENCODER)
3143                 ff_clean_mpeg4_qscales(s);
3144             break;
3145         case AV_CODEC_ID_H263:
3146         case AV_CODEC_ID_H263P:
3147         case AV_CODEC_ID_FLV1:
3148             if (CONFIG_H263_ENCODER)
3149                 ff_clean_h263_qscales(s);
3150             break;
3151         default:
3152             ff_init_qscale_tab(s);
3153         }
3154
3155         s->lambda= s->lambda_table[0];
3156         //FIXME broken
3157     }else
3158         s->lambda = s->current_picture.f.quality;
3159 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3160     update_qscale(s);
3161     return 0;
3162 }
3163
3164 /* must be called before writing the header */
3165 static void set_frame_distances(MpegEncContext * s){
3166     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3167     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3168
3169     if(s->pict_type==AV_PICTURE_TYPE_B){
3170         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3171         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3172     }else{
3173         s->pp_time= s->time - s->last_non_b_time;
3174         s->last_non_b_time= s->time;
3175         assert(s->picture_number==0 || s->pp_time > 0);
3176     }
3177 }
3178
/**
 * Encode the current picture into s->pb.
 *
 * The steps visible in this function are, in order:
 *  - initialise timing fields and the rounding mode,
 *  - pick a provisional lambda/qscale for motion estimation,
 *  - run motion estimation (or, for I pictures, the macroblock variance
 *    pass) on all slice contexts and merge their statistics,
 *  - turn a P picture into an I picture when the scene change score
 *    exceeds the configured threshold,
 *  - choose f_code/b_code and fix up too long motion vectors,
 *  - determine the final quality via estimate_qp(),
 *  - set up the quantisation matrices for MJPEG and AMV,
 *  - write the picture header,
 *  - run encode_thread on all slice contexts and merge the results.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
static int encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;
    int context_count = s->slice_context_count;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
        set_frame_distances(s);
    if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
        ff_set_mpeg4_time(s);

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion

    /* rounding mode: fixed for I pictures, optionally toggled on every
     * other non-B picture */
    if(s->pict_type==AV_PICTURE_TYPE_I){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=AV_PICTURE_TYPE_B){
        if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* provisional lambda/qscale used during motion estimation:
     * in 2-pass mode a dry run of estimate_qp(), otherwise (unless the
     * QSCALE flag is set) the last lambda stored for this kind of picture */
    if(s->flags & CODEC_FLAG_PASS2){
        if (estimate_qp(s,1) < 0)
            return -1;
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==AV_PICTURE_TYPE_B)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    /* every codec except AMV shares the luma intra quant matrices for
     * chroma; chroma matrices that are distinct from the luma ones are
     * freed before the pointers are aliased */
    if(s->codec_id != AV_CODEC_ID_AMV){
        if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
        if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
        s->q_chroma_intra_matrix   = s->q_intra_matrix;
        s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* bring the additional slice contexts (index 1 and up) up to date */
    for(i=1; i<context_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    if(ff_init_me(s)<0)
        return -1;

    /* Estimate motion for every MB */
    if(s->pict_type != AV_PICTURE_TYPE_I){
        /* scale lambda by me_penalty_compensation/256 (rounded) for the ME */
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
            /* optional pre-pass: with pre_me==2 always, with any other
             * non-zero pre_me only after an I picture */
            if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
    }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
        }
    }
    /* collect the ME statistics of the additional slice contexts */
    for(i=1; i<context_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* scene change: a P picture whose score exceeds the threshold is coded
     * as an I picture with every macroblock marked intra */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
        s->pict_type= AV_PICTURE_TYPE_I;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
    }

    /* f_code/b_code selection and long-MV fix-up; skipped entirely when
     * umvplus is set */
    if(!s->umvplus){
        if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX3(s->f_code, a, b);
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==AV_PICTURE_TYPE_B){
            int a, b;

            /* f_code covers the forward and bidir-forward tables,
             * b_code the backward and bidir-backward tables */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    /* final quality decision for this picture (not a dry run) */
    if (estimate_qp(s, 0) < 0)
        return -1;

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
        }
        s->y_dc_scale_table=
        s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
        s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
        ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        /* the matrix was converted for qscale 8 only, so qscale is pinned to 8 */
        s->qscale= 8;
    }
    if(s->codec_id == AV_CODEC_ID_AMV){
        /* AMV: constant DC scale tables (13 for luma, 14 for chroma) and
         * quant matrices taken from sp5x_quant_table rows 10 and 11 */
        static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
        static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];

            s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
            s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
        }
        s->y_dc_scale_table= y;
        s->c_dc_scale_table= c;
        s->intra_matrix[0] = 13;
        s->chroma_intra_matrix[0] = 14;
        ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->f.key_frame =
    s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
    s->current_picture_ptr->f.pict_type =
    s->current_picture.f.pict_type = s->pict_type;

    if (s->current_picture.f.key_frame)
        s->picture_in_gop_number=0;

    s->mb_x = s->mb_y = 0;
    /* remember the bit position so the header size can be measured below */
    s->last_bits= put_bits_count(&s->pb);
    /* write the picture header of the selected output format */
    switch(s->out_format) {
    case FMT_MJPEG:
        if (CONFIG_MJPEG_ENCODER)
            ff_mjpeg_encode_picture_header(s);
        break;
    case FMT_H261:
        if (CONFIG_H261_ENCODER)
            ff_h261_encode_picture_header(s, picture_number);
        break;
    case FMT_H263:
        /* several codecs share FMT_H263; the first matching one wins */
        if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
            ff_msmpeg4_encode_picture_header(s, picture_number);
        else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
            ff_mpeg4_encode_picture_header(s, picture_number);
        else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
            ff_rv10_encode_picture_header(s, picture_number);
        else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
            ff_rv20_encode_picture_header(s, picture_number);
        else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else if (CONFIG_H263_ENCODER)
            ff_h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
            ff_mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        av_assert0(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* hand the post-ME state to the additional slice contexts, encode all
     * slices, then merge statistics and bitstreams back into s */
    for(i=1; i<context_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
    for(i=1; i<context_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
    return 0;
}
3425
3426 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3427     const int intra= s->mb_intra;
3428     int i;
3429
3430     s->dct_count[intra]++;
3431
3432     for(i=0; i<64; i++){
3433         int level= block[i];
3434
3435         if(level){
3436             if(level>0){
3437                 s->dct_error_sum[intra][i] += level;
3438                 level -= s->dct_offset[intra][i];
3439                 if(level<0) level=0;
3440             }else{
3441                 s->dct_error_sum[intra][i] -= level;
3442                 level += s->dct_offset[intra][i];
3443                 if(level>0) level=0;
3444             }
3445             block[i]= level;
3446         }
3447     }
3448 }
3449
/**
 * Forward DCT plus rate-distortion optimised ("trellis") quantisation of
 * one block.
 *
 * The block is transformed with s->dsp.fdct (and denoised if
 * s->dct_error_sum is set). Each coefficient that survives plain
 * quantisation gets up to two candidate levels (the quantised level and the
 * one next closer to zero); a dynamic programming pass over the scan order
 * then picks the run/level sequence with the lowest
 * distortion + lambda * bits score. The chosen levels are written back into
 * block.
 *
 * @param s        encoder context
 * @param block    in: samples, out: quantised coefficients
 * @param n        block index; n < 4 selects the luma DC scale and luma
 *                 intra matrix, other values the chroma ones; also the index
 *                 into s->coded_score
 * @param qscale   quantiser scale
 * @param overflow set to non-zero if a quantised magnitude may exceed
 *                 s->max_qcoeff
 * @return index (in scan order) of the last non-zero coefficient; a value
 *         below the first coded index (1 for intra, 0 for inter) means that
 *         no coefficient is coded
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                                  DCTELEM *block, int n,
                                  int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    int run_tab[65];        // run preceding the best level ending at scan position i-1
    int level_tab[65];      // best level ending at scan position i-1
    int score_tab[65];      // best score of all sequences ending at scan position i-1
    int survivor[65];       // start positions that are still worth extending
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    int coeff[2][64];       // candidate levels per scan position
    int coeff_count[64];    // number of valid candidates per scan position
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->dsp.fdct (block);

    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        /* the DC coefficient is quantised above; the trellis starts at index 1 */
        start_i = 1;
        last_non_zero = 0;
        qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    /* (unsigned)(level+threshold1) > threshold2 is a single-compare test for
     * level > threshold1 || level < -threshold1 */
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* scan backwards for the last coefficient that passes the threshold */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* build the candidate levels for every position up to last_non_zero */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            /* candidates: the quantised level and the one next closer to
             * zero; only the first one if that would be zero */
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            coeff_count[i]= FFMIN(level, 2);
            av_assert2(coeff_count[i]);
            max |=level;
        }else{
            /* below the threshold: the only candidate is +1 or -1, carrying
             * the sign of level */
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    if(last_non_zero < start_i){
        /* nothing passed the threshold: clear the block and stop */
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    /* dynamic programming pass: for each position try every candidate level
     * combined with every surviving start position */
    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j, zero_distortion;
        int dct_coeff= FFABS(block[ scantable[i] ]);
        int best_score=256*256*256*120;

        if (s->dsp.fdct == ff_fdct_ifast)
            dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
        zero_distortion= dct_coeff*dct_coeff;

        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distortion;
            int level= coeff[level_index][i];
            const int alevel= FFABS(level);
            int unquant_coeff;

            av_assert2(level);

            /* reconstruct the value the decoder would see for this level */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            /* distortion relative to coding this coefficient as zero */
            distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
            /* offset the level by 64 so that it can index the length tables;
             * levels outside 0..127 after the offset need an escape code */
            level+=64;
            if((level&(~127)) == 0){
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                /* H.263 style formats use a separate length table for the
                 * last coefficient, so the best ending is tracked here too */
                if(s->out_format == FMT_H263){
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                /* escape coded level: constant bit cost */
                distortion += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distortion + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distortion + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        /* drop survivors whose score is worse than the new best (plus a
         * margin of lambda for long blocks, see the note below) */
        //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* formats other than H.263: choose the end position afterwards, adding
     * a cost of 2*lambda whenever i is non-zero */
    if(s->out_format != FMT_H263){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    /* keep the DCT value at index 0 for the special case below, then clear
     * everything from start_i on before writing the chosen levels */
    dc= FFABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));

    if(last_non_zero < start_i)
        return last_non_zero;

    /* special case: inter block whose only coded coefficient is at index 0;
     * re-decide its level directly, allowing the block to become empty */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= FFABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* walk back through run_tab/level_tab from the chosen end position and
     * store the levels at their permutated scan positions */
    i= last_i;
    av_assert2(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
3741
3742 //#define REFINE_STATS 1
3743 static int16_t basis[64][64];
3744
3745 static void build_basis(uint8_t *perm){
3746     int i, j, x, y;
3747     emms_c();
3748     for(i=0; i<8; i++){
3749         for(j=0; j<8; j++){
3750             for(y=0; y<8; y++){
3751                 for(x=0; x<8; x++){
3752                     double s= 0.25*(1<<BASIS_SHIFT);
3753                     int index= 8*i + j;
3754                     int perm_index= perm[index];
3755                     if(i==0) s*= sqrt(0.5);
3756                     if(j==0) s*= sqrt(0.5);
3757                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3758                 }
3759             }
3760         }
3761     }
3762 }
3763
3764 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3765                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3766                         int n, int qscale){
3767     int16_t rem[64];
3768     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3769     const uint8_t *scantable= s->intra_scantable.scantable;
3770     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3771 //    unsigned int threshold1, threshold2;
3772 //    int bias=0;
3773     int run_tab[65];
3774     int prev_run=0;
3775     int prev_level=0;
3776     int qmul, qadd, start_i, last_non_zero, i, dc;
3777     uint8_t * length;
3778     uint8_t * last_length;
3779     int lambda;
3780     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3781 #ifdef REFINE_STATS
3782 static int count=0;
3783 static int after_last=0;
3784 static int to_zero=0;
3785 static int from_zero=0;
3786 static int raise=0;
3787 static int lower=0;
3788 static int messed_sign=0;
3789 #endif
3790
3791     if(basis[0][0] == 0)
3792         build_basis(s->dsp.idct_permutation);
3793
3794     qmul= qscale*2;
3795     qadd= (qscale-1)|1;
3796     if (s->mb_intra) {
3797         if (!s->h263_aic) {
3798             if (n < 4)
3799                 q = s->y_dc_scale;
3800             else
3801                 q = s->c_dc_scale;
3802         } else{
3803             /* For AIC we skip quant/dequant of INTRADC */
3804             q = 1;
3805             qadd=0;
3806         }
3807         q <<= RECON_SHIFT-3;
3808         /* note: block[0] is assumed to be positive */
3809         dc= block[0]*q;
3810 //        block[0] = (block[0] + (q >> 1)) / q;
3811         start_i = 1;
3812 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3813 //            bias= 1<<(QMAT_SHIFT-1);
3814         length     = s->intra_ac_vlc_length;
3815         last_length= s->intra_ac_vlc_last_length;
3816     } else {
3817         dc= 0;
3818         start_i = 0;
3819         length     = s->inter_ac_vlc_length;
3820         last_length= s->inter_ac_vlc_last_length;
3821     }
3822     last_non_zero = s->block_last_index[n];
3823
3824 #ifdef REFINE_STATS
3825 {START_TIMER
3826 #endif
3827     dc += (1<<(RECON_SHIFT-1));
3828     for(i=0; i<64; i++){
3829         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3830     }
3831 #ifdef REFINE_STATS
3832 STOP_TIMER("memset rem[]")}
3833 #endif
3834     sum=0;
3835     for(i=0; i<64; i++){
3836         int one= 36;
3837         int qns=4;
3838         int w;
3839
3840         w= FFABS(weight[i]) + qns*one;
3841         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3842
3843         weight[i] = w;
3844 //        w=weight[i] = (63*qns + (w/2)) / w;
3845
3846         av_assert2(w>0);
3847         av_assert2(w<(1<<6));
3848         sum += w*w;
3849     }
3850     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3851 #ifdef REFINE_STATS
3852 {START_TIMER
3853 #endif
3854     run=0;
3855     rle_index=0;
3856     for(i=start_i; i<=last_non_zero; i++){
3857         int j= perm_scantable[i];
3858         const int level= block[j];
3859         int coeff;
3860
3861         if(level){
3862             if(level<0) coeff= qmul*level - qadd;
3863             else        coeff= qmul*level + qadd;
3864             run_tab[rle_index++]=run;
3865             run=0;
3866
3867             s->dsp.add_8x8basis(rem, basis[j], coeff);
3868         }else{
3869             run++;
3870         }
3871     }
3872 #ifdef REFINE_STATS
3873 if(last_non_zero>0){
3874 STOP_TIMER("init rem[]")
3875 }
3876 }
3877
3878 {START_TIMER
3879 #endif
3880     for(;;){
3881         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3882         int best_coeff=0;
3883         int best_change=0;
3884         int run2, best_unquant_change=0, analyze_gradient;
3885 #ifdef REFINE_STATS
3886 {START_TIMER
3887 #endif
3888         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3889
3890         if(analyze_gradient){
3891 #ifdef REFINE_STATS
3892 {START_TIMER
3893 #endif
3894             for(i=0; i<64; i++){
3895                 int w= weight[i];
3896
3897                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3898             }
3899 #ifdef REFINE_STATS
3900 STOP_TIMER("rem*w*w")}
3901 {START_TIMER
3902 #endif
3903             s->dsp.fdct(d1);
3904 #ifdef REFINE_STATS
3905 STOP_TIMER("dct")}
3906 #endif
3907         }
3908
3909         if(start_i){
3910             const int level= block[0];
3911             int change, old_coeff;
3912
3913             av_assert2(s->mb_intra);
3914
3915             old_coeff= q*level;
3916
3917             for(change=-1; change<=1; change+=2){
3918                 int new_level= level + change;
3919                 int score, new_coeff;
3920
3921                 new_coeff= q*new_level;
3922                 if(new_coeff >= 2048 || new_coeff < 0)
3923                     continue;
3924
3925                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3926                 if(score<best_score){
3927                     best_score= score;
3928                     best_coeff= 0;
3929                     best_change= change;
3930                     best_unquant_change= new_coeff - old_coeff;
3931                 }
3932             }
3933         }
3934
3935         run=0;
3936         rle_index=0;
3937         run2= run_tab[rle_index++];
3938         prev_level=0;
3939         prev_run=0;
3940
3941         for(i=start_i; i<64; i++){
3942             int j= perm_scantable[i];
3943             const int level= block[j];
3944             int change, old_coeff;
3945
3946             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3947                 break;
3948
3949             if(level){
3950                 if(level<0) old_coeff= qmul*level - qadd;
3951                 else        old_coeff= qmul*level + qadd;
3952                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3953             }else{
3954                 old_coeff=0;
3955                 run2--;
3956                 av_assert2(run2>=0 || i >= last_non_zero );
3957             }
3958
3959             for(change=-1; change<=1; change+=2){
3960                 int new_level= level + change;
3961                 int score, new_coeff, unquant_change;
3962
3963                 score=0;
3964                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3965                    continue;
3966
3967                 if(new_level){
3968                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3969                     else            new_coeff= qmul*new_level + qadd;
3970                     if(new_coeff >= 2048 || new_coeff <= -2048)
3971                         continue;
3972                     //FIXME check for overflow
3973
3974                     if(level){
3975                         if(level < 63 && level > -63){
3976                             if(i < last_non_zero)
3977                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3978                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3979                             else
3980                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3981                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3982                         }
3983                     }else{
3984                         av_assert2(FFABS(new_level)==1);
3985
3986                         if(analyze_gradient){
3987                             int g= d1[ scantable[i] ];
3988                             if(g && (g^new_level) >= 0)
3989                                 continue;
3990                         }
3991
3992                         if(i < last_non_zero){
3993                             int next_i= i + run2 + 1;
3994                             int next_level= block[ perm_scantable[next_i] ] + 64;
3995
3996                             if(next_level&(~127))
3997                                 next_level= 0;
3998
3999                             if(next_i < last_non_zero)
4000                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4001                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4002                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4003                             else
4004                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4005                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4006                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4007                         }else{
4008                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4009                             if(prev_level){
4010                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4011                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4012                             }
4013                         }
4014                     }
4015                 }else{
4016                     new_coeff=0;
4017                     av_assert2(FFABS(level)==1);
4018
4019                     if(i < last_non_zero){
4020                         int next_i= i + run2 + 1;
4021                         int next_level= block[ perm_scantable[next_i] ] + 64;
4022
4023                         if(next_level&(~127))
4024                             next_level= 0;
4025
4026                         if(next_i < last_non_zero)
4027                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4028                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4029                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4030                         else
4031                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4032                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4033                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4034                     }else{
4035                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4036                         if(prev_level){
4037                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4038                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4039                         }
4040                     }
4041                 }
4042
4043                 score *= lambda;
4044
4045                 unquant_change= new_coeff - old_coeff;
4046                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4047
4048                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4049                 if(score<best_score){
4050                     best_score= score;
4051                     best_coeff= i;
4052                     best_change= change;
4053                     best_unquant_change= unquant_change;
4054                 }
4055             }
4056             if(level){
4057                 prev_level= level + 64;
4058                 if(prev_level&(~127))
4059                     prev_level= 0;
4060                 prev_run= run;
4061                 run=0;
4062             }else{
4063                 run++;
4064             }
4065         }
4066 #ifdef REFINE_STATS
4067 STOP_TIMER("iterative step")}
4068 #endif
4069
4070         if(best_change){
4071             int j= perm_scantable[ best_coeff ];
4072
4073             block[j] += best_change;
4074
4075             if(best_coeff > last_non_zero){
4076                 last_non_zero= best_coeff;
4077                 av_assert2(block[j]);
4078 #ifdef REFINE_STATS
4079 after_last++;
4080 #endif
4081             }else{
4082 #ifdef REFINE_STATS
4083 if(block[j]){
4084     if(block[j] - best_change){
4085         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4086             raise++;
4087         }else{
4088             lower++;
4089         }
4090     }else{
4091         from_zero++;
4092     }
4093 }else{
4094     to_zero++;
4095 }
4096 #endif
4097                 for(; last_non_zero>=start_i; last_non_zero--){
4098                     if(block[perm_scantable[last_non_zero]])
4099                         break;
4100                 }
4101             }
4102 #ifdef REFINE_STATS
4103 count++;
4104 if(256*256*256*64 % count == 0){
4105     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4106 }
4107 #endif
4108             run=0;
4109             rle_index=0;
4110             for(i=start_i; i<=last_non_zero; i++){
4111                 int j= perm_scantable[i];
4112                 const int level= block[j];
4113
4114                  if(level){
4115                      run_tab[rle_index++]=run;
4116                      run=0;
4117                  }else{
4118                      run++;
4119                  }
4120             }
4121
4122             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4123         }else{
4124             break;
4125         }
4126     }
4127 #ifdef REFINE_STATS
4128 if(last_non_zero>0){
4129 STOP_TIMER("iterative search")
4130 }
4131 }
4132 #endif
4133
4134     return last_non_zero;
4135 }
4136
4137 int ff_dct_quantize_c(MpegEncContext *s,
4138                         DCTELEM *block, int n,
4139                         int qscale, int *overflow)
4140 {
4141     int i, j, level, last_non_zero, q, start_i;
4142     const int *qmat;
4143     const uint8_t *scantable= s->intra_scantable.scantable;
4144     int bias;
4145     int max=0;
4146     unsigned int threshold1, threshold2;
4147
4148     s->dsp.fdct (block);
4149
4150     if(s->dct_error_sum)
4151         s->denoise_dct(s, block);
4152
4153     if (s->mb_intra) {
4154         if (!s->h263_aic) {
4155             if (n < 4)
4156                 q = s->y_dc_scale;
4157             else
4158                 q = s->c_dc_scale;
4159             q = q << 3;
4160         } else
4161             /* For AIC we skip quant/dequant of INTRADC */
4162             q = 1 << 3;
4163
4164         /* note: block[0] is assumed to be positive */
4165         block[0] = (block[0] + (q >> 1)) / q;
4166         start_i = 1;
4167         last_non_zero = 0;
4168         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4169         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4170     } else {
4171         start_i = 0;
4172         last_non_zero = -1;
4173         qmat = s->q_inter_matrix[qscale];
4174         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4175     }
4176     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4177     threshold2= (threshold1<<1);
4178     for(i=63;i>=start_i;i--) {
4179         j = scantable[i];
4180         level = block[j] * qmat[j];
4181
4182         if(((unsigned)(level+threshold1))>threshold2){
4183             last_non_zero = i;
4184             break;
4185         }else{
4186             block[j]=0;
4187         }
4188     }
4189     for(i=start_i; i<=last_non_zero; i++) {
4190         j = scantable[i];
4191         level = block[j] * qmat[j];
4192
4193 //        if(   bias+level >= (1<<QMAT_SHIFT)
4194 //           || bias-level >= (1<<QMAT_SHIFT)){
4195         if(((unsigned)(level+threshold1))>threshold2){
4196             if(level>0){
4197                 level= (bias + level)>>QMAT_SHIFT;
4198                 block[j]= level;
4199             }else{
4200                 level= (bias - level)>>QMAT_SHIFT;
4201                 block[j]= -level;
4202             }
4203             max |=level;
4204         }else{
4205             block[j]=0;
4206         }
4207     }
4208     *overflow= s->max_qcoeff < max; //overflow might have happened
4209
4210     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4211     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4212         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4213
4214     return last_non_zero;
4215 }
4216
4217 #define OFFSET(x) offsetof(MpegEncContext, x)
4218 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4219 static const AVOption h263_options[] = {
4220     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4221     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4222     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4223     FF_MPV_COMMON_OPTS
4224     { NULL },
4225 };
4226
4227 static const AVClass h263_class = {
4228     .class_name = "H.263 encoder",
4229     .item_name  = av_default_item_name,
4230     .option     = h263_options,
4231     .version    = LIBAVUTIL_VERSION_INT,
4232 };
4233
4234 AVCodec ff_h263_encoder = {
4235     .name           = "h263",
4236     .type           = AVMEDIA_TYPE_VIDEO,
4237     .id             = AV_CODEC_ID_H263,
4238     .priv_data_size = sizeof(MpegEncContext),
4239     .init           = ff_MPV_encode_init,
4240     .encode2        = ff_MPV_encode_picture,
4241     .close          = ff_MPV_encode_end,
4242     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4243     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4244     .priv_class     = &h263_class,
4245 };
4246
4247 static const AVOption h263p_options[] = {
4248     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4249     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4250     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4251     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4252     FF_MPV_COMMON_OPTS
4253     { NULL },
4254 };
4255 static const AVClass h263p_class = {
4256     .class_name = "H.263p encoder",
4257     .item_name  = av_default_item_name,
4258     .option     = h263p_options,
4259     .version    = LIBAVUTIL_VERSION_INT,
4260 };
4261
4262 AVCodec ff_h263p_encoder = {
4263     .name           = "h263p",
4264     .type           = AVMEDIA_TYPE_VIDEO,
4265     .id             = AV_CODEC_ID_H263P,
4266     .priv_data_size = sizeof(MpegEncContext),
4267     .init           = ff_MPV_encode_init,
4268     .encode2        = ff_MPV_encode_picture,
4269     .close          = ff_MPV_encode_end,
4270     .capabilities   = CODEC_CAP_SLICE_THREADS,
4271     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4272     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4273     .priv_class     = &h263p_class,
4274 };
4275
4276 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4277
4278 AVCodec ff_msmpeg4v2_encoder = {
4279     .name           = "msmpeg4v2",
4280     .type           = AVMEDIA_TYPE_VIDEO,
4281     .id             = AV_CODEC_ID_MSMPEG4V2,
4282     .priv_data_size = sizeof(MpegEncContext),
4283     .init           = ff_MPV_encode_init,
4284     .encode2        = ff_MPV_encode_picture,
4285     .close          = ff_MPV_encode_end,
4286     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4287     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4288     .priv_class     = &msmpeg4v2_class,
4289 };
4290
4291 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4292
4293 AVCodec ff_msmpeg4v3_encoder = {
4294     .name           = "msmpeg4",
4295     .type           = AVMEDIA_TYPE_VIDEO,
4296     .id             = AV_CODEC_ID_MSMPEG4V3,
4297     .priv_data_size = sizeof(MpegEncContext),
4298     .init           = ff_MPV_encode_init,
4299     .encode2        = ff_MPV_encode_picture,
4300     .close          = ff_MPV_encode_end,
4301     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4302     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4303     .priv_class     = &msmpeg4v3_class,
4304 };
4305
4306 FF_MPV_GENERIC_CLASS(wmv1)
4307
4308 AVCodec ff_wmv1_encoder = {
4309     .name           = "wmv1",
4310     .type           = AVMEDIA_TYPE_VIDEO,
4311     .id             = AV_CODEC_ID_WMV1,
4312     .priv_data_size = sizeof(MpegEncContext),
4313     .init           = ff_MPV_encode_init,
4314     .encode2        = ff_MPV_encode_picture,
4315     .close          = ff_MPV_encode_end,
4316     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4317     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4318     .priv_class     = &wmv1_class,
4319 };