]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge remote-tracking branch 'qatar/master'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "h263.h"
37 #include "mjpegenc.h"
38 #include "msmpeg4.h"
39 #include "faandct.h"
40 #include "thread.h"
41 #include "aandcttab.h"
42 #include "flv.h"
43 #include "mpeg4video.h"
44 #include "internal.h"
45 #include "bytestream.h"
46 #include <limits.h>
47 #include "sp5x.h"
48
49 //#undef NDEBUG
50 //#include <assert.h>
51
52 static int encode_picture(MpegEncContext *s, int picture_number);
53 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
54 static int sse_mb(MpegEncContext *s);
55 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
56 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
57
58 /* enable all paranoid tests for rounding, overflows, etc... */
59 //#define PARANOID
60
61 //#define DEBUG
62
/* File-local motion vector penalty table; MPV_encode_defaults() installs it
 * as s->me.mv_penalty. */
63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
/* File-local f_code lookup table; MPV_encode_defaults() sets the 32 entries
 * centred on index MAX_MV to 1 and installs the table as s->fcode_tab. */
64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
/* Exported AVOption table: the FF_MPV_COMMON_OPTS entries followed by the
 * NULL terminator entry. */
66 const AVOption ff_mpv_generic_options[] = {
67     FF_MPV_COMMON_OPTS
68     { NULL },
69 };
70
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
83             dsp->fdct == ff_faandct) {
84             for (i = 0; i < 64; i++) {
85                 const int j = dsp->idct_permutation[i];
86                 /* 16 <= qscale * quant_matrix[i] <= 7905
87                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
88                  *             19952 <=              x  <= 249205026
89                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
90                  *           3444240 >= (1 << 36) / (x) >= 275 */
91
92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
93                                         (qscale * quant_matrix[j]));
94             }
95         } else if (dsp->fdct == ff_fdct_ifast) {
96             for (i = 0; i < 64; i++) {
97                 const int j = dsp->idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
105                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.f.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void copy_picture_attributes(MpegEncContext *s,
188                                     AVFrame *dst,
189                                     AVFrame *src)
190 {
191     int i;
192
193     dst->pict_type              = src->pict_type;
194     dst->quality                = src->quality;
195     dst->coded_picture_number   = src->coded_picture_number;
196     dst->display_picture_number = src->display_picture_number;
197     //dst->reference              = src->reference;
198     dst->pts                    = src->pts;
199     dst->interlaced_frame       = src->interlaced_frame;
200     dst->top_field_first        = src->top_field_first;
201
202     if (s->avctx->me_threshold) {
203         if (!src->motion_val[0])
204             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
205         if (!src->mb_type)
206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
207         if (!src->ref_index[0])
208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
209         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
210             av_log(s->avctx, AV_LOG_ERROR,
211                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
212                    src->motion_subsample_log2, dst->motion_subsample_log2);
213
214         memcpy(dst->mb_type, src->mb_type,
215                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
216
217         for (i = 0; i < 2; i++) {
218             int stride = ((16 * s->mb_width ) >>
219                           src->motion_subsample_log2) + 1;
220             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
221
222             if (src->motion_val[i] &&
223                 src->motion_val[i] != dst->motion_val[i]) {
224                 memcpy(dst->motion_val[i], src->motion_val[i],
225                        2 * stride * height * sizeof(int16_t));
226             }
227             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
228                 memcpy(dst->ref_index[i], src->ref_index[i],
229                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
230             }
231         }
232     }
233 }
234
235 static void update_duplicate_context_after_me(MpegEncContext *dst,
236                                               MpegEncContext *src)
237 {
238 #define COPY(a) dst->a= src->a
239     COPY(pict_type);
240     COPY(current_picture);
241     COPY(f_code);
242     COPY(b_code);
243     COPY(qscale);
244     COPY(lambda);
245     COPY(lambda2);
246     COPY(picture_in_gop_number);
247     COPY(gop_picture_number);
248     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
249     COPY(progressive_frame);    // FIXME don't set in encode_header
250     COPY(partitioned_frame);    // FIXME don't set in encode_header
251 #undef COPY
252 }
253
254 /**
255  * Set the given MpegEncContext to defaults for encoding.
256  * the changed fields will not depend upon the prior state of the MpegEncContext.
257  */
258 static void MPV_encode_defaults(MpegEncContext *s)
259 {
260     int i;
261     ff_MPV_common_defaults(s);
262
263     for (i = -16; i < 16; i++) {
264         default_fcode_tab[i + MAX_MV] = 1;
265     }
266     s->me.mv_penalty = default_mv_penalty;
267     s->fcode_tab     = default_fcode_tab;
268 }
269
270 /* init video encoder */
271 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
272 {
273     MpegEncContext *s = avctx->priv_data;
274     int i;
275     int chroma_h_shift, chroma_v_shift;
276
277     MPV_encode_defaults(s);
278
279     switch (avctx->codec_id) {
280     case AV_CODEC_ID_MPEG2VIDEO:
281         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
282             avctx->pix_fmt != PIX_FMT_YUV422P) {
283             av_log(avctx, AV_LOG_ERROR,
284                    "only YUV420 and YUV422 are supported\n");
285             return -1;
286         }
287         break;
288     case AV_CODEC_ID_LJPEG:
289         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
290             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
291             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
292             avctx->pix_fmt != PIX_FMT_BGR0     &&
293             avctx->pix_fmt != PIX_FMT_BGRA     &&
294             avctx->pix_fmt != PIX_FMT_BGR24    &&
295             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
296               avctx->pix_fmt != PIX_FMT_YUV422P &&
297               avctx->pix_fmt != PIX_FMT_YUV444P) ||
298              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
299             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
300             return -1;
301         }
302         break;
303     case AV_CODEC_ID_MJPEG:
304     case AV_CODEC_ID_AMV:
305         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
306             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
307             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
308               avctx->pix_fmt != PIX_FMT_YUV422P) ||
309              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
310             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
311             return -1;
312         }
313         break;
314     default:
315         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
316             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
317             return -1;
318         }
319     }
320
321     switch (avctx->pix_fmt) {
322     case PIX_FMT_YUVJ422P:
323     case PIX_FMT_YUV422P:
324         s->chroma_format = CHROMA_422;
325         break;
326     case PIX_FMT_YUVJ420P:
327     case PIX_FMT_YUV420P:
328     default:
329         s->chroma_format = CHROMA_420;
330         break;
331     }
332
333     s->bit_rate = avctx->bit_rate;
334     s->width    = avctx->width;
335     s->height   = avctx->height;
336     if (avctx->gop_size > 600 &&
337         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
338         av_log(avctx, AV_LOG_WARNING,
339                "keyframe interval too large!, reducing it from %d to %d\n",
340                avctx->gop_size, 600);
341         avctx->gop_size = 600;
342     }
343     s->gop_size     = avctx->gop_size;
344     s->avctx        = avctx;
345     s->flags        = avctx->flags;
346     s->flags2       = avctx->flags2;
347     s->max_b_frames = avctx->max_b_frames;
348     s->codec_id     = avctx->codec->id;
349 #if FF_API_MPV_GLOBAL_OPTS
350     if (avctx->luma_elim_threshold)
351         s->luma_elim_threshold   = avctx->luma_elim_threshold;
352     if (avctx->chroma_elim_threshold)
353         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
354 #endif
355     s->strict_std_compliance = avctx->strict_std_compliance;
356     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
357     s->mpeg_quant         = avctx->mpeg_quant;
358     s->rtp_mode           = !!avctx->rtp_payload_size;
359     s->intra_dc_precision = avctx->intra_dc_precision;
360     s->user_specified_pts = AV_NOPTS_VALUE;
361
362     if (s->gop_size <= 1) {
363         s->intra_only = 1;
364         s->gop_size   = 12;
365     } else {
366         s->intra_only = 0;
367     }
368
369     s->me_method = avctx->me_method;
370
371     /* Fixed QSCALE */
372     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
373
374 #if FF_API_MPV_GLOBAL_OPTS
375     if (s->flags & CODEC_FLAG_QP_RD)
376         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
377 #endif
378
379     s->adaptive_quant = (s->avctx->lumi_masking ||
380                          s->avctx->dark_masking ||
381                          s->avctx->temporal_cplx_masking ||
382                          s->avctx->spatial_cplx_masking  ||
383                          s->avctx->p_masking      ||
384                          s->avctx->border_masking ||
385                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
386                         !s->fixed_qscale;
387
388     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
389
390     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
391         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
392         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
393             return -1;
394     }
395
396     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
397         av_log(avctx, AV_LOG_INFO,
398                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
399     }
400
401     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
402         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
403         return -1;
404     }
405
406     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
407         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
408         return -1;
409     }
410
411     if (avctx->rc_max_rate &&
412         avctx->rc_max_rate == avctx->bit_rate &&
413         avctx->rc_max_rate != avctx->rc_min_rate) {
414         av_log(avctx, AV_LOG_INFO,
415                "impossible bitrate constraints, this will fail\n");
416     }
417
418     if (avctx->rc_buffer_size &&
419         avctx->bit_rate * (int64_t)avctx->time_base.num >
420             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
421         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
422         return -1;
423     }
424
425     if (!s->fixed_qscale &&
426         avctx->bit_rate * av_q2d(avctx->time_base) >
427             avctx->bit_rate_tolerance) {
428         av_log(avctx, AV_LOG_ERROR,
429                "bitrate tolerance too small for bitrate\n");
430         return -1;
431     }
432
433     if (s->avctx->rc_max_rate &&
434         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
435         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
436          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
437         90000LL * (avctx->rc_buffer_size - 1) >
438             s->avctx->rc_max_rate * 0xFFFFLL) {
439         av_log(avctx, AV_LOG_INFO,
440                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
441                "specified vbv buffer is too large for the given bitrate!\n");
442     }
443
444     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
445         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
446         s->codec_id != AV_CODEC_ID_FLV1) {
447         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
448         return -1;
449     }
450
451     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
452         av_log(avctx, AV_LOG_ERROR,
453                "OBMC is only supported with simple mb decision\n");
454         return -1;
455     }
456
457     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
458         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
459         return -1;
460     }
461
462     if (s->max_b_frames                    &&
463         s->codec_id != AV_CODEC_ID_MPEG4      &&
464         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
465         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
466         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
467         return -1;
468     }
469
470     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
471          s->codec_id == AV_CODEC_ID_H263  ||
472          s->codec_id == AV_CODEC_ID_H263P) &&
473         (avctx->sample_aspect_ratio.num > 255 ||
474          avctx->sample_aspect_ratio.den > 255)) {
475         av_log(avctx, AV_LOG_WARNING,
476                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
477                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
478         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
479                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
480     }
481
482     if ((s->codec_id == AV_CODEC_ID_H263  ||
483          s->codec_id == AV_CODEC_ID_H263P) &&
484         (avctx->width  > 2048 ||
485          avctx->height > 1152 )) {
486         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
487         return -1;
488     }
489     if ((s->codec_id == AV_CODEC_ID_H263  ||
490          s->codec_id == AV_CODEC_ID_H263P) &&
491         ((avctx->width &3) ||
492          (avctx->height&3) )) {
493         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
494         return -1;
495     }
496
497     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
498         (avctx->width  > 4095 ||
499          avctx->height > 4095 )) {
500         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
501         return -1;
502     }
503
504     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
505         (avctx->width  > 16383 ||
506          avctx->height > 16383 )) {
507         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
508         return -1;
509     }
510
511     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
512          s->codec_id == AV_CODEC_ID_WMV2) &&
513          avctx->width & 1) {
514          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
515          return -1;
516     }
517
518     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
519         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
520         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
521         return -1;
522     }
523
524     // FIXME mpeg2 uses that too
525     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
526         av_log(avctx, AV_LOG_ERROR,
527                "mpeg2 style quantization not supported by codec\n");
528         return -1;
529     }
530
531 #if FF_API_MPV_GLOBAL_OPTS
532     if (s->flags & CODEC_FLAG_CBP_RD)
533         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
534 #endif
535
536     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
537         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
538         return -1;
539     }
540
541     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
542         s->avctx->mb_decision != FF_MB_DECISION_RD) {
543         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
544         return -1;
545     }
546
547     if (s->avctx->scenechange_threshold < 1000000000 &&
548         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
549         av_log(avctx, AV_LOG_ERROR,
550                "closed gop with scene change detection are not supported yet, "
551                "set threshold to 1000000000\n");
552         return -1;
553     }
554
555     if (s->flags & CODEC_FLAG_LOW_DELAY) {
556         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
557             av_log(avctx, AV_LOG_ERROR,
558                   "low delay forcing is only available for mpeg2\n");
559             return -1;
560         }
561         if (s->max_b_frames != 0) {
562             av_log(avctx, AV_LOG_ERROR,
563                    "b frames cannot be used with low delay\n");
564             return -1;
565         }
566     }
567
568     if (s->q_scale_type == 1) {
569         if (avctx->qmax > 12) {
570             av_log(avctx, AV_LOG_ERROR,
571                    "non linear quant only supports qmax <= 12 currently\n");
572             return -1;
573         }
574     }
575
576     if (s->avctx->thread_count > 1         &&
577         s->codec_id != AV_CODEC_ID_MPEG4      &&
578         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
579         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
580         s->codec_id != AV_CODEC_ID_MJPEG      &&
581         (s->codec_id != AV_CODEC_ID_H263P)) {
582         av_log(avctx, AV_LOG_ERROR,
583                "multi threaded encoding not supported by codec\n");
584         return -1;
585     }
586
587     if (s->avctx->thread_count < 1) {
588         av_log(avctx, AV_LOG_ERROR,
589                "automatic thread number detection not supported by codec, "
590                "patch welcome\n");
591         return -1;
592     }
593
594     if (s->avctx->thread_count > 1)
595         s->rtp_mode = 1;
596
597     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
598         s->h263_slice_structured = 1;
599
600     if (!avctx->time_base.den || !avctx->time_base.num) {
601         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
602         return -1;
603     }
604
605     i = (INT_MAX / 2 + 128) >> 8;
606     if (avctx->me_threshold >= i) {
607         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
608                i - 1);
609         return -1;
610     }
611     if (avctx->mb_threshold >= i) {
612         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
613                i - 1);
614         return -1;
615     }
616
617     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
618         av_log(avctx, AV_LOG_INFO,
619                "notice: b_frame_strategy only affects the first pass\n");
620         avctx->b_frame_strategy = 0;
621     }
622
623     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
624     if (i > 1) {
625         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
626         avctx->time_base.den /= i;
627         avctx->time_base.num /= i;
628         //return -1;
629     }
630
631     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
632         // (a + x * 3 / 8) / x
633         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
634         s->inter_quant_bias = 0;
635     } else {
636         s->intra_quant_bias = 0;
637         // (a - x / 4) / x
638         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
639     }
640
641     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
642         s->intra_quant_bias = avctx->intra_quant_bias;
643     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
644         s->inter_quant_bias = avctx->inter_quant_bias;
645
646     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
647
648     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
649                                   &chroma_v_shift);
650
651     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
652         s->avctx->time_base.den > (1 << 16) - 1) {
653         av_log(avctx, AV_LOG_ERROR,
654                "timebase %d/%d not supported by MPEG 4 standard, "
655                "the maximum admitted value for the timebase denominator "
656                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
657                (1 << 16) - 1);
658         return -1;
659     }
660     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
661
662 #if FF_API_MPV_GLOBAL_OPTS
663     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
664         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
665     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
666         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
667     if (avctx->quantizer_noise_shaping)
668         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
669 #endif
670
671     switch (avctx->codec->id) {
672     case AV_CODEC_ID_MPEG1VIDEO:
673         s->out_format = FMT_MPEG1;
674         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
675         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
676         break;
677     case AV_CODEC_ID_MPEG2VIDEO:
678         s->out_format = FMT_MPEG1;
679         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
680         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
681         s->rtp_mode   = 1;
682         break;
683     case AV_CODEC_ID_LJPEG:
684     case AV_CODEC_ID_MJPEG:
685     case AV_CODEC_ID_AMV:
686         s->out_format = FMT_MJPEG;
687         s->intra_only = 1; /* force intra only for jpeg */
688         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
689             (avctx->pix_fmt == PIX_FMT_BGR0
690              || s->avctx->pix_fmt == PIX_FMT_BGRA
691              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
692             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
693             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
694             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
695         } else {
696             s->mjpeg_vsample[0] = 2;
697             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
698             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
699             s->mjpeg_hsample[0] = 2;
700             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
701             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
702         }
703         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
704             ff_mjpeg_encode_init(s) < 0)
705             return -1;
706         avctx->delay = 0;
707         s->low_delay = 1;
708         break;
709     case AV_CODEC_ID_H261:
710         if (!CONFIG_H261_ENCODER)
711             return -1;
712         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
713             av_log(avctx, AV_LOG_ERROR,
714                    "The specified picture size of %dx%d is not valid for the "
715                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
716                     s->width, s->height);
717             return -1;
718         }
719         s->out_format = FMT_H261;
720         avctx->delay  = 0;
721         s->low_delay  = 1;
722         break;
723     case AV_CODEC_ID_H263:
724         if (!CONFIG_H263_ENCODER)
725             return -1;
726         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
727                              s->width, s->height) == 8) {
728             av_log(avctx, AV_LOG_ERROR,
729                    "The specified picture size of %dx%d is not valid for "
730                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
731                    "352x288, 704x576, and 1408x1152. "
732                    "Try H.263+.\n", s->width, s->height);
733             return -1;
734         }
735         s->out_format = FMT_H263;
736         avctx->delay  = 0;
737         s->low_delay  = 1;
738         break;
739     case AV_CODEC_ID_H263P:
740         s->out_format = FMT_H263;
741         s->h263_plus  = 1;
742         /* Fx */
743         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
744         s->modified_quant  = s->h263_aic;
745         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
746         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
747
748         /* /Fx */
749         /* These are just to be sure */
750         avctx->delay = 0;
751         s->low_delay = 1;
752         break;
753     case AV_CODEC_ID_FLV1:
754         s->out_format      = FMT_H263;
755         s->h263_flv        = 2; /* format = 1; 11-bit codes */
756         s->unrestricted_mv = 1;
757         s->rtp_mode  = 0; /* don't allow GOB */
758         avctx->delay = 0;
759         s->low_delay = 1;
760         break;
761     case AV_CODEC_ID_RV10:
762         s->out_format = FMT_H263;
763         avctx->delay  = 0;
764         s->low_delay  = 1;
765         break;
766     case AV_CODEC_ID_RV20:
767         s->out_format      = FMT_H263;
768         avctx->delay       = 0;
769         s->low_delay       = 1;
770         s->modified_quant  = 1;
771         s->h263_aic        = 1;
772         s->h263_plus       = 1;
773         s->loop_filter     = 1;
774         s->unrestricted_mv = 0;
775         break;
776     case AV_CODEC_ID_MPEG4:
777         s->out_format      = FMT_H263;
778         s->h263_pred       = 1;
779         s->unrestricted_mv = 1;
780         s->low_delay       = s->max_b_frames ? 0 : 1;
781         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
782         break;
783     case AV_CODEC_ID_MSMPEG4V2:
784         s->out_format      = FMT_H263;
785         s->h263_pred       = 1;
786         s->unrestricted_mv = 1;
787         s->msmpeg4_version = 2;
788         avctx->delay       = 0;
789         s->low_delay       = 1;
790         break;
791     case AV_CODEC_ID_MSMPEG4V3:
792         s->out_format        = FMT_H263;
793         s->h263_pred         = 1;
794         s->unrestricted_mv   = 1;
795         s->msmpeg4_version   = 3;
796         s->flipflop_rounding = 1;
797         avctx->delay         = 0;
798         s->low_delay         = 1;
799         break;
800     case AV_CODEC_ID_WMV1:
801         s->out_format        = FMT_H263;
802         s->h263_pred         = 1;
803         s->unrestricted_mv   = 1;
804         s->msmpeg4_version   = 4;
805         s->flipflop_rounding = 1;
806         avctx->delay         = 0;
807         s->low_delay         = 1;
808         break;
809     case AV_CODEC_ID_WMV2:
810         s->out_format        = FMT_H263;
811         s->h263_pred         = 1;
812         s->unrestricted_mv   = 1;
813         s->msmpeg4_version   = 5;
814         s->flipflop_rounding = 1;
815         avctx->delay         = 0;
816         s->low_delay         = 1;
817         break;
818     default:
819         return -1;
820     }
821
822     avctx->has_b_frames = !s->low_delay;
823
824     s->encoding = 1;
825
826     s->progressive_frame    =
827     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
828                                                 CODEC_FLAG_INTERLACED_ME) ||
829                                 s->alternate_scan);
830
831     /* init */
832     if (ff_MPV_common_init(s) < 0)
833         return -1;
834
835     if (ARCH_X86)
836         ff_MPV_encode_init_x86(s);
837
838     if (!s->dct_quantize)
839         s->dct_quantize = ff_dct_quantize_c;
840     if (!s->denoise_dct)
841         s->denoise_dct  = denoise_dct_c;
842     s->fast_dct_quantize = s->dct_quantize;
843     if (avctx->trellis)
844         s->dct_quantize  = dct_quantize_trellis_c;
845
846     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
847         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
848
849     s->quant_precision = 5;
850
851     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
852     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
853
854     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
855         ff_h261_encode_init(s);
856     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
857         ff_h263_encode_init(s);
858     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
859         ff_msmpeg4_encode_init(s);
860     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
861         && s->out_format == FMT_MPEG1)
862         ff_mpeg1_encode_init(s);
863
864     /* init q matrix */
865     for (i = 0; i < 64; i++) {
866         int j = s->dsp.idct_permutation[i];
867         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
868             s->mpeg_quant) {
869             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
870             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
871         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
872             s->intra_matrix[j] =
873             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
874         } else {
875             /* mpeg1/2 */
876             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
877             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
878         }
879         if (s->avctx->intra_matrix)
880             s->intra_matrix[j] = s->avctx->intra_matrix[i];
881         if (s->avctx->inter_matrix)
882             s->inter_matrix[j] = s->avctx->inter_matrix[i];
883     }
884
885     /* precompute matrix */
886     /* for mjpeg, we do include qscale in the matrix */
887     if (s->out_format != FMT_MJPEG) {
888         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
889                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
890                           31, 1);
891         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
892                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
893                           31, 0);
894     }
895
896     if (ff_rate_control_init(s) < 0)
897         return -1;
898
899     return 0;
900 }
901
902 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
903 {
904     MpegEncContext *s = avctx->priv_data;
905
906     ff_rate_control_uninit(s);
907
908     ff_MPV_common_end(s);
909     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
910         s->out_format == FMT_MJPEG)
911         ff_mjpeg_encode_close(s);
912
913     av_freep(&avctx->extradata);
914
915     return 0;
916 }
917
/**
 * Sum of absolute errors of a 16x16 pixel block against a constant value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
931
932 static int get_intra_count(MpegEncContext *s, uint8_t *src,
933                            uint8_t *ref, int stride)
934 {
935     int x, y, w, h;
936     int acc = 0;
937
938     w = s->width  & ~15;
939     h = s->height & ~15;
940
941     for (y = 0; y < h; y += 16) {
942         for (x = 0; x < w; x += 16) {
943             int offset = x + y * stride;
944             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
945                                      16);
946             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
947             int sae  = get_sae(src + offset, mean, stride);
948
949             acc += sae + 500 < sad;
950         }
951     }
952     return acc;
953 }
954
955
956 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
957 {
958     AVFrame *pic = NULL;
959     int64_t pts;
960     int i;
961     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
962                                                  (s->low_delay ? 0 : 1);
963     int direct = 1;
964
965     if (pic_arg) {
966         pts = pic_arg->pts;
967         pic_arg->display_picture_number = s->input_picture_number++;
968
969         if (pts != AV_NOPTS_VALUE) {
970             if (s->user_specified_pts != AV_NOPTS_VALUE) {
971                 int64_t time = pts;
972                 int64_t last = s->user_specified_pts;
973
974                 if (time <= last) {
975                     av_log(s->avctx, AV_LOG_ERROR,
976                            "Error, Invalid timestamp=%"PRId64", "
977                            "last=%"PRId64"\n", pts, s->user_specified_pts);
978                     return -1;
979                 }
980
981                 if (!s->low_delay && pic_arg->display_picture_number == 1)
982                     s->dts_delta = time - last;
983             }
984             s->user_specified_pts = pts;
985         } else {
986             if (s->user_specified_pts != AV_NOPTS_VALUE) {
987                 s->user_specified_pts =
988                 pts = s->user_specified_pts + 1;
989                 av_log(s->avctx, AV_LOG_INFO,
990                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
991                        pts);
992             } else {
993                 pts = pic_arg->display_picture_number;
994             }
995         }
996     }
997
998   if (pic_arg) {
999     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
1000         direct = 0;
1001     if (pic_arg->linesize[0] != s->linesize)
1002         direct = 0;
1003     if (pic_arg->linesize[1] != s->uvlinesize)
1004         direct = 0;
1005     if (pic_arg->linesize[2] != s->uvlinesize)
1006         direct = 0;
1007
1008     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
1009     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
1010
1011     if (direct) {
1012         i = ff_find_unused_picture(s, 1);
1013         if (i < 0)
1014             return i;
1015
1016         pic = &s->picture[i].f;
1017         pic->reference = 3;
1018
1019         for (i = 0; i < 4; i++) {
1020             pic->data[i]     = pic_arg->data[i];
1021             pic->linesize[i] = pic_arg->linesize[i];
1022         }
1023         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1024             return -1;
1025         }
1026     } else {
1027         i = ff_find_unused_picture(s, 0);
1028         if (i < 0)
1029             return i;
1030
1031         pic = &s->picture[i].f;
1032         pic->reference = 3;
1033
1034         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1035             return -1;
1036         }
1037
1038         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1039             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1040             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1041             // empty
1042         } else {
1043             int h_chroma_shift, v_chroma_shift;
1044             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1045                                           &v_chroma_shift);
1046
1047             for (i = 0; i < 3; i++) {
1048                 int src_stride = pic_arg->linesize[i];
1049                 int dst_stride = i ? s->uvlinesize : s->linesize;
1050                 int h_shift = i ? h_chroma_shift : 0;
1051                 int v_shift = i ? v_chroma_shift : 0;
1052                 int w = s->width  >> h_shift;
1053                 int h = s->height >> v_shift;
1054                 uint8_t *src = pic_arg->data[i];
1055                 uint8_t *dst = pic->data[i];
1056
1057                 if(s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1058                     h= ((s->height+15)/16*16)>>v_shift;
1059                 }
1060
1061                 if (!s->avctx->rc_buffer_size)
1062                     dst += INPLACE_OFFSET;
1063
1064                 if (src_stride == dst_stride)
1065                     memcpy(dst, src, src_stride * h);
1066                 else {
1067                     while (h--) {
1068                         memcpy(dst, src, w);
1069                         dst += dst_stride;
1070                         src += src_stride;
1071                     }
1072                 }
1073             }
1074         }
1075     }
1076     copy_picture_attributes(s, pic, pic_arg);
1077     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1078   }
1079
1080     /* shift buffer entries */
1081     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1082         s->input_picture[i - 1] = s->input_picture[i];
1083
1084     s->input_picture[encoding_delay] = (Picture*) pic;
1085
1086     return 0;
1087 }
1088
1089 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1090 {
1091     int x, y, plane;
1092     int score = 0;
1093     int64_t score64 = 0;
1094
1095     for (plane = 0; plane < 3; plane++) {
1096         const int stride = p->f.linesize[plane];
1097         const int bw = plane ? 1 : 2;
1098         for (y = 0; y < s->mb_height * bw; y++) {
1099             for (x = 0; x < s->mb_width * bw; x++) {
1100                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1101                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1102                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1103                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1104
1105                 switch (s->avctx->frame_skip_exp) {
1106                 case 0: score    =  FFMAX(score, v);          break;
1107                 case 1: score   += FFABS(v);                  break;
1108                 case 2: score   += v * v;                     break;
1109                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1110                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1111                 }
1112             }
1113         }
1114     }
1115
1116     if (score)
1117         score64 = score;
1118
1119     if (score64 < s->avctx->frame_skip_threshold)
1120         return 1;
1121     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1122         return 1;
1123     return 0;
1124 }
1125
1126 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1127 {
1128     AVPacket pkt = { 0 };
1129     int ret, got_output;
1130
1131     av_init_packet(&pkt);
1132     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1133     if (ret < 0)
1134         return ret;
1135
1136     ret = pkt.size;
1137     av_free_packet(&pkt);
1138     return ret;
1139 }
1140
1141 static int estimate_best_b_count(MpegEncContext *s)
1142 {
1143     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1144     AVCodecContext *c = avcodec_alloc_context3(NULL);
1145     AVFrame input[FF_MAX_B_FRAMES + 2];
1146     const int scale = s->avctx->brd_scale;
1147     int i, j, out_size, p_lambda, b_lambda, lambda2;
1148     int64_t best_rd  = INT64_MAX;
1149     int best_b_count = -1;
1150
1151     av_assert0(scale >= 0 && scale <= 3);
1152
1153     //emms_c();
1154     //s->next_picture_ptr->quality;
1155     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1156     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1157     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1158     if (!b_lambda) // FIXME we should do this somewhere else
1159         b_lambda = p_lambda;
1160     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1161                FF_LAMBDA_SHIFT;
1162
1163     c->width        = s->width  >> scale;
1164     c->height       = s->height >> scale;
1165     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1166                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1167     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1168     c->mb_decision  = s->avctx->mb_decision;
1169     c->me_cmp       = s->avctx->me_cmp;
1170     c->mb_cmp       = s->avctx->mb_cmp;
1171     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1172     c->pix_fmt      = PIX_FMT_YUV420P;
1173     c->time_base    = s->avctx->time_base;
1174     c->max_b_frames = s->max_b_frames;
1175
1176     if (avcodec_open2(c, codec, NULL) < 0)
1177         return -1;
1178
1179     for (i = 0; i < s->max_b_frames + 2; i++) {
1180         int ysize = c->width * c->height;
1181         int csize = (c->width / 2) * (c->height / 2);
1182         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1183                                                 s->next_picture_ptr;
1184
1185         avcodec_get_frame_defaults(&input[i]);
1186         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1187         input[i].data[1]     = input[i].data[0] + ysize;
1188         input[i].data[2]     = input[i].data[1] + csize;
1189         input[i].linesize[0] = c->width;
1190         input[i].linesize[1] =
1191         input[i].linesize[2] = c->width / 2;
1192
1193         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1194             pre_input = *pre_input_ptr;
1195
1196             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1197                 pre_input.f.data[0] += INPLACE_OFFSET;
1198                 pre_input.f.data[1] += INPLACE_OFFSET;
1199                 pre_input.f.data[2] += INPLACE_OFFSET;
1200             }
1201
1202             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1203                                  pre_input.f.data[0], pre_input.f.linesize[0],
1204                                  c->width,      c->height);
1205             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1206                                  pre_input.f.data[1], pre_input.f.linesize[1],
1207                                  c->width >> 1, c->height >> 1);
1208             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1209                                  pre_input.f.data[2], pre_input.f.linesize[2],
1210                                  c->width >> 1, c->height >> 1);
1211         }
1212     }
1213
1214     for (j = 0; j < s->max_b_frames + 1; j++) {
1215         int64_t rd = 0;
1216
1217         if (!s->input_picture[j])
1218             break;
1219
1220         c->error[0] = c->error[1] = c->error[2] = 0;
1221
1222         input[0].pict_type = AV_PICTURE_TYPE_I;
1223         input[0].quality   = 1 * FF_QP2LAMBDA;
1224
1225         out_size = encode_frame(c, &input[0]);
1226
1227         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1228
1229         for (i = 0; i < s->max_b_frames + 1; i++) {
1230             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1231
1232             input[i + 1].pict_type = is_p ?
1233                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1234             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1235
1236             out_size = encode_frame(c, &input[i + 1]);
1237
1238             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1239         }
1240
1241         /* get the delayed frames */
1242         while (out_size) {
1243             out_size = encode_frame(c, NULL);
1244             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1245         }
1246
1247         rd += c->error[0] + c->error[1] + c->error[2];
1248
1249         if (rd < best_rd) {
1250             best_rd = rd;
1251             best_b_count = j;
1252         }
1253     }
1254
1255     avcodec_close(c);
1256     av_freep(&c);
1257
1258     for (i = 0; i < s->max_b_frames + 2; i++) {
1259         av_freep(&input[i].data[0]);
1260     }
1261
1262     return best_b_count;
1263 }
1264
1265 static int select_input_picture(MpegEncContext *s)
1266 {
1267     int i;
1268
1269     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1270         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1271     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1272
1273     /* set next picture type & ordering */
1274     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1275         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1276             s->next_picture_ptr == NULL || s->intra_only) {
1277             s->reordered_input_picture[0] = s->input_picture[0];
1278             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1279             s->reordered_input_picture[0]->f.coded_picture_number =
1280                 s->coded_picture_number++;
1281         } else {
1282             int b_frames;
1283
1284             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1285                 if (s->picture_in_gop_number < s->gop_size &&
1286                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1287                     // FIXME check that te gop check above is +-1 correct
1288                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1289                     //       s->input_picture[0]->f.data[0],
1290                     //       s->input_picture[0]->pts);
1291
1292                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1293                         for (i = 0; i < 4; i++)
1294                             s->input_picture[0]->f.data[i] = NULL;
1295                         s->input_picture[0]->f.type = 0;
1296                     } else {
1297                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1298                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1299
1300                         s->avctx->release_buffer(s->avctx,
1301                                                  &s->input_picture[0]->f);
1302                     }
1303
1304                     emms_c();
1305                     ff_vbv_update(s, 0);
1306
1307                     goto no_output_pic;
1308                 }
1309             }
1310
1311             if (s->flags & CODEC_FLAG_PASS2) {
1312                 for (i = 0; i < s->max_b_frames + 1; i++) {
1313                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1314
1315                     if (pict_num >= s->rc_context.num_entries)
1316                         break;
1317                     if (!s->input_picture[i]) {
1318                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1319                         break;
1320                     }
1321
1322                     s->input_picture[i]->f.pict_type =
1323                         s->rc_context.entry[pict_num].new_pict_type;
1324                 }
1325             }
1326
1327             if (s->avctx->b_frame_strategy == 0) {
1328                 b_frames = s->max_b_frames;
1329                 while (b_frames && !s->input_picture[b_frames])
1330                     b_frames--;
1331             } else if (s->avctx->b_frame_strategy == 1) {
1332                 for (i = 1; i < s->max_b_frames + 1; i++) {
1333                     if (s->input_picture[i] &&
1334                         s->input_picture[i]->b_frame_score == 0) {
1335                         s->input_picture[i]->b_frame_score =
1336                             get_intra_count(s,
1337                                             s->input_picture[i    ]->f.data[0],
1338                                             s->input_picture[i - 1]->f.data[0],
1339                                             s->linesize) + 1;
1340                     }
1341                 }
1342                 for (i = 0; i < s->max_b_frames + 1; i++) {
1343                     if (s->input_picture[i] == NULL ||
1344                         s->input_picture[i]->b_frame_score - 1 >
1345                             s->mb_num / s->avctx->b_sensitivity)
1346                         break;
1347                 }
1348
1349                 b_frames = FFMAX(0, i - 1);
1350
1351                 /* reset scores */
1352                 for (i = 0; i < b_frames + 1; i++) {
1353                     s->input_picture[i]->b_frame_score = 0;
1354                 }
1355             } else if (s->avctx->b_frame_strategy == 2) {
1356                 b_frames = estimate_best_b_count(s);
1357             } else {
1358                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1359                 b_frames = 0;
1360             }
1361
1362             emms_c();
1363             //static int b_count = 0;
1364             //b_count += b_frames;
1365             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1366
1367             for (i = b_frames - 1; i >= 0; i--) {
1368                 int type = s->input_picture[i]->f.pict_type;
1369                 if (type && type != AV_PICTURE_TYPE_B)
1370                     b_frames = i;
1371             }
1372             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1373                 b_frames == s->max_b_frames) {
1374                 av_log(s->avctx, AV_LOG_ERROR,
1375                        "warning, too many b frames in a row\n");
1376             }
1377
1378             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1379                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1380                     s->gop_size > s->picture_in_gop_number) {
1381                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1382                 } else {
1383                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1384                         b_frames = 0;
1385                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1386                 }
1387             }
1388
1389             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1390                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1391                 b_frames--;
1392
1393             s->reordered_input_picture[0] = s->input_picture[b_frames];
1394             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1395                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1396             s->reordered_input_picture[0]->f.coded_picture_number =
1397                 s->coded_picture_number++;
1398             for (i = 0; i < b_frames; i++) {
1399                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1400                 s->reordered_input_picture[i + 1]->f.pict_type =
1401                     AV_PICTURE_TYPE_B;
1402                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1403                     s->coded_picture_number++;
1404             }
1405         }
1406     }
1407 no_output_pic:
1408     if (s->reordered_input_picture[0]) {
1409         s->reordered_input_picture[0]->f.reference =
1410            s->reordered_input_picture[0]->f.pict_type !=
1411                AV_PICTURE_TYPE_B ? 3 : 0;
1412
1413         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1414
1415         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1416             s->avctx->rc_buffer_size) {
1417             // input is a shared pix, so we can't modifiy it -> alloc a new
1418             // one & ensure that the shared one is reuseable
1419
1420             Picture *pic;
1421             int i = ff_find_unused_picture(s, 0);
1422             if (i < 0)
1423                 return i;
1424             pic = &s->picture[i];
1425
1426             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1427             if (ff_alloc_picture(s, pic, 0) < 0) {
1428                 return -1;
1429             }
1430
1431             /* mark us unused / free shared pic */
1432             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1433                 s->avctx->release_buffer(s->avctx,
1434                                          &s->reordered_input_picture[0]->f);
1435             for (i = 0; i < 4; i++)
1436                 s->reordered_input_picture[0]->f.data[i] = NULL;
1437             s->reordered_input_picture[0]->f.type = 0;
1438
1439             copy_picture_attributes(s, &pic->f,
1440                                     &s->reordered_input_picture[0]->f);
1441
1442             s->current_picture_ptr = pic;
1443         } else {
1444             // input is not a shared pix -> reuse buffer for current_pix
1445
1446             assert(s->reordered_input_picture[0]->f.type ==
1447                        FF_BUFFER_TYPE_USER ||
1448                    s->reordered_input_picture[0]->f.type ==
1449                        FF_BUFFER_TYPE_INTERNAL);
1450
1451             s->current_picture_ptr = s->reordered_input_picture[0];
1452             for (i = 0; i < 4; i++) {
1453                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1454             }
1455         }
1456         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1457
1458         s->picture_number = s->new_picture.f.display_picture_number;
1459         //printf("dpn:%d\n", s->picture_number);
1460     } else {
1461         memset(&s->new_picture, 0, sizeof(Picture));
1462     }
1463     return 0;
1464 }
1465
1466 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1467                           AVFrame *pic_arg, int *got_packet)
1468 {
1469     MpegEncContext *s = avctx->priv_data;
1470     int i, stuffing_count, ret;
1471     int context_count = s->slice_context_count;
1472
1473     s->picture_in_gop_number++;
1474
1475     if (load_input_picture(s, pic_arg) < 0)
1476         return -1;
1477
1478     if (select_input_picture(s) < 0) {
1479         return -1;
1480     }
1481
1482     /* output? */
1483     if (s->new_picture.f.data[0]) {
1484         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1485             return ret;
1486         if (s->mb_info) {
1487             s->mb_info_ptr = av_packet_new_side_data(pkt,
1488                                  AV_PKT_DATA_H263_MB_INFO,
1489                                  s->mb_width*s->mb_height*12);
1490             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1491         }
1492
1493         for (i = 0; i < context_count; i++) {
1494             int start_y = s->thread_context[i]->start_mb_y;
1495             int   end_y = s->thread_context[i]->  end_mb_y;
1496             int h       = s->mb_height;
1497             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1498             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1499
1500             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1501         }
1502
1503         s->pict_type = s->new_picture.f.pict_type;
1504         //emms_c();
1505         //printf("qs:%f %f %d\n", s->new_picture.quality,
1506         //       s->current_picture.quality, s->qscale);
1507         ff_MPV_frame_start(s, avctx);
1508 vbv_retry:
1509         if (encode_picture(s, s->picture_number) < 0)
1510             return -1;
1511
1512         avctx->header_bits = s->header_bits;
1513         avctx->mv_bits     = s->mv_bits;
1514         avctx->misc_bits   = s->misc_bits;
1515         avctx->i_tex_bits  = s->i_tex_bits;
1516         avctx->p_tex_bits  = s->p_tex_bits;
1517         avctx->i_count     = s->i_count;
1518         // FIXME f/b_count in avctx
1519         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1520         avctx->skip_count  = s->skip_count;
1521
1522         ff_MPV_frame_end(s);
1523
1524         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1525             ff_mjpeg_encode_picture_trailer(s);
1526
1527         if (avctx->rc_buffer_size) {
1528             RateControlContext *rcc = &s->rc_context;
1529             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1530
1531             if (put_bits_count(&s->pb) > max_size &&
1532                 s->lambda < s->avctx->lmax) {
1533                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1534                                        (s->qscale + 1) / s->qscale);
1535                 if (s->adaptive_quant) {
1536                     int i;
1537                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1538                         s->lambda_table[i] =
1539                             FFMAX(s->lambda_table[i] + 1,
1540                                   s->lambda_table[i] * (s->qscale + 1) /
1541                                   s->qscale);
1542                 }
1543                 s->mb_skipped = 0;        // done in MPV_frame_start()
1544                 // done in encode_picture() so we must undo it
1545                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1546                     if (s->flipflop_rounding          ||
1547                         s->codec_id == AV_CODEC_ID_H263P ||
1548                         s->codec_id == AV_CODEC_ID_MPEG4)
1549                         s->no_rounding ^= 1;
1550                 }
1551                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1552                     s->time_base       = s->last_time_base;
1553                     s->last_non_b_time = s->time - s->pp_time;
1554                 }
1555                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1556                 for (i = 0; i < context_count; i++) {
1557                     PutBitContext *pb = &s->thread_context[i]->pb;
1558                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1559                 }
1560                 goto vbv_retry;
1561             }
1562
1563             assert(s->avctx->rc_max_rate);
1564         }
1565
1566         if (s->flags & CODEC_FLAG_PASS1)
1567             ff_write_pass1_stats(s);
1568
1569         for (i = 0; i < 4; i++) {
1570             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1571             avctx->error[i] += s->current_picture_ptr->f.error[i];
1572         }
1573
1574         if (s->flags & CODEC_FLAG_PASS1)
1575             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1576                    avctx->i_tex_bits + avctx->p_tex_bits ==
1577                        put_bits_count(&s->pb));
1578         flush_put_bits(&s->pb);
1579         s->frame_bits  = put_bits_count(&s->pb);
1580
1581         stuffing_count = ff_vbv_update(s, s->frame_bits);
1582         if (stuffing_count) {
1583             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1584                     stuffing_count + 50) {
1585                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1586                 return -1;
1587             }
1588
1589             switch (s->codec_id) {
1590             case AV_CODEC_ID_MPEG1VIDEO:
1591             case AV_CODEC_ID_MPEG2VIDEO:
1592                 while (stuffing_count--) {
1593                     put_bits(&s->pb, 8, 0);
1594                 }
1595             break;
1596             case AV_CODEC_ID_MPEG4:
1597                 put_bits(&s->pb, 16, 0);
1598                 put_bits(&s->pb, 16, 0x1C3);
1599                 stuffing_count -= 4;
1600                 while (stuffing_count--) {
1601                     put_bits(&s->pb, 8, 0xFF);
1602                 }
1603             break;
1604             default:
1605                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1606             }
1607             flush_put_bits(&s->pb);
1608             s->frame_bits  = put_bits_count(&s->pb);
1609         }
1610
1611         /* update mpeg1/2 vbv_delay for CBR */
1612         if (s->avctx->rc_max_rate                          &&
1613             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1614             s->out_format == FMT_MPEG1                     &&
1615             90000LL * (avctx->rc_buffer_size - 1) <=
1616                 s->avctx->rc_max_rate * 0xFFFFLL) {
1617             int vbv_delay, min_delay;
1618             double inbits  = s->avctx->rc_max_rate *
1619                              av_q2d(s->avctx->time_base);
1620             int    minbits = s->frame_bits - 8 *
1621                              (s->vbv_delay_ptr - s->pb.buf - 1);
1622             double bits    = s->rc_context.buffer_index + minbits - inbits;
1623
1624             if (bits < 0)
1625                 av_log(s->avctx, AV_LOG_ERROR,
1626                        "Internal error, negative bits\n");
1627
1628             assert(s->repeat_first_field == 0);
1629
1630             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1631             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1632                         s->avctx->rc_max_rate;
1633
1634             vbv_delay = FFMAX(vbv_delay, min_delay);
1635
1636             av_assert0(vbv_delay < 0xFFFF);
1637
1638             s->vbv_delay_ptr[0] &= 0xF8;
1639             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1640             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1641             s->vbv_delay_ptr[2] &= 0x07;
1642             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1643             avctx->vbv_delay     = vbv_delay * 300;
1644         }
1645         s->total_bits     += s->frame_bits;
1646         avctx->frame_bits  = s->frame_bits;
1647
1648         pkt->pts = s->current_picture.f.pts;
1649         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1650             if (!s->current_picture.f.coded_picture_number)
1651                 pkt->dts = pkt->pts - s->dts_delta;
1652             else
1653                 pkt->dts = s->reordered_pts;
1654             s->reordered_pts = pkt->pts;
1655         } else
1656             pkt->dts = pkt->pts;
1657         if (s->current_picture.f.key_frame)
1658             pkt->flags |= AV_PKT_FLAG_KEY;
1659         if (s->mb_info)
1660             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1661     } else {
1662         s->frame_bits = 0;
1663     }
1664     assert((s->frame_bits & 7) == 0);
1665
1666     pkt->size = s->frame_bits / 8;
1667     *got_packet = !!pkt->size;
1668     return 0;
1669 }
1670
1671 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1672                                                 int n, int threshold)
1673 {
1674     static const char tab[64] = {
1675         3, 2, 2, 1, 1, 1, 1, 1,
1676         1, 1, 1, 1, 1, 1, 1, 1,
1677         1, 1, 1, 1, 1, 1, 1, 1,
1678         0, 0, 0, 0, 0, 0, 0, 0,
1679         0, 0, 0, 0, 0, 0, 0, 0,
1680         0, 0, 0, 0, 0, 0, 0, 0,
1681         0, 0, 0, 0, 0, 0, 0, 0,
1682         0, 0, 0, 0, 0, 0, 0, 0
1683     };
1684     int score = 0;
1685     int run = 0;
1686     int i;
1687     DCTELEM *block = s->block[n];
1688     const int last_index = s->block_last_index[n];
1689     int skip_dc;
1690
1691     if (threshold < 0) {
1692         skip_dc = 0;
1693         threshold = -threshold;
1694     } else
1695         skip_dc = 1;
1696
1697     /* Are all we could set to zero already zero? */
1698     if (last_index <= skip_dc - 1)
1699         return;
1700
1701     for (i = 0; i <= last_index; i++) {
1702         const int j = s->intra_scantable.permutated[i];
1703         const int level = FFABS(block[j]);
1704         if (level == 1) {
1705             if (skip_dc && i == 0)
1706                 continue;
1707             score += tab[run];
1708             run = 0;
1709         } else if (level > 1) {
1710             return;
1711         } else {
1712             run++;
1713         }
1714     }
1715     if (score >= threshold)
1716         return;
1717     for (i = skip_dc; i <= last_index; i++) {
1718         const int j = s->intra_scantable.permutated[i];
1719         block[j] = 0;
1720     }
1721     if (block[0])
1722         s->block_last_index[n] = 0;
1723     else
1724         s->block_last_index[n] = -1;
1725 }
1726
1727 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1728                                int last_index)
1729 {
1730     int i;
1731     const int maxlevel = s->max_qcoeff;
1732     const int minlevel = s->min_qcoeff;
1733     int overflow = 0;
1734
1735     if (s->mb_intra) {
1736         i = 1; // skip clipping of intra dc
1737     } else
1738         i = 0;
1739
1740     for (; i <= last_index; i++) {
1741         const int j = s->intra_scantable.permutated[i];
1742         int level = block[j];
1743
1744         if (level > maxlevel) {
1745             level = maxlevel;
1746             overflow++;
1747         } else if (level < minlevel) {
1748             level = minlevel;
1749             overflow++;
1750         }
1751
1752         block[j] = level;
1753     }
1754
1755     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1756         av_log(s->avctx, AV_LOG_INFO,
1757                "warning, clipping %d dct coefficients to %d..%d\n",
1758                overflow, minlevel, maxlevel);
1759 }
1760
/**
 * Compute a per-pixel weight for an 8x8 block from the local pixel spread.
 *
 * For every pixel the (up to) 3x3 neighbourhood, clipped to the 8x8 block,
 * is examined and 36 * sqrt(count * sum(v^2) - sum(v)^2) / count is stored.
 *
 * @param weight output, 64 entries in raster order
 * @param ptr    top left pixel of the 8x8 block
 * @param stride distance in bytes between two lines of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1784
1785 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1786                                                 int motion_x, int motion_y,
1787                                                 int mb_block_height,
1788                                                 int mb_block_count)
1789 {
1790     int16_t weight[8][64];
1791     DCTELEM orig[8][64];
1792     const int mb_x = s->mb_x;
1793     const int mb_y = s->mb_y;
1794     int i;
1795     int skip_dct[8];
1796     int dct_offset = s->linesize * 8; // default for progressive frames
1797     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1798     int wrap_y, wrap_c;
1799
1800     for (i = 0; i < mb_block_count; i++)
1801         skip_dct[i] = s->skipdct;
1802
1803     if (s->adaptive_quant) {
1804         const int last_qp = s->qscale;
1805         const int mb_xy = mb_x + mb_y * s->mb_stride;
1806
1807         s->lambda = s->lambda_table[mb_xy];
1808         update_qscale(s);
1809
1810         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1811             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1812             s->dquant = s->qscale - last_qp;
1813
1814             if (s->out_format == FMT_H263) {
1815                 s->dquant = av_clip(s->dquant, -2, 2);
1816
1817                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1818                     if (!s->mb_intra) {
1819                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1820                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1821                                 s->dquant = 0;
1822                         }
1823                         if (s->mv_type == MV_TYPE_8X8)
1824                             s->dquant = 0;
1825                     }
1826                 }
1827             }
1828         }
1829         ff_set_qscale(s, last_qp + s->dquant);
1830     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1831         ff_set_qscale(s, s->qscale + s->dquant);
1832
1833     wrap_y = s->linesize;
1834     wrap_c = s->uvlinesize;
1835     ptr_y  = s->new_picture.f.data[0] +
1836              (mb_y * 16 * wrap_y)              + mb_x * 16;
1837     ptr_cb = s->new_picture.f.data[1] +
1838              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1839     ptr_cr = s->new_picture.f.data[2] +
1840              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1841
1842     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1843         uint8_t *ebuf = s->edge_emu_buffer + 32;
1844         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1845                                 mb_y * 16, s->width, s->height);
1846         ptr_y = ebuf;
1847         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1848                                 mb_block_height, mb_x * 8, mb_y * 8,
1849                                 (s->width+1) >> 1, (s->height+1) >> 1);
1850         ptr_cb = ebuf + 18 * wrap_y;
1851         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1852                                 mb_block_height, mb_x * 8, mb_y * 8,
1853                                 (s->width+1) >> 1, (s->height+1) >> 1);
1854         ptr_cr = ebuf + 18 * wrap_y + 8;
1855     }
1856
1857     if (s->mb_intra) {
1858         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1859             int progressive_score, interlaced_score;
1860
1861             s->interlaced_dct = 0;
1862             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1863                                                     NULL, wrap_y, 8) +
1864                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1865                                                     NULL, wrap_y, 8) - 400;
1866
1867             if (progressive_score > 0) {
1868                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1869                                                        NULL, wrap_y * 2, 8) +
1870                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1871                                                        NULL, wrap_y * 2, 8);
1872                 if (progressive_score > interlaced_score) {
1873                     s->interlaced_dct = 1;
1874
1875                     dct_offset = wrap_y;
1876                     wrap_y <<= 1;
1877                     if (s->chroma_format == CHROMA_422)
1878                         wrap_c <<= 1;
1879                 }
1880             }
1881         }
1882
1883         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1884         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1885         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1886         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1887
1888         if (s->flags & CODEC_FLAG_GRAY) {
1889             skip_dct[4] = 1;
1890             skip_dct[5] = 1;
1891         } else {
1892             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1893             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1894             if (!s->chroma_y_shift) { /* 422 */
1895                 s->dsp.get_pixels(s->block[6],
1896                                   ptr_cb + (dct_offset >> 1), wrap_c);
1897                 s->dsp.get_pixels(s->block[7],
1898                                   ptr_cr + (dct_offset >> 1), wrap_c);
1899             }
1900         }
1901     } else {
1902         op_pixels_func (*op_pix)[4];
1903         qpel_mc_func (*op_qpix)[16];
1904         uint8_t *dest_y, *dest_cb, *dest_cr;
1905
1906         dest_y  = s->dest[0];
1907         dest_cb = s->dest[1];
1908         dest_cr = s->dest[2];
1909
1910         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1911             op_pix  = s->dsp.put_pixels_tab;
1912             op_qpix = s->dsp.put_qpel_pixels_tab;
1913         } else {
1914             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1915             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1916         }
1917
1918         if (s->mv_dir & MV_DIR_FORWARD) {
1919             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1920                           s->last_picture.f.data,
1921                           op_pix, op_qpix);
1922             op_pix  = s->dsp.avg_pixels_tab;
1923             op_qpix = s->dsp.avg_qpel_pixels_tab;
1924         }
1925         if (s->mv_dir & MV_DIR_BACKWARD) {
1926             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1927                           s->next_picture.f.data,
1928                           op_pix, op_qpix);
1929         }
1930
1931         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1932             int progressive_score, interlaced_score;
1933
1934             s->interlaced_dct = 0;
1935             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1936                                                     ptr_y,              wrap_y,
1937                                                     8) +
1938                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1939                                                     ptr_y + wrap_y * 8, wrap_y,
1940                                                     8) - 400;
1941
1942             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1943                 progressive_score -= 400;
1944
1945             if (progressive_score > 0) {
1946                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1947                                                        ptr_y,
1948                                                        wrap_y * 2, 8) +
1949                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1950                                                        ptr_y + wrap_y,
1951                                                        wrap_y * 2, 8);
1952
1953                 if (progressive_score > interlaced_score) {
1954                     s->interlaced_dct = 1;
1955
1956                     dct_offset = wrap_y;
1957                     wrap_y <<= 1;
1958                     if (s->chroma_format == CHROMA_422)
1959                         wrap_c <<= 1;
1960                 }
1961             }
1962         }
1963
1964         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1965         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1966         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1967                            dest_y + dct_offset, wrap_y);
1968         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1969                            dest_y + dct_offset + 8, wrap_y);
1970
1971         if (s->flags & CODEC_FLAG_GRAY) {
1972             skip_dct[4] = 1;
1973             skip_dct[5] = 1;
1974         } else {
1975             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1976             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1977             if (!s->chroma_y_shift) { /* 422 */
1978                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1979                                    dest_cb + (dct_offset >> 1), wrap_c);
1980                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1981                                    dest_cr + (dct_offset >> 1), wrap_c);
1982             }
1983         }
1984         /* pre quantization */
1985         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1986                 2 * s->qscale * s->qscale) {
1987             // FIXME optimize
1988             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1989                               wrap_y, 8) < 20 * s->qscale)
1990                 skip_dct[0] = 1;
1991             if (s->dsp.sad[1](NULL, ptr_y + 8,
1992                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1993                 skip_dct[1] = 1;
1994             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1995                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1996                 skip_dct[2] = 1;
1997             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1998                               dest_y + dct_offset + 8,
1999                               wrap_y, 8) < 20 * s->qscale)
2000                 skip_dct[3] = 1;
2001             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2002                               wrap_c, 8) < 20 * s->qscale)
2003                 skip_dct[4] = 1;
2004             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2005                               wrap_c, 8) < 20 * s->qscale)
2006                 skip_dct[5] = 1;
2007             if (!s->chroma_y_shift) { /* 422 */
2008                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2009                                   dest_cb + (dct_offset >> 1),
2010                                   wrap_c, 8) < 20 * s->qscale)
2011                     skip_dct[6] = 1;
2012                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2013                                   dest_cr + (dct_offset >> 1),
2014                                   wrap_c, 8) < 20 * s->qscale)
2015                     skip_dct[7] = 1;
2016             }
2017         }
2018     }
2019
2020     if (s->quantizer_noise_shaping) {
2021         if (!skip_dct[0])
2022             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2023         if (!skip_dct[1])
2024             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2025         if (!skip_dct[2])
2026             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2027         if (!skip_dct[3])
2028             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2029         if (!skip_dct[4])
2030             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2031         if (!skip_dct[5])
2032             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2033         if (!s->chroma_y_shift) { /* 422 */
2034             if (!skip_dct[6])
2035                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2036                                   wrap_c);
2037             if (!skip_dct[7])
2038                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2039                                   wrap_c);
2040         }
2041         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
2042     }
2043
2044     /* DCT & quantize */
2045     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2046     {
2047         for (i = 0; i < mb_block_count; i++) {
2048             if (!skip_dct[i]) {
2049                 int overflow;
2050                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2051                 // FIXME we could decide to change to quantizer instead of
2052                 // clipping
2053                 // JS: I don't think that would be a good idea it could lower
2054                 //     quality instead of improve it. Just INTRADC clipping
2055                 //     deserves changes in quantizer
2056                 if (overflow)
2057                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2058             } else
2059                 s->block_last_index[i] = -1;
2060         }
2061         if (s->quantizer_noise_shaping) {
2062             for (i = 0; i < mb_block_count; i++) {
2063                 if (!skip_dct[i]) {
2064                     s->block_last_index[i] =
2065                         dct_quantize_refine(s, s->block[i], weight[i],
2066                                             orig[i], i, s->qscale);
2067                 }
2068             }
2069         }
2070
2071         if (s->luma_elim_threshold && !s->mb_intra)
2072             for (i = 0; i < 4; i++)
2073                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2074         if (s->chroma_elim_threshold && !s->mb_intra)
2075             for (i = 4; i < mb_block_count; i++)
2076                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2077
2078         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2079             for (i = 0; i < mb_block_count; i++) {
2080                 if (s->block_last_index[i] == -1)
2081                     s->coded_score[i] = INT_MAX / 256;
2082             }
2083         }
2084     }
2085
2086     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2087         s->block_last_index[4] =
2088         s->block_last_index[5] = 0;
2089         s->block[4][0] =
2090         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2091     }
2092
2093     // non c quantize code returns incorrect block_last_index FIXME
2094     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2095         for (i = 0; i < mb_block_count; i++) {
2096             int j;
2097             if (s->block_last_index[i] > 0) {
2098                 for (j = 63; j > 0; j--) {
2099                     if (s->block[i][s->intra_scantable.permutated[j]])
2100                         break;
2101                 }
2102                 s->block_last_index[i] = j;
2103             }
2104         }
2105     }
2106
2107     /* huffman encode */
2108     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2109     case AV_CODEC_ID_MPEG1VIDEO:
2110     case AV_CODEC_ID_MPEG2VIDEO:
2111         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2112             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2113         break;
2114     case AV_CODEC_ID_MPEG4:
2115         if (CONFIG_MPEG4_ENCODER)
2116             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2117         break;
2118     case AV_CODEC_ID_MSMPEG4V2:
2119     case AV_CODEC_ID_MSMPEG4V3:
2120     case AV_CODEC_ID_WMV1:
2121         if (CONFIG_MSMPEG4_ENCODER)
2122             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2123         break;
2124     case AV_CODEC_ID_WMV2:
2125         if (CONFIG_WMV2_ENCODER)
2126             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2127         break;
2128     case AV_CODEC_ID_H261:
2129         if (CONFIG_H261_ENCODER)
2130             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2131         break;
2132     case AV_CODEC_ID_H263:
2133     case AV_CODEC_ID_H263P:
2134     case AV_CODEC_ID_FLV1:
2135     case AV_CODEC_ID_RV10:
2136     case AV_CODEC_ID_RV20:
2137         if (CONFIG_H263_ENCODER)
2138             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2139         break;
2140     case AV_CODEC_ID_MJPEG:
2141     case AV_CODEC_ID_AMV:
2142         if (CONFIG_MJPEG_ENCODER)
2143             ff_mjpeg_encode_mb(s, s->block);
2144         break;
2145     default:
2146         av_assert1(0);
2147     }
2148 }
2149
2150 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2151 {
2152     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2153     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2154 }
2155
2156 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2157     int i;
2158
2159     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2160
2161     /* mpeg1 */
2162     d->mb_skip_run= s->mb_skip_run;
2163     for(i=0; i<3; i++)
2164         d->last_dc[i] = s->last_dc[i];
2165
2166     /* statistics */
2167     d->mv_bits= s->mv_bits;
2168     d->i_tex_bits= s->i_tex_bits;
2169     d->p_tex_bits= s->p_tex_bits;
2170     d->i_count= s->i_count;
2171     d->f_count= s->f_count;
2172     d->b_count= s->b_count;
2173     d->skip_count= s->skip_count;
2174     d->misc_bits= s->misc_bits;
2175     d->last_bits= 0;
2176
2177     d->mb_skipped= 0;
2178     d->qscale= s->qscale;
2179     d->dquant= s->dquant;
2180
2181     d->esc3_level_length= s->esc3_level_length;
2182 }
2183
2184 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2185     int i;
2186
2187     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2188     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2189
2190     /* mpeg1 */
2191     d->mb_skip_run= s->mb_skip_run;
2192     for(i=0; i<3; i++)
2193         d->last_dc[i] = s->last_dc[i];
2194
2195     /* statistics */
2196     d->mv_bits= s->mv_bits;
2197     d->i_tex_bits= s->i_tex_bits;
2198     d->p_tex_bits= s->p_tex_bits;
2199     d->i_count= s->i_count;
2200     d->f_count= s->f_count;
2201     d->b_count= s->b_count;
2202     d->skip_count= s->skip_count;
2203     d->misc_bits= s->misc_bits;
2204
2205     d->mb_intra= s->mb_intra;
2206     d->mb_skipped= s->mb_skipped;
2207     d->mv_type= s->mv_type;
2208     d->mv_dir= s->mv_dir;
2209     d->pb= s->pb;
2210     if(s->data_partitioning){
2211         d->pb2= s->pb2;
2212         d->tex_pb= s->tex_pb;
2213     }
2214     d->block= s->block;
2215     for(i=0; i<8; i++)
2216         d->block_last_index[i]= s->block_last_index[i];
2217     d->interlaced_dct= s->interlaced_dct;
2218     d->qscale= s->qscale;
2219
2220     d->esc3_level_length= s->esc3_level_length;
2221 }
2222
2223 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2224                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2225                            int *dmin, int *next_block, int motion_x, int motion_y)
2226 {
2227     int score;
2228     uint8_t *dest_backup[3];
2229
2230     copy_context_before_encode(s, backup, type);
2231
2232     s->block= s->blocks[*next_block];
2233     s->pb= pb[*next_block];
2234     if(s->data_partitioning){
2235         s->pb2   = pb2   [*next_block];
2236         s->tex_pb= tex_pb[*next_block];
2237     }
2238
2239     if(*next_block){
2240         memcpy(dest_backup, s->dest, sizeof(s->dest));
2241         s->dest[0] = s->rd_scratchpad;
2242         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2243         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2244         assert(s->linesize >= 32); //FIXME
2245     }
2246
2247     encode_mb(s, motion_x, motion_y);
2248
2249     score= put_bits_count(&s->pb);
2250     if(s->data_partitioning){
2251         score+= put_bits_count(&s->pb2);
2252         score+= put_bits_count(&s->tex_pb);
2253     }
2254
2255     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2256         ff_MPV_decode_mb(s, s->block);
2257
2258         score *= s->lambda2;
2259         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2260     }
2261
2262     if(*next_block){
2263         memcpy(s->dest, dest_backup, sizeof(s->dest));
2264     }
2265
2266     if(score<*dmin){
2267         *dmin= score;
2268         *next_block^=1;
2269
2270         copy_context_after_encode(best, s, type);
2271     }
2272 }
2273
2274 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2275     uint32_t *sq = ff_squareTbl + 256;
2276     int acc=0;
2277     int x,y;
2278
2279     if(w==16 && h==16)
2280         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2281     else if(w==8 && h==8)
2282         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2283
2284     for(y=0; y<h; y++){
2285         for(x=0; x<w; x++){
2286             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2287         }
2288     }
2289
2290     av_assert2(acc>=0);
2291
2292     return acc;
2293 }
2294
2295 static int sse_mb(MpegEncContext *s){
2296     int w= 16;
2297     int h= 16;
2298
2299     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2300     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2301
2302     if(w==16 && h==16)
2303       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2304         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2305                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2306                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2307       }else{
2308         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2309                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2310                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2311       }
2312     else
2313         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2314                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2315                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2316 }
2317
2318 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2319     MpegEncContext *s= *(void**)arg;
2320
2321
2322     s->me.pre_pass=1;
2323     s->me.dia_size= s->avctx->pre_dia_size;
2324     s->first_slice_line=1;
2325     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2326         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2327             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2328         }
2329         s->first_slice_line=0;
2330     }
2331
2332     s->me.pre_pass=0;
2333
2334     return 0;
2335 }
2336
2337 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2338     MpegEncContext *s= *(void**)arg;
2339
2340     ff_check_alignment();
2341
2342     s->me.dia_size= s->avctx->dia_size;
2343     s->first_slice_line=1;
2344     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2345         s->mb_x=0; //for block init below
2346         ff_init_block_index(s);
2347         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2348             s->block_index[0]+=2;
2349             s->block_index[1]+=2;
2350             s->block_index[2]+=2;
2351             s->block_index[3]+=2;
2352
2353             /* compute motion vector & mb_type and store in context */
2354             if(s->pict_type==AV_PICTURE_TYPE_B)
2355                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2356             else
2357                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2358         }
2359         s->first_slice_line=0;
2360     }
2361     return 0;
2362 }
2363
2364 static int mb_var_thread(AVCodecContext *c, void *arg){
2365     MpegEncContext *s= *(void**)arg;
2366     int mb_x, mb_y;
2367
2368     ff_check_alignment();
2369
2370     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2371         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2372             int xx = mb_x * 16;
2373             int yy = mb_y * 16;
2374             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2375             int varc;
2376             int sum = s->dsp.pix_sum(pix, s->linesize);
2377
2378             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2379
2380             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2381             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2382             s->me.mb_var_sum_temp    += varc;
2383         }
2384     }
2385     return 0;
2386 }
2387
2388 static void write_slice_end(MpegEncContext *s){
2389     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2390         if(s->partitioned_frame){
2391             ff_mpeg4_merge_partitions(s);
2392         }
2393
2394         ff_mpeg4_stuffing(&s->pb);
2395     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2396         ff_mjpeg_encode_stuffing(s);
2397     }
2398
2399     avpriv_align_put_bits(&s->pb);
2400     flush_put_bits(&s->pb);
2401
2402     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2403         s->misc_bits+= get_bits_diff(s);
2404 }
2405
2406 static void write_mb_info(MpegEncContext *s)
2407 {
2408     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2409     int offset = put_bits_count(&s->pb);
2410     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2411     int gobn = s->mb_y / s->gob_index;
2412     int pred_x, pred_y;
2413     if (CONFIG_H263_ENCODER)
2414         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2415     bytestream_put_le32(&ptr, offset);
2416     bytestream_put_byte(&ptr, s->qscale);
2417     bytestream_put_byte(&ptr, gobn);
2418     bytestream_put_le16(&ptr, mba);
2419     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2420     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2421     /* 4MV not implemented */
2422     bytestream_put_byte(&ptr, 0); /* hmv2 */
2423     bytestream_put_byte(&ptr, 0); /* vmv2 */
2424 }
2425
2426 static void update_mb_info(MpegEncContext *s, int startcode)
2427 {
2428     if (!s->mb_info)
2429         return;
2430     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2431         s->mb_info_size += 12;
2432         s->prev_mb_info = s->last_mb_info;
2433     }
2434     if (startcode) {
2435         s->prev_mb_info = put_bits_count(&s->pb)/8;
2436         /* This might have incremented mb_info_size above, and we return without
2437          * actually writing any info into that slot yet. But in that case,
2438          * this will be called again at the start of the after writing the
2439          * start code, actually writing the mb info. */
2440         return;
2441     }
2442
2443     s->last_mb_info = put_bits_count(&s->pb)/8;
2444     if (!s->mb_info_size)
2445         s->mb_info_size += 12;
2446     write_mb_info(s);
2447 }
2448
2449 static int encode_thread(AVCodecContext *c, void *arg){
2450     MpegEncContext *s= *(void**)arg;
2451     int mb_x, mb_y, pdif = 0;
2452     int chr_h= 16>>s->chroma_y_shift;
2453     int i, j;
2454     MpegEncContext best_s, backup_s;
2455     uint8_t bit_buf[2][MAX_MB_BYTES];
2456     uint8_t bit_buf2[2][MAX_MB_BYTES];
2457     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2458     PutBitContext pb[2], pb2[2], tex_pb[2];
2459 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2460
2461     ff_check_alignment();
2462
2463     for(i=0; i<2; i++){
2464         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2465         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2466         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2467     }
2468
2469     s->last_bits= put_bits_count(&s->pb);
2470     s->mv_bits=0;
2471     s->misc_bits=0;
2472     s->i_tex_bits=0;
2473     s->p_tex_bits=0;
2474     s->i_count=0;
2475     s->f_count=0;
2476     s->b_count=0;
2477     s->skip_count=0;
2478
2479     for(i=0; i<3; i++){
2480         /* init last dc values */
2481         /* note: quant matrix value (8) is implied here */
2482         s->last_dc[i] = 128 << s->intra_dc_precision;
2483
2484         s->current_picture.f.error[i] = 0;
2485     }
2486     if(s->codec_id==AV_CODEC_ID_AMV){
2487         s->last_dc[0] = 128*8/13;
2488         s->last_dc[1] = 128*8/14;
2489         s->last_dc[2] = 128*8/14;
2490     }
2491     s->mb_skip_run = 0;
2492     memset(s->last_mv, 0, sizeof(s->last_mv));
2493
2494     s->last_mv_dir = 0;
2495
2496     switch(s->codec_id){
2497     case AV_CODEC_ID_H263:
2498     case AV_CODEC_ID_H263P:
2499     case AV_CODEC_ID_FLV1:
2500         if (CONFIG_H263_ENCODER)
2501             s->gob_index = ff_h263_get_gob_height(s);
2502         break;
2503     case AV_CODEC_ID_MPEG4:
2504         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2505             ff_mpeg4_init_partitions(s);
2506         break;
2507     }
2508
2509     s->resync_mb_x=0;
2510     s->resync_mb_y=0;
2511     s->first_slice_line = 1;
2512     s->ptr_lastgob = s->pb.buf;
2513     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2514 //    printf("row %d at %X\n", s->mb_y, (int)s);
2515         s->mb_x=0;
2516         s->mb_y= mb_y;
2517
2518         ff_set_qscale(s, s->qscale);
2519         ff_init_block_index(s);
2520
2521         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2522             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2523             int mb_type= s->mb_type[xy];
2524 //            int d;
2525             int dmin= INT_MAX;
2526             int dir;
2527
2528             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2529                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2530                 return -1;
2531             }
2532             if(s->data_partitioning){
2533                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2534                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2535                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2536                     return -1;
2537                 }
2538             }
2539
2540             s->mb_x = mb_x;
2541             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2542             ff_update_block_index(s);
2543
2544             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2545                 ff_h261_reorder_mb_index(s);
2546                 xy= s->mb_y*s->mb_stride + s->mb_x;
2547                 mb_type= s->mb_type[xy];
2548             }
2549
2550             /* write gob / video packet header  */
2551             if(s->rtp_mode){
2552                 int current_packet_size, is_gob_start;
2553
2554                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2555
2556                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2557
2558                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2559
2560                 switch(s->codec_id){
2561                 case AV_CODEC_ID_H263:
2562                 case AV_CODEC_ID_H263P:
2563                     if(!s->h263_slice_structured)
2564                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2565                     break;
2566                 case AV_CODEC_ID_MPEG2VIDEO:
2567                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2568                 case AV_CODEC_ID_MPEG1VIDEO:
2569                     if(s->mb_skip_run) is_gob_start=0;
2570                     break;
2571                 case AV_CODEC_ID_MJPEG:
2572                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2573                     break;
2574                 }
2575
2576                 if(is_gob_start){
2577                     if(s->start_mb_y != mb_y || mb_x!=0){
2578                         write_slice_end(s);
2579                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2580                             ff_mpeg4_init_partitions(s);
2581                         }
2582                     }
2583
2584                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2585                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2586
2587                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2588                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2589                         int d= 100 / s->avctx->error_rate;
2590                         if(r % d == 0){
2591                             current_packet_size=0;
2592                             s->pb.buf_ptr= s->ptr_lastgob;
2593                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2594                         }
2595                     }
2596
2597                     if (s->avctx->rtp_callback){
2598                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2599                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2600                     }
2601                     update_mb_info(s, 1);
2602
2603                     switch(s->codec_id){
2604                     case AV_CODEC_ID_MPEG4:
2605                         if (CONFIG_MPEG4_ENCODER) {
2606                             ff_mpeg4_encode_video_packet_header(s);
2607                             ff_mpeg4_clean_buffers(s);
2608                         }
2609                     break;
2610                     case AV_CODEC_ID_MPEG1VIDEO:
2611                     case AV_CODEC_ID_MPEG2VIDEO:
2612                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2613                             ff_mpeg1_encode_slice_header(s);
2614                             ff_mpeg1_clean_buffers(s);
2615                         }
2616                     break;
2617                     case AV_CODEC_ID_H263:
2618                     case AV_CODEC_ID_H263P:
2619                         if (CONFIG_H263_ENCODER)
2620                             ff_h263_encode_gob_header(s, mb_y);
2621                     break;
2622                     }
2623
2624                     if(s->flags&CODEC_FLAG_PASS1){
2625                         int bits= put_bits_count(&s->pb);
2626                         s->misc_bits+= bits - s->last_bits;
2627                         s->last_bits= bits;
2628                     }
2629
2630                     s->ptr_lastgob += current_packet_size;
2631                     s->first_slice_line=1;
2632                     s->resync_mb_x=mb_x;
2633                     s->resync_mb_y=mb_y;
2634                 }
2635             }
2636
2637             if(  (s->resync_mb_x   == s->mb_x)
2638                && s->resync_mb_y+1 == s->mb_y){
2639                 s->first_slice_line=0;
2640             }
2641
2642             s->mb_skipped=0;
2643             s->dquant=0; //only for QP_RD
2644
2645             update_mb_info(s, 0);
2646
2647             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2648                 int next_block=0;
2649                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2650
2651                 copy_context_before_encode(&backup_s, s, -1);
2652                 backup_s.pb= s->pb;
2653                 best_s.data_partitioning= s->data_partitioning;
2654                 best_s.partitioned_frame= s->partitioned_frame;
2655                 if(s->data_partitioning){
2656                     backup_s.pb2= s->pb2;
2657                     backup_s.tex_pb= s->tex_pb;
2658                 }
2659
2660                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2661                     s->mv_dir = MV_DIR_FORWARD;
2662                     s->mv_type = MV_TYPE_16X16;
2663                     s->mb_intra= 0;
2664                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2665                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2666                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2667                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2668                 }
2669                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2670                     s->mv_dir = MV_DIR_FORWARD;
2671                     s->mv_type = MV_TYPE_FIELD;
2672                     s->mb_intra= 0;
2673                     for(i=0; i<2; i++){
2674                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2675                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2676                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2677                     }
2678                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2679                                  &dmin, &next_block, 0, 0);
2680                 }
2681                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2682                     s->mv_dir = MV_DIR_FORWARD;
2683                     s->mv_type = MV_TYPE_16X16;
2684                     s->mb_intra= 0;
2685                     s->mv[0][0][0] = 0;
2686                     s->mv[0][0][1] = 0;
2687                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2688                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2689                 }
2690                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2691                     s->mv_dir = MV_DIR_FORWARD;
2692                     s->mv_type = MV_TYPE_8X8;
2693                     s->mb_intra= 0;
2694                     for(i=0; i<4; i++){
2695                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2696                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2697                     }
2698                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2699                                  &dmin, &next_block, 0, 0);
2700                 }
2701                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2702                     s->mv_dir = MV_DIR_FORWARD;
2703                     s->mv_type = MV_TYPE_16X16;
2704                     s->mb_intra= 0;
2705                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2706                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2707                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2708                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2709                 }
2710                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2711                     s->mv_dir = MV_DIR_BACKWARD;
2712                     s->mv_type = MV_TYPE_16X16;
2713                     s->mb_intra= 0;
2714                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2715                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2716                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2717                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2718                 }
2719                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2720                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2721                     s->mv_type = MV_TYPE_16X16;
2722                     s->mb_intra= 0;
2723                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2724                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2725                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2726                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2727                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2728                                  &dmin, &next_block, 0, 0);
2729                 }
2730                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2731                     s->mv_dir = MV_DIR_FORWARD;
2732                     s->mv_type = MV_TYPE_FIELD;
2733                     s->mb_intra= 0;
2734                     for(i=0; i<2; i++){
2735                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2736                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2737                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2738                     }
2739                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2740                                  &dmin, &next_block, 0, 0);
2741                 }
2742                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2743                     s->mv_dir = MV_DIR_BACKWARD;
2744                     s->mv_type = MV_TYPE_FIELD;
2745                     s->mb_intra= 0;
2746                     for(i=0; i<2; i++){
2747                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2748                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2749                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2750                     }
2751                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2752                                  &dmin, &next_block, 0, 0);
2753                 }
2754                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2755                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2756                     s->mv_type = MV_TYPE_FIELD;
2757                     s->mb_intra= 0;
2758                     for(dir=0; dir<2; dir++){
2759                         for(i=0; i<2; i++){
2760                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2761                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2762                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2763                         }
2764                     }
2765                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2766                                  &dmin, &next_block, 0, 0);
2767                 }
2768                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2769                     s->mv_dir = 0;
2770                     s->mv_type = MV_TYPE_16X16;
2771                     s->mb_intra= 1;
2772                     s->mv[0][0][0] = 0;
2773                     s->mv[0][0][1] = 0;
2774                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2775                                  &dmin, &next_block, 0, 0);
2776                     if(s->h263_pred || s->h263_aic){
2777                         if(best_s.mb_intra)
2778                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2779                         else
2780                             ff_clean_intra_table_entries(s); //old mode?
2781                     }
2782                 }
2783
2784                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2785                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2786                         const int last_qp= backup_s.qscale;
2787                         int qpi, qp, dc[6];
2788                         DCTELEM ac[6][16];
2789                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2790                         static const int dquant_tab[4]={-1,1,-2,2};
2791
2792                         av_assert2(backup_s.dquant == 0);
2793
2794                         //FIXME intra
2795                         s->mv_dir= best_s.mv_dir;
2796                         s->mv_type = MV_TYPE_16X16;
2797                         s->mb_intra= best_s.mb_intra;
2798                         s->mv[0][0][0] = best_s.mv[0][0][0];
2799                         s->mv[0][0][1] = best_s.mv[0][0][1];
2800                         s->mv[1][0][0] = best_s.mv[1][0][0];
2801                         s->mv[1][0][1] = best_s.mv[1][0][1];
2802
2803                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2804                         for(; qpi<4; qpi++){
2805                             int dquant= dquant_tab[qpi];
2806                             qp= last_qp + dquant;
2807                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2808                                 continue;
2809                             backup_s.dquant= dquant;
2810                             if(s->mb_intra && s->dc_val[0]){
2811                                 for(i=0; i<6; i++){
2812                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2813                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2814                                 }
2815                             }
2816
2817                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2818                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2819                             if(best_s.qscale != qp){
2820                                 if(s->mb_intra && s->dc_val[0]){
2821                                     for(i=0; i<6; i++){
2822                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2823                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2824                                     }
2825                                 }
2826                             }
2827                         }
2828                     }
2829                 }
2830                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2831                     int mx= s->b_direct_mv_table[xy][0];
2832                     int my= s->b_direct_mv_table[xy][1];
2833
2834                     backup_s.dquant = 0;
2835                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2836                     s->mb_intra= 0;
2837                     ff_mpeg4_set_direct_mv(s, mx, my);
2838                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2839                                  &dmin, &next_block, mx, my);
2840                 }
2841                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2842                     backup_s.dquant = 0;
2843                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2844                     s->mb_intra= 0;
2845                     ff_mpeg4_set_direct_mv(s, 0, 0);
2846                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2847                                  &dmin, &next_block, 0, 0);
2848                 }
2849                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2850                     int coded=0;
2851                     for(i=0; i<6; i++)
2852                         coded |= s->block_last_index[i];
2853                     if(coded){
2854                         int mx,my;
2855                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2856                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2857                             mx=my=0; //FIXME find the one we actually used
2858                             ff_mpeg4_set_direct_mv(s, mx, my);
2859                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2860                             mx= s->mv[1][0][0];
2861                             my= s->mv[1][0][1];
2862                         }else{
2863                             mx= s->mv[0][0][0];
2864                             my= s->mv[0][0][1];
2865                         }
2866
2867                         s->mv_dir= best_s.mv_dir;
2868                         s->mv_type = best_s.mv_type;
2869                         s->mb_intra= 0;
2870 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2871                         s->mv[0][0][1] = best_s.mv[0][0][1];
2872                         s->mv[1][0][0] = best_s.mv[1][0][0];
2873                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2874                         backup_s.dquant= 0;
2875                         s->skipdct=1;
2876                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2877                                         &dmin, &next_block, mx, my);
2878                         s->skipdct=0;
2879                     }
2880                 }
2881
2882                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2883
2884                 copy_context_after_encode(s, &best_s, -1);
2885
2886                 pb_bits_count= put_bits_count(&s->pb);
2887                 flush_put_bits(&s->pb);
2888                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2889                 s->pb= backup_s.pb;
2890
2891                 if(s->data_partitioning){
2892                     pb2_bits_count= put_bits_count(&s->pb2);
2893                     flush_put_bits(&s->pb2);
2894                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2895                     s->pb2= backup_s.pb2;
2896
2897                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2898                     flush_put_bits(&s->tex_pb);
2899                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2900                     s->tex_pb= backup_s.tex_pb;
2901                 }
2902                 s->last_bits= put_bits_count(&s->pb);
2903
2904                 if (CONFIG_H263_ENCODER &&
2905                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2906                     ff_h263_update_motion_val(s);
2907
2908                 if(next_block==0){ //FIXME 16 vs linesize16
2909                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2910                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2911                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2912                 }
2913
2914                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2915                     ff_MPV_decode_mb(s, s->block);
2916             } else {
2917                 int motion_x = 0, motion_y = 0;
2918                 s->mv_type=MV_TYPE_16X16;
2919                 // only one MB-Type possible
2920
2921                 switch(mb_type){
2922                 case CANDIDATE_MB_TYPE_INTRA:
2923                     s->mv_dir = 0;
2924                     s->mb_intra= 1;
2925                     motion_x= s->mv[0][0][0] = 0;
2926                     motion_y= s->mv[0][0][1] = 0;
2927                     break;
2928                 case CANDIDATE_MB_TYPE_INTER:
2929                     s->mv_dir = MV_DIR_FORWARD;
2930                     s->mb_intra= 0;
2931                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2932                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2933                     break;
2934                 case CANDIDATE_MB_TYPE_INTER_I:
2935                     s->mv_dir = MV_DIR_FORWARD;
2936                     s->mv_type = MV_TYPE_FIELD;
2937                     s->mb_intra= 0;
2938                     for(i=0; i<2; i++){
2939                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2940                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2941                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2942                     }
2943                     break;
2944                 case CANDIDATE_MB_TYPE_INTER4V:
2945                     s->mv_dir = MV_DIR_FORWARD;
2946                     s->mv_type = MV_TYPE_8X8;
2947                     s->mb_intra= 0;
2948                     for(i=0; i<4; i++){
2949                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2950                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2951                     }
2952                     break;
2953                 case CANDIDATE_MB_TYPE_DIRECT:
2954                     if (CONFIG_MPEG4_ENCODER) {
2955                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2956                         s->mb_intra= 0;
2957                         motion_x=s->b_direct_mv_table[xy][0];
2958                         motion_y=s->b_direct_mv_table[xy][1];
2959                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2960                     }
2961                     break;
2962                 case CANDIDATE_MB_TYPE_DIRECT0:
2963                     if (CONFIG_MPEG4_ENCODER) {
2964                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2965                         s->mb_intra= 0;
2966                         ff_mpeg4_set_direct_mv(s, 0, 0);
2967                     }
2968                     break;
2969                 case CANDIDATE_MB_TYPE_BIDIR:
2970                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2971                     s->mb_intra= 0;
2972                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2973                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2974                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2975                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2976                     break;
2977                 case CANDIDATE_MB_TYPE_BACKWARD:
2978                     s->mv_dir = MV_DIR_BACKWARD;
2979                     s->mb_intra= 0;
2980                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2981                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2982                     break;
2983                 case CANDIDATE_MB_TYPE_FORWARD:
2984                     s->mv_dir = MV_DIR_FORWARD;
2985                     s->mb_intra= 0;
2986                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2987                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2988 //                    printf(" %d %d ", motion_x, motion_y);
2989                     break;
2990                 case CANDIDATE_MB_TYPE_FORWARD_I:
2991                     s->mv_dir = MV_DIR_FORWARD;
2992                     s->mv_type = MV_TYPE_FIELD;
2993                     s->mb_intra= 0;
2994                     for(i=0; i<2; i++){
2995                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2996                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2997                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2998                     }
2999                     break;
3000                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3001                     s->mv_dir = MV_DIR_BACKWARD;
3002                     s->mv_type = MV_TYPE_FIELD;
3003                     s->mb_intra= 0;
3004                     for(i=0; i<2; i++){
3005                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3006                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3007                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3008                     }
3009                     break;
3010                 case CANDIDATE_MB_TYPE_BIDIR_I:
3011                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3012                     s->mv_type = MV_TYPE_FIELD;
3013                     s->mb_intra= 0;
3014                     for(dir=0; dir<2; dir++){
3015                         for(i=0; i<2; i++){
3016                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3017                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3018                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3019                         }
3020                     }
3021                     break;
3022                 default:
3023                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3024                 }
3025
3026                 encode_mb(s, motion_x, motion_y);
3027
3028                 // RAL: Update last macroblock type
3029                 s->last_mv_dir = s->mv_dir;
3030
3031                 if (CONFIG_H263_ENCODER &&
3032                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3033                     ff_h263_update_motion_val(s);
3034
3035                 ff_MPV_decode_mb(s, s->block);
3036             }
3037
3038             /* clean the MV table in IPS frames for direct mode in B frames */
3039             if(s->mb_intra /* && I,P,S_TYPE */){
3040                 s->p_mv_table[xy][0]=0;
3041                 s->p_mv_table[xy][1]=0;
3042             }
3043
3044             if(s->flags&CODEC_FLAG_PSNR){
3045                 int w= 16;
3046                 int h= 16;
3047
3048                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3049                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3050
3051                 s->current_picture.f.error[0] += sse(
3052                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3053                     s->dest[0], w, h, s->linesize);
3054                 s->current_picture.f.error[1] += sse(
3055                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3056                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3057                 s->current_picture.f.error[2] += sse(
3058                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3059                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3060             }
3061             if(s->loop_filter){
3062                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3063                     ff_h263_loop_filter(s);
3064             }
3065 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3066         }
3067     }
3068
3069     //not beautiful here but we must write it before flushing so it has to be here
3070     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3071         ff_msmpeg4_encode_ext_header(s);
3072
3073     write_slice_end(s);
3074
3075     /* Send the last GOB if RTP */
3076     if (s->avctx->rtp_callback) {
3077         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3078         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3079         /* Call the RTP callback to send the last GOB */
3080         emms_c();
3081         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3082     }
3083
3084     return 0;
3085 }
3086
/**
 * Add one statistic of the context pointed to by `src` to the same member of
 * the context pointed to by `dst`, then clear it in `src`.
 *
 * Both `dst` and `src` must be pointers visible in the calling scope.
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and stays correct under an unbraced if / for / else.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3088 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3089     MERGE(me.scene_change_score);
3090     MERGE(me.mc_mb_var_sum_temp);
3091     MERGE(me.mb_var_sum_temp);
3092 }
3093
3094 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3095     int i;
3096
3097     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3098     MERGE(dct_count[1]);
3099     MERGE(mv_bits);
3100     MERGE(i_tex_bits);
3101     MERGE(p_tex_bits);
3102     MERGE(i_count);
3103     MERGE(f_count);
3104     MERGE(b_count);
3105     MERGE(skip_count);
3106     MERGE(misc_bits);
3107     MERGE(error_count);
3108     MERGE(padding_bug_score);
3109     MERGE(current_picture.f.error[0]);
3110     MERGE(current_picture.f.error[1]);
3111     MERGE(current_picture.f.error[2]);
3112
3113     if(dst->avctx->noise_reduction){
3114         for(i=0; i<64; i++){
3115             MERGE(dct_error_sum[0][i]);
3116             MERGE(dct_error_sum[1][i]);
3117         }
3118     }
3119
3120     assert(put_bits_count(&src->pb) % 8 ==0);
3121     assert(put_bits_count(&dst->pb) % 8 ==0);
3122     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3123     flush_put_bits(&dst->pb);
3124 }
3125
3126 static int estimate_qp(MpegEncContext *s, int dry_run){
3127     if (s->next_lambda){
3128         s->current_picture_ptr->f.quality =
3129         s->current_picture.f.quality = s->next_lambda;
3130         if(!dry_run) s->next_lambda= 0;
3131     } else if (!s->fixed_qscale) {
3132         s->current_picture_ptr->f.quality =
3133         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3134         if (s->current_picture.f.quality < 0)
3135             return -1;
3136     }
3137
3138     if(s->adaptive_quant){
3139         switch(s->codec_id){
3140         case AV_CODEC_ID_MPEG4:
3141             if (CONFIG_MPEG4_ENCODER)
3142                 ff_clean_mpeg4_qscales(s);
3143             break;
3144         case AV_CODEC_ID_H263:
3145         case AV_CODEC_ID_H263P:
3146         case AV_CODEC_ID_FLV1:
3147             if (CONFIG_H263_ENCODER)
3148                 ff_clean_h263_qscales(s);
3149             break;
3150         default:
3151             ff_init_qscale_tab(s);
3152         }
3153
3154         s->lambda= s->lambda_table[0];
3155         //FIXME broken
3156     }else
3157         s->lambda = s->current_picture.f.quality;
3158 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3159     update_qscale(s);
3160     return 0;
3161 }
3162
3163 /* must be called before writing the header */
3164 static void set_frame_distances(MpegEncContext * s){
3165     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3166     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3167
3168     if(s->pict_type==AV_PICTURE_TYPE_B){
3169         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3170         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3171     }else{
3172         s->pp_time= s->time - s->last_non_b_time;
3173         s->last_non_b_time= s->time;
3174         assert(s->picture_number==0 || s->pp_time > 0);
3175     }
3176 }
3177
/**
 * Encode one picture.
 *
 * In order: sets up timing and rounding state, runs motion estimation (or,
 * for I pictures, the mb_var pass when qscale is not fixed), may turn a P
 * picture into an I picture on a scene change, selects f_code / b_code,
 * calls estimate_qp(), writes the codec specific picture header and finally
 * runs encode_thread on all slice contexts and merges their results into s.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() reports an error
 */
3178 static int encode_picture(MpegEncContext *s, int picture_number)
3179 {
3180     int i;
3181     int bits;
3182     int context_count = s->slice_context_count;
3183
3184     s->picture_number = picture_number;
3185
3186     /* Reset the average MB variance */
3187     s->me.mb_var_sum_temp    =
3188     s->me.mc_mb_var_sum_temp = 0;
3189
3190     /* we need to initialize some time vars before we can encode b-frames */
3191     // RAL: Condition added for MPEG1VIDEO
3192     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3193         set_frame_distances(s);
3194     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3195         ff_set_mpeg4_time(s);
3196
3197     s->me.scene_change_score=0;
3198
3199 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3200
         /* I pictures reset no_rounding (1 only for msmpeg4_version >= 3); other
          * non-B pictures toggle it for the listed codecs / flipflop_rounding */
3201     if(s->pict_type==AV_PICTURE_TYPE_I){
3202         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3203         else                        s->no_rounding=0;
3204     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3205         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3206             s->no_rounding ^= 1;
3207     }
3208
         /* pick the lambda used during motion estimation: a dry-run estimate in
          * pass 2, otherwise (unless qscale is forced) the last lambda of the
          * matching picture type */
3209     if(s->flags & CODEC_FLAG_PASS2){
3210         if (estimate_qp(s,1) < 0)
3211             return -1;
3212         ff_get_2pass_fcode(s);
3213     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3214         if(s->pict_type==AV_PICTURE_TYPE_B)
3215             s->lambda= s->last_lambda_for[s->pict_type];
3216         else
3217             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3218         update_qscale(s);
3219     }
3220
         /* for every codec except AMV the chroma intra matrices alias the luma
          * ones; separately allocated ones are freed first */
3221     if(s->codec_id != AV_CODEC_ID_AMV){
3222         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3223         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3224         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3225         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3226     }
3227
3228     s->mb_intra=0; //for the rate distortion & bit compare functions
3229     for(i=1; i<context_count; i++){
3230         ff_update_duplicate_context(s->thread_context[i], s);
3231     }
3232
3233     if(ff_init_me(s)<0)
3234         return -1;
3235
3236     /* Estimate motion for every MB */
3237     if(s->pict_type != AV_PICTURE_TYPE_I){
3238         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3239         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3240         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3241             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3242                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3243             }
3244         }
3245
3246         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3247     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3248         /* I-Frame */
3249         for(i=0; i<s->mb_stride*s->mb_height; i++)
3250             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3251
3252         if(!s->fixed_qscale){
3253             /* finding spatial complexity for I-frame rate control */
3254             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3255         }
3256     }
         /* collect the ME statistics of the other contexts into s */
3257     for(i=1; i<context_count; i++){
3258         merge_context_after_me(s, s->thread_context[i]);
3259     }
3260     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3261     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3262     emms_c();
3263
         /* a P picture whose scene change score exceeds the threshold is coded
          * as an I picture with every MB marked intra */
3264     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3265         s->pict_type= AV_PICTURE_TYPE_I;
3266         for(i=0; i<s->mb_stride*s->mb_height; i++)
3267             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3268 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3269         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3270     }
3271
         /* unless umvplus is set: choose f_code (and b_code for B pictures) with
          * ff_get_best_fcode() and run ff_fix_long_mvs() on each MV table with
          * the chosen code */
3272     if(!s->umvplus){
3273         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3274             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3275
3276             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3277                 int a,b;
3278                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3279                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3280                 s->f_code= FFMAX3(s->f_code, a, b);
3281             }
3282
3283             ff_fix_long_p_mvs(s);
3284             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3285             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3286                 int j;
3287                 for(i=0; i<2; i++){
3288                     for(j=0; j<2; j++)
3289                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3290                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3291                 }
3292             }
3293         }
3294
3295         if(s->pict_type==AV_PICTURE_TYPE_B){
3296             int a, b;
3297
3298             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3299             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3300             s->f_code = FFMAX(a, b);
3301
3302             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3303             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3304             s->b_code = FFMAX(a, b);
3305
3306             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3307             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3308             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3309             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3310             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3311                 int dir, j;
3312                 for(dir=0; dir<2; dir++){
3313                     for(i=0; i<2; i++){
3314                         for(j=0; j<2; j++){
3315                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3316                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3317                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3318                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3319                         }
3320                     }
3321                 }
3322             }
3323         }
3324     }
3325
         /* final (non dry-run) quantizer decision for this picture */
3326     if (estimate_qp(s, 0) < 0)
3327         return -1;
3328
3329     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3330         s->qscale= 3; //reduce clipping problems
3331
3332     if (s->out_format == FMT_MJPEG) {
3333         /* for mjpeg, we do include qscale in the matrix */
3334         for(i=1;i<64;i++){
3335             int j= s->dsp.idct_permutation[i];
3336
3337             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3338         }
3339         s->y_dc_scale_table=
3340         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3341         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3342         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3343                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3344         s->qscale= 8;
3345     }
         /* AMV: fixed quantisation tables taken from sp5x_quant_table and
          * constant DC scales (13 for y, 14 for c); qscale is forced to 8 */
3346     if(s->codec_id == AV_CODEC_ID_AMV){
3347         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3348         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3349         for(i=1;i<64;i++){
3350             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3351
3352             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3353             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3354         }
3355         s->y_dc_scale_table= y;
3356         s->c_dc_scale_table= c;
3357         s->intra_matrix[0] = 13;
3358         s->chroma_intra_matrix[0] = 14;
3359         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3360                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3361         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3362                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3363         s->qscale= 8;
3364     }
3365
3366     //FIXME var duplication
3367     s->current_picture_ptr->f.key_frame =
3368     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3369     s->current_picture_ptr->f.pict_type =
3370     s->current_picture.f.pict_type = s->pict_type;
3371
3372     if (s->current_picture.f.key_frame)
3373         s->picture_in_gop_number=0;
3374
3375     s->mb_x = s->mb_y = 0;
         /* remember the bit position so header_bits can be computed below */
3376     s->last_bits= put_bits_count(&s->pb);
3377     switch(s->out_format) {
3378     case FMT_MJPEG:
3379         if (CONFIG_MJPEG_ENCODER)
3380             ff_mjpeg_encode_picture_header(s);
3381         break;
3382     case FMT_H261:
3383         if (CONFIG_H261_ENCODER)
3384             ff_h261_encode_picture_header(s, picture_number);
3385         break;
3386     case FMT_H263:
3387         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3388             ff_wmv2_encode_picture_header(s, picture_number);
3389         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3390             ff_msmpeg4_encode_picture_header(s, picture_number);
3391         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3392             ff_mpeg4_encode_picture_header(s, picture_number);
3393         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3394             ff_rv10_encode_picture_header(s, picture_number);
3395         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3396             ff_rv20_encode_picture_header(s, picture_number);
3397         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3398             ff_flv_encode_picture_header(s, picture_number);
3399         else if (CONFIG_H263_ENCODER)
3400             ff_h263_encode_picture_header(s, picture_number);
3401         break;
3402     case FMT_MPEG1:
3403         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3404             ff_mpeg1_encode_picture_header(s, picture_number);
3405         break;
3406     case FMT_H264:
3407         break;
3408     default:
3409         av_assert0(0);
3410     }
3411     bits= put_bits_count(&s->pb);
3412     s->header_bits= bits - s->last_bits;
3413
         /* sync the other contexts with s, encode with all contexts, then merge
          * their statistics and bitstreams back into s */
3414     for(i=1; i<context_count; i++){
3415         update_duplicate_context_after_me(s->thread_context[i], s);
3416     }
3417     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3418     for(i=1; i<context_count; i++){
3419         merge_context_after_encode(s, s->thread_context[i]);
3420     }
3421     emms_c();
3422     return 0;
3423 }
3424
3425 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3426     const int intra= s->mb_intra;
3427     int i;
3428
3429     s->dct_count[intra]++;
3430
3431     for(i=0; i<64; i++){
3432         int level= block[i];
3433
3434         if(level){
3435             if(level>0){
3436                 s->dct_error_sum[intra][i] += level;
3437                 level -= s->dct_offset[intra][i];
3438                 if(level<0) level=0;
3439             }else{
3440                 s->dct_error_sum[intra][i] -= level;
3441                 level += s->dct_offset[intra][i];
3442                 if(level>0) level=0;
3443             }
3444             block[i]= level;
3445         }
3446     }
3447 }
3448
/**
 * Forward DCT followed by rate-distortion optimised (trellis) quantisation
 * of one block, in place.
 *
 * @param s        encoder context
 * @param block    64 coefficients; holds the quantised levels on return
 * @param n        block number; n < 4 selects s->y_dc_scale and
 *                 s->q_intra_matrix, otherwise s->c_dc_scale and
 *                 s->q_chroma_intra_matrix (intra blocks only); also the index
 *                 into s->coded_score[]
 * @param qscale   quantiser scale used to index the quantisation matrices
 * @param overflow set to non-zero when the OR of the quantised magnitudes
 *                 exceeds s->max_qcoeff (an overflow might have happened)
 * @return scan position of the last coefficient that is kept; start_i - 1
 *         (0 for intra, -1 for inter blocks) when no coefficient from
 *         start_i onwards is kept
 */
3449 static int dct_quantize_trellis_c(MpegEncContext *s,
3450                                   DCTELEM *block, int n,
3451                                   int qscale, int *overflow){
3452     const int *qmat;
3453     const uint8_t *scantable= s->intra_scantable.scantable;
3454     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3455     int max=0;
3456     unsigned int threshold1, threshold2;
3457     int bias=0;
3458     int run_tab[65];
3459     int level_tab[65];
3460     int score_tab[65];
3461     int survivor[65];
3462     int survivor_count;
3463     int last_run=0;
3464     int last_level=0;
3465     int last_score= 0;
3466     int last_i;
3467     int coeff[2][64];
3468     int coeff_count[64];
3469     int qmul, qadd, start_i, last_non_zero, i, dc;
3470     const int esc_length= s->ac_esc_length;
3471     uint8_t * length;
3472     uint8_t * last_length;
3473     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3474
3475     s->dsp.fdct (block);
3476
3477     if(s->dct_error_sum)
3478         s->denoise_dct(s, block);
3479     qmul= qscale*16;
3480     qadd= ((qscale-1)|1)*8;
3481
3482     if (s->mb_intra) {
3483         int q;
3484         if (!s->h263_aic) {
3485             if (n < 4)
3486                 q = s->y_dc_scale;
3487             else
3488                 q = s->c_dc_scale;
3489             q = q << 3;
3490         } else{
3491             /* For AIC we skip quant/dequant of INTRADC */
3492             q = 1 << 3;
3493             qadd=0;
3494         }
3495
3496         /* note: block[0] is assumed to be positive */
3497         block[0] = (block[0] + (q >> 1)) / q;
3498         start_i = 1;
3499         last_non_zero = 0;
3500         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3501         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3502             bias= 1<<(QMAT_SHIFT-1);
3503         length     = s->intra_ac_vlc_length;
3504         last_length= s->intra_ac_vlc_last_length;
3505     } else {
3506         start_i = 0;
3507         last_non_zero = -1;
3508         qmat = s->q_inter_matrix[qscale];
3509         length     = s->inter_ac_vlc_length;
3510         last_length= s->inter_ac_vlc_last_length;
3511     }
3512     last_i= start_i;
3513
         /* |level| > threshold1 is equivalent to (bias + |level|) >> QMAT_SHIFT
          * being at least 1; the unsigned compare below tests both signs at once */
3514     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3515     threshold2= (threshold1<<1);
3516
         /* find the last coefficient, in scan order, that does not quantise to zero */
3517     for(i=63; i>=start_i; i--) {
3518         const int j = scantable[i];
3519         int level = block[j] * qmat[j];
3520
3521         if(((unsigned)(level+threshold1))>threshold2){
3522             last_non_zero = i;
3523             break;
3524         }
3525     }
3526
         /* collect the candidate levels of every position up to last_non_zero:
          * the quantised level and the one whose magnitude is one smaller
          * (only the first when the magnitude is 1); coefficients below the
          * threshold get the single candidate +1 or -1 */
3527     for(i=start_i; i<=last_non_zero; i++) {
3528         const int j = scantable[i];
3529         int level = block[j] * qmat[j];
3530
3531 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3532 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3533         if(((unsigned)(level+threshold1))>threshold2){
3534             if(level>0){
3535                 level= (bias + level)>>QMAT_SHIFT;
3536                 coeff[0][i]= level;
3537                 coeff[1][i]= level-1;
3538 //                coeff[2][k]= level-2;
3539             }else{
3540                 level= (bias - level)>>QMAT_SHIFT;
3541                 coeff[0][i]= -level;
3542                 coeff[1][i]= -level+1;
3543 //                coeff[2][k]= -level+2;
3544             }
3545             coeff_count[i]= FFMIN(level, 2);
3546             av_assert2(coeff_count[i]);
3547             max |=level;
3548         }else{
3549             coeff[0][i]= (level>>31)|1;
3550             coeff_count[i]= 1;
3551         }
3552     }
3553
3554     *overflow= s->max_qcoeff < max; //overflow might have happened
3555
         /* nothing survives quantisation: clear the block from start_i on */
3556     if(last_non_zero < start_i){
3557         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3558         return last_non_zero;
3559     }
3560
3561     score_tab[start_i]= 0;
3562     survivor[0]= start_i;
3563     survivor_count= 1;
3564
         /* dynamic programming over the scan positions: for each position try
          * every candidate level combined with every surviving predecessor
          * (which fixes the run) and keep the cheapest in score_tab[i+1],
          * run_tab[i+1] and level_tab[i+1] */
3565     for(i=start_i; i<=last_non_zero; i++){
3566         int level_index, j, zero_distortion;
3567         int dct_coeff= FFABS(block[ scantable[i] ]);
3568         int best_score=256*256*256*120;
3569
3570         if (s->dsp.fdct == ff_fdct_ifast)
3571             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3572         zero_distortion= dct_coeff*dct_coeff;
3573
3574         for(level_index=0; level_index < coeff_count[i]; level_index++){
3575             int distortion;
3576             int level= coeff[level_index][i];
3577             const int alevel= FFABS(level);
3578             int unquant_coeff;
3579
3580             av_assert2(level);
3581
3582             if(s->out_format == FMT_H263){
3583                 unquant_coeff= alevel*qmul + qadd;
3584             }else{ //MPEG1
3585                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3586                 if(s->mb_intra){
3587                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3588                         unquant_coeff =   (unquant_coeff - 1) | 1;
3589                 }else{
3590                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3591                         unquant_coeff =   (unquant_coeff - 1) | 1;
3592                 }
3593                 unquant_coeff<<= 3;
3594             }
3595
                 /* distortion is measured relative to coding this coefficient as zero */
3596             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3597             level+=64;
                 /* levels in -64..63 use the VLC length tables, all others the escape length */
3598             if((level&(~127)) == 0){
3599                 for(j=survivor_count-1; j>=0; j--){
3600                     int run= i - survivor[j];
3601                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3602                     score += score_tab[i-run];
3603
3604                     if(score < best_score){
3605                         best_score= score;
3606                         run_tab[i+1]= run;
3607                         level_tab[i+1]= level-64;
3608                     }
3609                 }
3610
                     /* for FMT_H263 output the "last coefficient" choice is tracked here, using last_length */
3611                 if(s->out_format == FMT_H263){
3612                     for(j=survivor_count-1; j>=0; j--){
3613                         int run= i - survivor[j];
3614                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3615                         score += score_tab[i-run];
3616                         if(score < last_score){
3617                             last_score= score;
3618                             last_run= run;
3619                             last_level= level-64;
3620                             last_i= i+1;
3621                         }
3622                     }
3623                 }
3624             }else{
3625                 distortion += esc_length*lambda;
3626                 for(j=survivor_count-1; j>=0; j--){
3627                     int run= i - survivor[j];
3628                     int score= distortion + score_tab[i-run];
3629
3630                     if(score < best_score){
3631                         best_score= score;
3632                         run_tab[i+1]= run;
3633                         level_tab[i+1]= level-64;
3634                     }
3635                 }
3636
3637                 if(s->out_format == FMT_H263){
3638                   for(j=survivor_count-1; j>=0; j--){
3639                         int run= i - survivor[j];
3640                         int score= distortion + score_tab[i-run];
3641                         if(score < last_score){
3642                             last_score= score;
3643                             last_run= run;
3644                             last_level= level-64;
3645                             last_i= i+1;
3646                         }
3647                     }
3648                 }
3649             }
3650         }
3651
3652         score_tab[i+1]= best_score;
3653
             /* drop survivors whose score is worse than the new best (plus a
              * margin of lambda when last_non_zero > 27), then add this position */
3654         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3655         if(last_non_zero <= 27){
3656             for(; survivor_count; survivor_count--){
3657                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3658                     break;
3659             }
3660         }else{
3661             for(; survivor_count; survivor_count--){
3662                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3663                     break;
3664             }
3665         }
3666
3667         survivor[ survivor_count++ ]= i+1;
3668     }
3669
         /* for the other output formats the end position is chosen afterwards
          * from score_tab[], with 2*lambda added for every non-zero position */
3670     if(s->out_format != FMT_H263){
3671         last_score= 256*256*256*120;
3672         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3673             int score= score_tab[i];
3674             if(i) score += lambda*2; //FIXME exacter?
3675
3676             if(score < last_score){
3677                 last_score= score;
3678                 last_i= i;
3679                 last_level= level_tab[i];
3680                 last_run= run_tab[i];
3681             }
3682         }
3683     }
3684
3685     s->coded_score[n] = last_score;
3686
3687     dc= FFABS(block[0]);
3688     last_non_zero= last_i - 1;
3689     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3690
3691     if(last_non_zero < start_i)
3692         return last_non_zero;
3693
         /* only coefficient 0 of a non-intra block is left: decide again between
          * its candidate levels and zero, using the saved magnitude dc */
3694     if(last_non_zero == 0 && start_i == 0){
3695         int best_level= 0;
3696         int best_score= dc * dc;
3697
3698         for(i=0; i<coeff_count[0]; i++){
3699             int level= coeff[i][0];
3700             int alevel= FFABS(level);
3701             int unquant_coeff, score, distortion;
3702
3703             if(s->out_format == FMT_H263){
3704                     unquant_coeff= (alevel*qmul + qadd)>>3;
3705             }else{ //MPEG1
3706                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3707                     unquant_coeff =   (unquant_coeff - 1) | 1;
3708             }
3709             unquant_coeff = (unquant_coeff + 4) >> 3;
3710             unquant_coeff<<= 3 + 3;
3711
3712             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3713             level+=64;
3714             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3715             else                    score= distortion + esc_length*lambda;
3716
3717             if(score < best_score){
3718                 best_score= score;
3719                 best_level= level - 64;
3720             }
3721         }
3722         block[0]= best_level;
3723         s->coded_score[n] = best_score - dc*dc;
3724         if(best_level == 0) return -1;
3725         else                return last_non_zero;
3726     }
3727
         /* walk back through run_tab[] / level_tab[] from the chosen end
          * position and store the selected levels in permuted scan order */
3728     i= last_i;
3729     av_assert2(last_level);
3730
3731     block[ perm_scantable[last_non_zero] ]= last_level;
3732     i -= last_run + 1;
3733
3734     for(; i>start_i; i -= run_tab[i] + 1){
3735         block[ perm_scantable[i-1] ]= level_tab[i];
3736     }
3737
3738     return last_non_zero;
3739 }
3740
3741 //#define REFINE_STATS 1
/* Table of the 64 scaled 2-D DCT basis functions, filled by build_basis():
 * row perm[8*i + j] holds, at index 8*x + y, the rounded value of
 * scale * cos(pi/8 * i * (x+0.5)) * cos(pi/8 * j * (y+0.5)).
 * dct_quantize_refine() treats basis[0][0] == 0 as "not built yet". */
3742 static int16_t basis[64][64];
3743
3744 static void build_basis(uint8_t *perm){
3745     int i, j, x, y;
3746     emms_c();
3747     for(i=0; i<8; i++){
3748         for(j=0; j<8; j++){
3749             for(y=0; y<8; y++){
3750                 for(x=0; x<8; x++){
3751                     double s= 0.25*(1<<BASIS_SHIFT);
3752                     int index= 8*i + j;
3753                     int perm_index= perm[index];
3754                     if(i==0) s*= sqrt(0.5);
3755                     if(j==0) s*= sqrt(0.5);
3756                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3757                 }
3758             }
3759         }
3760     }
3761 }
3762
3763 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3764                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3765                         int n, int qscale){
3766     int16_t rem[64];
3767     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3768     const uint8_t *scantable= s->intra_scantable.scantable;
3769     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3770 //    unsigned int threshold1, threshold2;
3771 //    int bias=0;
3772     int run_tab[65];
3773     int prev_run=0;
3774     int prev_level=0;
3775     int qmul, qadd, start_i, last_non_zero, i, dc;
3776     uint8_t * length;
3777     uint8_t * last_length;
3778     int lambda;
3779     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3780 #ifdef REFINE_STATS
3781 static int count=0;
3782 static int after_last=0;
3783 static int to_zero=0;
3784 static int from_zero=0;
3785 static int raise=0;
3786 static int lower=0;
3787 static int messed_sign=0;
3788 #endif
3789
3790     if(basis[0][0] == 0)
3791         build_basis(s->dsp.idct_permutation);
3792
3793     qmul= qscale*2;
3794     qadd= (qscale-1)|1;
3795     if (s->mb_intra) {
3796         if (!s->h263_aic) {
3797             if (n < 4)
3798                 q = s->y_dc_scale;
3799             else
3800                 q = s->c_dc_scale;
3801         } else{
3802             /* For AIC we skip quant/dequant of INTRADC */
3803             q = 1;
3804             qadd=0;
3805         }
3806         q <<= RECON_SHIFT-3;
3807         /* note: block[0] is assumed to be positive */
3808         dc= block[0]*q;
3809 //        block[0] = (block[0] + (q >> 1)) / q;
3810         start_i = 1;
3811 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3812 //            bias= 1<<(QMAT_SHIFT-1);
3813         length     = s->intra_ac_vlc_length;
3814         last_length= s->intra_ac_vlc_last_length;
3815     } else {
3816         dc= 0;
3817         start_i = 0;
3818         length     = s->inter_ac_vlc_length;
3819         last_length= s->inter_ac_vlc_last_length;
3820     }
3821     last_non_zero = s->block_last_index[n];
3822
3823 #ifdef REFINE_STATS
3824 {START_TIMER
3825 #endif
3826     dc += (1<<(RECON_SHIFT-1));
3827     for(i=0; i<64; i++){
3828         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3829     }
3830 #ifdef REFINE_STATS
3831 STOP_TIMER("memset rem[]")}
3832 #endif
3833     sum=0;
3834     for(i=0; i<64; i++){
3835         int one= 36;
3836         int qns=4;
3837         int w;
3838
3839         w= FFABS(weight[i]) + qns*one;
3840         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3841
3842         weight[i] = w;
3843 //        w=weight[i] = (63*qns + (w/2)) / w;
3844
3845         av_assert2(w>0);
3846         av_assert2(w<(1<<6));
3847         sum += w*w;
3848     }
3849     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3850 #ifdef REFINE_STATS
3851 {START_TIMER
3852 #endif
3853     run=0;
3854     rle_index=0;
3855     for(i=start_i; i<=last_non_zero; i++){
3856         int j= perm_scantable[i];
3857         const int level= block[j];
3858         int coeff;
3859
3860         if(level){
3861             if(level<0) coeff= qmul*level - qadd;
3862             else        coeff= qmul*level + qadd;
3863             run_tab[rle_index++]=run;
3864             run=0;
3865
3866             s->dsp.add_8x8basis(rem, basis[j], coeff);
3867         }else{
3868             run++;
3869         }
3870     }
3871 #ifdef REFINE_STATS
3872 if(last_non_zero>0){
3873 STOP_TIMER("init rem[]")
3874 }
3875 }
3876
3877 {START_TIMER
3878 #endif
3879     for(;;){
3880         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3881         int best_coeff=0;
3882         int best_change=0;
3883         int run2, best_unquant_change=0, analyze_gradient;
3884 #ifdef REFINE_STATS
3885 {START_TIMER
3886 #endif
3887         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3888
3889         if(analyze_gradient){
3890 #ifdef REFINE_STATS
3891 {START_TIMER
3892 #endif
3893             for(i=0; i<64; i++){
3894                 int w= weight[i];
3895
3896                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3897             }
3898 #ifdef REFINE_STATS
3899 STOP_TIMER("rem*w*w")}
3900 {START_TIMER
3901 #endif
3902             s->dsp.fdct(d1);
3903 #ifdef REFINE_STATS
3904 STOP_TIMER("dct")}
3905 #endif
3906         }
3907
3908         if(start_i){
3909             const int level= block[0];
3910             int change, old_coeff;
3911
3912             av_assert2(s->mb_intra);
3913
3914             old_coeff= q*level;
3915
3916             for(change=-1; change<=1; change+=2){
3917                 int new_level= level + change;
3918                 int score, new_coeff;
3919
3920                 new_coeff= q*new_level;
3921                 if(new_coeff >= 2048 || new_coeff < 0)
3922                     continue;
3923
3924                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3925                 if(score<best_score){
3926                     best_score= score;
3927                     best_coeff= 0;
3928                     best_change= change;
3929                     best_unquant_change= new_coeff - old_coeff;
3930                 }
3931             }
3932         }
3933
3934         run=0;
3935         rle_index=0;
3936         run2= run_tab[rle_index++];
3937         prev_level=0;
3938         prev_run=0;
3939
3940         for(i=start_i; i<64; i++){
3941             int j= perm_scantable[i];
3942             const int level= block[j];
3943             int change, old_coeff;
3944
3945             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3946                 break;
3947
3948             if(level){
3949                 if(level<0) old_coeff= qmul*level - qadd;
3950                 else        old_coeff= qmul*level + qadd;
3951                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3952             }else{
3953                 old_coeff=0;
3954                 run2--;
3955                 av_assert2(run2>=0 || i >= last_non_zero );
3956             }
3957
3958             for(change=-1; change<=1; change+=2){
3959                 int new_level= level + change;
3960                 int score, new_coeff, unquant_change;
3961
3962                 score=0;
3963                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3964                    continue;
3965
3966                 if(new_level){
3967                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3968                     else            new_coeff= qmul*new_level + qadd;
3969                     if(new_coeff >= 2048 || new_coeff <= -2048)
3970                         continue;
3971                     //FIXME check for overflow
3972
3973                     if(level){
3974                         if(level < 63 && level > -63){
3975                             if(i < last_non_zero)
3976                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3977                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3978                             else
3979                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3980                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3981                         }
3982                     }else{
3983                         av_assert2(FFABS(new_level)==1);
3984
3985                         if(analyze_gradient){
3986                             int g= d1[ scantable[i] ];
3987                             if(g && (g^new_level) >= 0)
3988                                 continue;
3989                         }
3990
3991                         if(i < last_non_zero){
3992                             int next_i= i + run2 + 1;
3993                             int next_level= block[ perm_scantable[next_i] ] + 64;
3994
3995                             if(next_level&(~127))
3996                                 next_level= 0;
3997
3998                             if(next_i < last_non_zero)
3999                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4000                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4001                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4002                             else
4003                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4004                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4005                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4006                         }else{
4007                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4008                             if(prev_level){
4009                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4010                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4011                             }
4012                         }
4013                     }
4014                 }else{
4015                     new_coeff=0;
4016                     av_assert2(FFABS(level)==1);
4017
4018                     if(i < last_non_zero){
4019                         int next_i= i + run2 + 1;
4020                         int next_level= block[ perm_scantable[next_i] ] + 64;
4021
4022                         if(next_level&(~127))
4023                             next_level= 0;
4024
4025                         if(next_i < last_non_zero)
4026                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4027                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4028                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4029                         else
4030                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4031                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4032                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4033                     }else{
4034                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4035                         if(prev_level){
4036                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4037                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4038                         }
4039                     }
4040                 }
4041
4042                 score *= lambda;
4043
4044                 unquant_change= new_coeff - old_coeff;
4045                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4046
4047                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4048                 if(score<best_score){
4049                     best_score= score;
4050                     best_coeff= i;
4051                     best_change= change;
4052                     best_unquant_change= unquant_change;
4053                 }
4054             }
4055             if(level){
4056                 prev_level= level + 64;
4057                 if(prev_level&(~127))
4058                     prev_level= 0;
4059                 prev_run= run;
4060                 run=0;
4061             }else{
4062                 run++;
4063             }
4064         }
4065 #ifdef REFINE_STATS
4066 STOP_TIMER("iterative step")}
4067 #endif
4068
4069         if(best_change){
4070             int j= perm_scantable[ best_coeff ];
4071
4072             block[j] += best_change;
4073
4074             if(best_coeff > last_non_zero){
4075                 last_non_zero= best_coeff;
4076                 av_assert2(block[j]);
4077 #ifdef REFINE_STATS
4078 after_last++;
4079 #endif
4080             }else{
4081 #ifdef REFINE_STATS
4082 if(block[j]){
4083     if(block[j] - best_change){
4084         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4085             raise++;
4086         }else{
4087             lower++;
4088         }
4089     }else{
4090         from_zero++;
4091     }
4092 }else{
4093     to_zero++;
4094 }
4095 #endif
4096                 for(; last_non_zero>=start_i; last_non_zero--){
4097                     if(block[perm_scantable[last_non_zero]])
4098                         break;
4099                 }
4100             }
4101 #ifdef REFINE_STATS
4102 count++;
4103 if(256*256*256*64 % count == 0){
4104     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4105 }
4106 #endif
4107             run=0;
4108             rle_index=0;
4109             for(i=start_i; i<=last_non_zero; i++){
4110                 int j= perm_scantable[i];
4111                 const int level= block[j];
4112
4113                  if(level){
4114                      run_tab[rle_index++]=run;
4115                      run=0;
4116                  }else{
4117                      run++;
4118                  }
4119             }
4120
4121             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4122         }else{
4123             break;
4124         }
4125     }
4126 #ifdef REFINE_STATS
4127 if(last_non_zero>0){
4128 STOP_TIMER("iterative search")
4129 }
4130 }
4131 #endif
4132
4133     return last_non_zero;
4134 }
4135
4136 int ff_dct_quantize_c(MpegEncContext *s,
4137                         DCTELEM *block, int n,
4138                         int qscale, int *overflow)
4139 {
4140     int i, j, level, last_non_zero, q, start_i;
4141     const int *qmat;
4142     const uint8_t *scantable= s->intra_scantable.scantable;
4143     int bias;
4144     int max=0;
4145     unsigned int threshold1, threshold2;
4146
4147     s->dsp.fdct (block);
4148
4149     if(s->dct_error_sum)
4150         s->denoise_dct(s, block);
4151
4152     if (s->mb_intra) {
4153         if (!s->h263_aic) {
4154             if (n < 4)
4155                 q = s->y_dc_scale;
4156             else
4157                 q = s->c_dc_scale;
4158             q = q << 3;
4159         } else
4160             /* For AIC we skip quant/dequant of INTRADC */
4161             q = 1 << 3;
4162
4163         /* note: block[0] is assumed to be positive */
4164         block[0] = (block[0] + (q >> 1)) / q;
4165         start_i = 1;
4166         last_non_zero = 0;
4167         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4168         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4169     } else {
4170         start_i = 0;
4171         last_non_zero = -1;
4172         qmat = s->q_inter_matrix[qscale];
4173         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4174     }
4175     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4176     threshold2= (threshold1<<1);
4177     for(i=63;i>=start_i;i--) {
4178         j = scantable[i];
4179         level = block[j] * qmat[j];
4180
4181         if(((unsigned)(level+threshold1))>threshold2){
4182             last_non_zero = i;
4183             break;
4184         }else{
4185             block[j]=0;
4186         }
4187     }
4188     for(i=start_i; i<=last_non_zero; i++) {
4189         j = scantable[i];
4190         level = block[j] * qmat[j];
4191
4192 //        if(   bias+level >= (1<<QMAT_SHIFT)
4193 //           || bias-level >= (1<<QMAT_SHIFT)){
4194         if(((unsigned)(level+threshold1))>threshold2){
4195             if(level>0){
4196                 level= (bias + level)>>QMAT_SHIFT;
4197                 block[j]= level;
4198             }else{
4199                 level= (bias - level)>>QMAT_SHIFT;
4200                 block[j]= -level;
4201             }
4202             max |=level;
4203         }else{
4204             block[j]=0;
4205         }
4206     }
4207     *overflow= s->max_qcoeff < max; //overflow might have happened
4208
4209     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4210     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4211         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4212
4213     return last_non_zero;
4214 }
4215
4216 #define OFFSET(x) offsetof(MpegEncContext, x)
4217 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4218 static const AVOption h263_options[] = {
4219     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4220     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4221     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
4222     FF_MPV_COMMON_OPTS
4223     { NULL },
4224 };
4225
4226 static const AVClass h263_class = {
4227     .class_name = "H.263 encoder",
4228     .item_name  = av_default_item_name,
4229     .option     = h263_options,
4230     .version    = LIBAVUTIL_VERSION_INT,
4231 };
4232
4233 AVCodec ff_h263_encoder = {
4234     .name           = "h263",
4235     .type           = AVMEDIA_TYPE_VIDEO,
4236     .id             = AV_CODEC_ID_H263,
4237     .priv_data_size = sizeof(MpegEncContext),
4238     .init           = ff_MPV_encode_init,
4239     .encode2        = ff_MPV_encode_picture,
4240     .close          = ff_MPV_encode_end,
4241     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4242     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4243     .priv_class     = &h263_class,
4244 };
4245
4246 static const AVOption h263p_options[] = {
4247     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4248     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4249     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4250     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4251     FF_MPV_COMMON_OPTS
4252     { NULL },
4253 };
4254 static const AVClass h263p_class = {
4255     .class_name = "H.263p encoder",
4256     .item_name  = av_default_item_name,
4257     .option     = h263p_options,
4258     .version    = LIBAVUTIL_VERSION_INT,
4259 };
4260
4261 AVCodec ff_h263p_encoder = {
4262     .name           = "h263p",
4263     .type           = AVMEDIA_TYPE_VIDEO,
4264     .id             = AV_CODEC_ID_H263P,
4265     .priv_data_size = sizeof(MpegEncContext),
4266     .init           = ff_MPV_encode_init,
4267     .encode2        = ff_MPV_encode_picture,
4268     .close          = ff_MPV_encode_end,
4269     .capabilities   = CODEC_CAP_SLICE_THREADS,
4270     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4271     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4272     .priv_class     = &h263p_class,
4273 };
4274
4275 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4276
4277 AVCodec ff_msmpeg4v2_encoder = {
4278     .name           = "msmpeg4v2",
4279     .type           = AVMEDIA_TYPE_VIDEO,
4280     .id             = AV_CODEC_ID_MSMPEG4V2,
4281     .priv_data_size = sizeof(MpegEncContext),
4282     .init           = ff_MPV_encode_init,
4283     .encode2        = ff_MPV_encode_picture,
4284     .close          = ff_MPV_encode_end,
4285     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4286     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4287     .priv_class     = &msmpeg4v2_class,
4288 };
4289
4290 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4291
4292 AVCodec ff_msmpeg4v3_encoder = {
4293     .name           = "msmpeg4",
4294     .type           = AVMEDIA_TYPE_VIDEO,
4295     .id             = AV_CODEC_ID_MSMPEG4V3,
4296     .priv_data_size = sizeof(MpegEncContext),
4297     .init           = ff_MPV_encode_init,
4298     .encode2        = ff_MPV_encode_picture,
4299     .close          = ff_MPV_encode_end,
4300     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4301     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4302     .priv_class     = &msmpeg4v3_class,
4303 };
4304
4305 FF_MPV_GENERIC_CLASS(wmv1)
4306
4307 AVCodec ff_wmv1_encoder = {
4308     .name           = "wmv1",
4309     .type           = AVMEDIA_TYPE_VIDEO,
4310     .id             = AV_CODEC_ID_WMV1,
4311     .priv_data_size = sizeof(MpegEncContext),
4312     .init           = ff_MPV_encode_init,
4313     .encode2        = ff_MPV_encode_picture,
4314     .close          = ff_MPV_encode_end,
4315     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4316     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4317     .priv_class     = &wmv1_class,
4318 };