]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mp3dec: ask for 8khz switch point mp3s
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "h263.h"
37 #include "mjpegenc.h"
38 #include "msmpeg4.h"
39 #include "faandct.h"
40 #include "thread.h"
41 #include "aandcttab.h"
42 #include "flv.h"
43 #include "mpeg4video.h"
44 #include "internal.h"
45 #include "bytestream.h"
46 #include <limits.h>
47 #include "sp5x.h"
48
49 //#undef NDEBUG
50 //#include <assert.h>
51
/* Prototypes for file-local helpers that are used before they are defined.
 * Within the visible part of this file, denoise_dct_c() and
 * dct_quantize_trellis_c() are installed as function pointers by
 * ff_dct_encode_init(). */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
57
58 /* enable all paranoid tests for rounding, overflows, etc... */
59 //#define PARANOID
60
61 //#define DEBUG
62
/* Shared default tables; MPV_encode_defaults() points s->me.mv_penalty and
 * s->fcode_tab at them. Being static they start out zero-filled. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
/* MPV_encode_defaults() sets the 32 entries at indices
 * MAX_MV - 16 .. MAX_MV + 15 to 1. */
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
/* Option table made of the entries expanded from FF_MPV_COMMON_OPTS,
 * terminated by a NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
70
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
83             dsp->fdct == ff_faandct) {
84             for (i = 0; i < 64; i++) {
85                 const int j = dsp->idct_permutation[i];
86                 /* 16 <= qscale * quant_matrix[i] <= 7905
87                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
88                  *             19952 <=              x  <= 249205026
89                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
90                  *           3444240 >= (1 << 36) / (x) >= 275 */
91
92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
93                                         (qscale * quant_matrix[j]));
94             }
95         } else if (dsp->fdct == ff_fdct_ifast) {
96             for (i = 0; i < 64; i++) {
97                 const int j = dsp->idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
105                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.f.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void copy_picture_attributes(MpegEncContext *s,
188                                     AVFrame *dst,
189                                     AVFrame *src)
190 {
191     int i;
192
193     dst->pict_type              = src->pict_type;
194     dst->quality                = src->quality;
195     dst->coded_picture_number   = src->coded_picture_number;
196     dst->display_picture_number = src->display_picture_number;
197     //dst->reference              = src->reference;
198     dst->pts                    = src->pts;
199     dst->interlaced_frame       = src->interlaced_frame;
200     dst->top_field_first        = src->top_field_first;
201
202     if (s->avctx->me_threshold) {
203         if (!src->motion_val[0])
204             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
205         if (!src->mb_type)
206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
207         if (!src->ref_index[0])
208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
209         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
210             av_log(s->avctx, AV_LOG_ERROR,
211                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
212                    src->motion_subsample_log2, dst->motion_subsample_log2);
213
214         memcpy(dst->mb_type, src->mb_type,
215                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
216
217         for (i = 0; i < 2; i++) {
218             int stride = ((16 * s->mb_width ) >>
219                           src->motion_subsample_log2) + 1;
220             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
221
222             if (src->motion_val[i] &&
223                 src->motion_val[i] != dst->motion_val[i]) {
224                 memcpy(dst->motion_val[i], src->motion_val[i],
225                        2 * stride * height * sizeof(int16_t));
226             }
227             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
228                 memcpy(dst->ref_index[i], src->ref_index[i],
229                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
230             }
231         }
232     }
233 }
234
235 static void update_duplicate_context_after_me(MpegEncContext *dst,
236                                               MpegEncContext *src)
237 {
238 #define COPY(a) dst->a= src->a
239     COPY(pict_type);
240     COPY(current_picture);
241     COPY(f_code);
242     COPY(b_code);
243     COPY(qscale);
244     COPY(lambda);
245     COPY(lambda2);
246     COPY(picture_in_gop_number);
247     COPY(gop_picture_number);
248     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
249     COPY(progressive_frame);    // FIXME don't set in encode_header
250     COPY(partitioned_frame);    // FIXME don't set in encode_header
251 #undef COPY
252 }
253
254 /**
255  * Set the given MpegEncContext to defaults for encoding.
256  * the changed fields will not depend upon the prior state of the MpegEncContext.
257  */
258 static void MPV_encode_defaults(MpegEncContext *s)
259 {
260     int i;
261     ff_MPV_common_defaults(s);
262
263     for (i = -16; i < 16; i++) {
264         default_fcode_tab[i + MAX_MV] = 1;
265     }
266     s->me.mv_penalty = default_mv_penalty;
267     s->fcode_tab     = default_fcode_tab;
268 }
269
270 av_cold int ff_dct_encode_init(MpegEncContext *s) {
271     if (ARCH_X86)
272         ff_dct_encode_init_x86(s);
273
274     if (!s->dct_quantize)
275         s->dct_quantize = ff_dct_quantize_c;
276     if (!s->denoise_dct)
277         s->denoise_dct  = denoise_dct_c;
278     s->fast_dct_quantize = s->dct_quantize;
279     if (s->avctx->trellis)
280         s->dct_quantize  = dct_quantize_trellis_c;
281
282     return 0;
283 }
284
285 /* init video encoder */
286 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
287 {
288     MpegEncContext *s = avctx->priv_data;
289     int i;
290     int chroma_h_shift, chroma_v_shift;
291
292     MPV_encode_defaults(s);
293
294     switch (avctx->codec_id) {
295     case AV_CODEC_ID_MPEG2VIDEO:
296         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
297             avctx->pix_fmt != PIX_FMT_YUV422P) {
298             av_log(avctx, AV_LOG_ERROR,
299                    "only YUV420 and YUV422 are supported\n");
300             return -1;
301         }
302         break;
303     case AV_CODEC_ID_LJPEG:
304         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
305             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
306             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
307             avctx->pix_fmt != PIX_FMT_BGR0     &&
308             avctx->pix_fmt != PIX_FMT_BGRA     &&
309             avctx->pix_fmt != PIX_FMT_BGR24    &&
310             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
311               avctx->pix_fmt != PIX_FMT_YUV422P &&
312               avctx->pix_fmt != PIX_FMT_YUV444P) ||
313              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
314             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
315             return -1;
316         }
317         break;
318     case AV_CODEC_ID_MJPEG:
319     case AV_CODEC_ID_AMV:
320         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
321             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
322             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
323               avctx->pix_fmt != PIX_FMT_YUV422P) ||
324              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
325             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
326             return -1;
327         }
328         break;
329     default:
330         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
331             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
332             return -1;
333         }
334     }
335
336     switch (avctx->pix_fmt) {
337     case PIX_FMT_YUVJ422P:
338     case PIX_FMT_YUV422P:
339         s->chroma_format = CHROMA_422;
340         break;
341     case PIX_FMT_YUVJ420P:
342     case PIX_FMT_YUV420P:
343     default:
344         s->chroma_format = CHROMA_420;
345         break;
346     }
347
348     s->bit_rate = avctx->bit_rate;
349     s->width    = avctx->width;
350     s->height   = avctx->height;
351     if (avctx->gop_size > 600 &&
352         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
353         av_log(avctx, AV_LOG_WARNING,
354                "keyframe interval too large!, reducing it from %d to %d\n",
355                avctx->gop_size, 600);
356         avctx->gop_size = 600;
357     }
358     s->gop_size     = avctx->gop_size;
359     s->avctx        = avctx;
360     s->flags        = avctx->flags;
361     s->flags2       = avctx->flags2;
362     s->max_b_frames = avctx->max_b_frames;
363     s->codec_id     = avctx->codec->id;
364 #if FF_API_MPV_GLOBAL_OPTS
365     if (avctx->luma_elim_threshold)
366         s->luma_elim_threshold   = avctx->luma_elim_threshold;
367     if (avctx->chroma_elim_threshold)
368         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
369 #endif
370     s->strict_std_compliance = avctx->strict_std_compliance;
371     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
372     s->mpeg_quant         = avctx->mpeg_quant;
373     s->rtp_mode           = !!avctx->rtp_payload_size;
374     s->intra_dc_precision = avctx->intra_dc_precision;
375     s->user_specified_pts = AV_NOPTS_VALUE;
376
377     if (s->gop_size <= 1) {
378         s->intra_only = 1;
379         s->gop_size   = 12;
380     } else {
381         s->intra_only = 0;
382     }
383
384     s->me_method = avctx->me_method;
385
386     /* Fixed QSCALE */
387     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
388
389 #if FF_API_MPV_GLOBAL_OPTS
390     if (s->flags & CODEC_FLAG_QP_RD)
391         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
392 #endif
393
394     s->adaptive_quant = (s->avctx->lumi_masking ||
395                          s->avctx->dark_masking ||
396                          s->avctx->temporal_cplx_masking ||
397                          s->avctx->spatial_cplx_masking  ||
398                          s->avctx->p_masking      ||
399                          s->avctx->border_masking ||
400                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
401                         !s->fixed_qscale;
402
403     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
404
405     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
406         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
407         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
408             return -1;
409     }
410
411     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
412         av_log(avctx, AV_LOG_INFO,
413                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
414     }
415
416     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
417         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
418         return -1;
419     }
420
421     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
422         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
423         return -1;
424     }
425
426     if (avctx->rc_max_rate &&
427         avctx->rc_max_rate == avctx->bit_rate &&
428         avctx->rc_max_rate != avctx->rc_min_rate) {
429         av_log(avctx, AV_LOG_INFO,
430                "impossible bitrate constraints, this will fail\n");
431     }
432
433     if (avctx->rc_buffer_size &&
434         avctx->bit_rate * (int64_t)avctx->time_base.num >
435             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
436         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
437         return -1;
438     }
439
440     if (!s->fixed_qscale &&
441         avctx->bit_rate * av_q2d(avctx->time_base) >
442             avctx->bit_rate_tolerance) {
443         av_log(avctx, AV_LOG_ERROR,
444                "bitrate tolerance too small for bitrate\n");
445         return -1;
446     }
447
448     if (s->avctx->rc_max_rate &&
449         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
450         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
451          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
452         90000LL * (avctx->rc_buffer_size - 1) >
453             s->avctx->rc_max_rate * 0xFFFFLL) {
454         av_log(avctx, AV_LOG_INFO,
455                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
456                "specified vbv buffer is too large for the given bitrate!\n");
457     }
458
459     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
460         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
461         s->codec_id != AV_CODEC_ID_FLV1) {
462         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
463         return -1;
464     }
465
466     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
467         av_log(avctx, AV_LOG_ERROR,
468                "OBMC is only supported with simple mb decision\n");
469         return -1;
470     }
471
472     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
473         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
474         return -1;
475     }
476
477     if (s->max_b_frames                    &&
478         s->codec_id != AV_CODEC_ID_MPEG4      &&
479         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
480         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
481         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
482         return -1;
483     }
484
485     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
486          s->codec_id == AV_CODEC_ID_H263  ||
487          s->codec_id == AV_CODEC_ID_H263P) &&
488         (avctx->sample_aspect_ratio.num > 255 ||
489          avctx->sample_aspect_ratio.den > 255)) {
490         av_log(avctx, AV_LOG_WARNING,
491                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
492                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
493         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
494                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
495     }
496
497     if ((s->codec_id == AV_CODEC_ID_H263  ||
498          s->codec_id == AV_CODEC_ID_H263P) &&
499         (avctx->width  > 2048 ||
500          avctx->height > 1152 )) {
501         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
502         return -1;
503     }
504     if ((s->codec_id == AV_CODEC_ID_H263  ||
505          s->codec_id == AV_CODEC_ID_H263P) &&
506         ((avctx->width &3) ||
507          (avctx->height&3) )) {
508         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
509         return -1;
510     }
511
512     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
513         (avctx->width  > 4095 ||
514          avctx->height > 4095 )) {
515         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
516         return -1;
517     }
518
519     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
520         (avctx->width  > 16383 ||
521          avctx->height > 16383 )) {
522         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
523         return -1;
524     }
525
526     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
527          s->codec_id == AV_CODEC_ID_WMV2) &&
528          avctx->width & 1) {
529          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
530          return -1;
531     }
532
533     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
534         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
535         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
536         return -1;
537     }
538
539     // FIXME mpeg2 uses that too
540     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
541         av_log(avctx, AV_LOG_ERROR,
542                "mpeg2 style quantization not supported by codec\n");
543         return -1;
544     }
545
546 #if FF_API_MPV_GLOBAL_OPTS
547     if (s->flags & CODEC_FLAG_CBP_RD)
548         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
549 #endif
550
551     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
552         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
553         return -1;
554     }
555
556     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
557         s->avctx->mb_decision != FF_MB_DECISION_RD) {
558         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
559         return -1;
560     }
561
562     if (s->avctx->scenechange_threshold < 1000000000 &&
563         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
564         av_log(avctx, AV_LOG_ERROR,
565                "closed gop with scene change detection are not supported yet, "
566                "set threshold to 1000000000\n");
567         return -1;
568     }
569
570     if (s->flags & CODEC_FLAG_LOW_DELAY) {
571         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
572             av_log(avctx, AV_LOG_ERROR,
573                   "low delay forcing is only available for mpeg2\n");
574             return -1;
575         }
576         if (s->max_b_frames != 0) {
577             av_log(avctx, AV_LOG_ERROR,
578                    "b frames cannot be used with low delay\n");
579             return -1;
580         }
581     }
582
583     if (s->q_scale_type == 1) {
584         if (avctx->qmax > 12) {
585             av_log(avctx, AV_LOG_ERROR,
586                    "non linear quant only supports qmax <= 12 currently\n");
587             return -1;
588         }
589     }
590
591     if (s->avctx->thread_count > 1         &&
592         s->codec_id != AV_CODEC_ID_MPEG4      &&
593         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
594         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
595         s->codec_id != AV_CODEC_ID_MJPEG      &&
596         (s->codec_id != AV_CODEC_ID_H263P)) {
597         av_log(avctx, AV_LOG_ERROR,
598                "multi threaded encoding not supported by codec\n");
599         return -1;
600     }
601
602     if (s->avctx->thread_count < 1) {
603         av_log(avctx, AV_LOG_ERROR,
604                "automatic thread number detection not supported by codec, "
605                "patch welcome\n");
606         return -1;
607     }
608
609     if (s->avctx->thread_count > 1)
610         s->rtp_mode = 1;
611
612     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
613         s->h263_slice_structured = 1;
614
615     if (!avctx->time_base.den || !avctx->time_base.num) {
616         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
617         return -1;
618     }
619
620     i = (INT_MAX / 2 + 128) >> 8;
621     if (avctx->me_threshold >= i) {
622         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
623                i - 1);
624         return -1;
625     }
626     if (avctx->mb_threshold >= i) {
627         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
628                i - 1);
629         return -1;
630     }
631
632     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
633         av_log(avctx, AV_LOG_INFO,
634                "notice: b_frame_strategy only affects the first pass\n");
635         avctx->b_frame_strategy = 0;
636     }
637
638     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
639     if (i > 1) {
640         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
641         avctx->time_base.den /= i;
642         avctx->time_base.num /= i;
643         //return -1;
644     }
645
646     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
647         // (a + x * 3 / 8) / x
648         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
649         s->inter_quant_bias = 0;
650     } else {
651         s->intra_quant_bias = 0;
652         // (a - x / 4) / x
653         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
654     }
655
656     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
657         s->intra_quant_bias = avctx->intra_quant_bias;
658     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
659         s->inter_quant_bias = avctx->inter_quant_bias;
660
661     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
662
663     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
664                                   &chroma_v_shift);
665
666     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
667         s->avctx->time_base.den > (1 << 16) - 1) {
668         av_log(avctx, AV_LOG_ERROR,
669                "timebase %d/%d not supported by MPEG 4 standard, "
670                "the maximum admitted value for the timebase denominator "
671                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
672                (1 << 16) - 1);
673         return -1;
674     }
675     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
676
677 #if FF_API_MPV_GLOBAL_OPTS
678     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
679         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
680     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
681         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
682     if (avctx->quantizer_noise_shaping)
683         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
684 #endif
685
686     switch (avctx->codec->id) {
687     case AV_CODEC_ID_MPEG1VIDEO:
688         s->out_format = FMT_MPEG1;
689         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
690         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
691         break;
692     case AV_CODEC_ID_MPEG2VIDEO:
693         s->out_format = FMT_MPEG1;
694         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
695         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
696         s->rtp_mode   = 1;
697         break;
698     case AV_CODEC_ID_LJPEG:
699     case AV_CODEC_ID_MJPEG:
700     case AV_CODEC_ID_AMV:
701         s->out_format = FMT_MJPEG;
702         s->intra_only = 1; /* force intra only for jpeg */
703         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
704             (avctx->pix_fmt == PIX_FMT_BGR0
705              || s->avctx->pix_fmt == PIX_FMT_BGRA
706              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
707             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
708             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
709             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
710         } else {
711             s->mjpeg_vsample[0] = 2;
712             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
713             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
714             s->mjpeg_hsample[0] = 2;
715             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
716             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
717         }
718         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
719             ff_mjpeg_encode_init(s) < 0)
720             return -1;
721         avctx->delay = 0;
722         s->low_delay = 1;
723         break;
724     case AV_CODEC_ID_H261:
725         if (!CONFIG_H261_ENCODER)
726             return -1;
727         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
728             av_log(avctx, AV_LOG_ERROR,
729                    "The specified picture size of %dx%d is not valid for the "
730                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
731                     s->width, s->height);
732             return -1;
733         }
734         s->out_format = FMT_H261;
735         avctx->delay  = 0;
736         s->low_delay  = 1;
737         break;
738     case AV_CODEC_ID_H263:
739         if (!CONFIG_H263_ENCODER)
740             return -1;
741         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
742                              s->width, s->height) == 8) {
743             av_log(avctx, AV_LOG_ERROR,
744                    "The specified picture size of %dx%d is not valid for "
745                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
746                    "352x288, 704x576, and 1408x1152. "
747                    "Try H.263+.\n", s->width, s->height);
748             return -1;
749         }
750         s->out_format = FMT_H263;
751         avctx->delay  = 0;
752         s->low_delay  = 1;
753         break;
754     case AV_CODEC_ID_H263P:
755         s->out_format = FMT_H263;
756         s->h263_plus  = 1;
757         /* Fx */
758         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
759         s->modified_quant  = s->h263_aic;
760         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
761         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
762
763         /* /Fx */
764         /* These are just to be sure */
765         avctx->delay = 0;
766         s->low_delay = 1;
767         break;
768     case AV_CODEC_ID_FLV1:
769         s->out_format      = FMT_H263;
770         s->h263_flv        = 2; /* format = 1; 11-bit codes */
771         s->unrestricted_mv = 1;
772         s->rtp_mode  = 0; /* don't allow GOB */
773         avctx->delay = 0;
774         s->low_delay = 1;
775         break;
776     case AV_CODEC_ID_RV10:
777         s->out_format = FMT_H263;
778         avctx->delay  = 0;
779         s->low_delay  = 1;
780         break;
781     case AV_CODEC_ID_RV20:
782         s->out_format      = FMT_H263;
783         avctx->delay       = 0;
784         s->low_delay       = 1;
785         s->modified_quant  = 1;
786         s->h263_aic        = 1;
787         s->h263_plus       = 1;
788         s->loop_filter     = 1;
789         s->unrestricted_mv = 0;
790         break;
791     case AV_CODEC_ID_MPEG4:
792         s->out_format      = FMT_H263;
793         s->h263_pred       = 1;
794         s->unrestricted_mv = 1;
795         s->low_delay       = s->max_b_frames ? 0 : 1;
796         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
797         break;
798     case AV_CODEC_ID_MSMPEG4V2:
799         s->out_format      = FMT_H263;
800         s->h263_pred       = 1;
801         s->unrestricted_mv = 1;
802         s->msmpeg4_version = 2;
803         avctx->delay       = 0;
804         s->low_delay       = 1;
805         break;
806     case AV_CODEC_ID_MSMPEG4V3:
807         s->out_format        = FMT_H263;
808         s->h263_pred         = 1;
809         s->unrestricted_mv   = 1;
810         s->msmpeg4_version   = 3;
811         s->flipflop_rounding = 1;
812         avctx->delay         = 0;
813         s->low_delay         = 1;
814         break;
815     case AV_CODEC_ID_WMV1:
816         s->out_format        = FMT_H263;
817         s->h263_pred         = 1;
818         s->unrestricted_mv   = 1;
819         s->msmpeg4_version   = 4;
820         s->flipflop_rounding = 1;
821         avctx->delay         = 0;
822         s->low_delay         = 1;
823         break;
824     case AV_CODEC_ID_WMV2:
825         s->out_format        = FMT_H263;
826         s->h263_pred         = 1;
827         s->unrestricted_mv   = 1;
828         s->msmpeg4_version   = 5;
829         s->flipflop_rounding = 1;
830         avctx->delay         = 0;
831         s->low_delay         = 1;
832         break;
833     default:
834         return -1;
835     }
836
837     avctx->has_b_frames = !s->low_delay;
838
839     s->encoding = 1;
840
841     s->progressive_frame    =
842     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
843                                                 CODEC_FLAG_INTERLACED_ME) ||
844                                 s->alternate_scan);
845
846     /* init */
847     if (ff_MPV_common_init(s) < 0)
848         return -1;
849
850     ff_dct_encode_init(s);
851
852     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
853         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
854
855     s->quant_precision = 5;
856
857     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
858     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
859
860     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
861         ff_h261_encode_init(s);
862     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
863         ff_h263_encode_init(s);
864     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
865         ff_msmpeg4_encode_init(s);
866     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
867         && s->out_format == FMT_MPEG1)
868         ff_mpeg1_encode_init(s);
869
870     /* init q matrix */
871     for (i = 0; i < 64; i++) {
872         int j = s->dsp.idct_permutation[i];
873         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
874             s->mpeg_quant) {
875             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
876             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
877         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
878             s->intra_matrix[j] =
879             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
880         } else {
881             /* mpeg1/2 */
882             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
883             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
884         }
885         if (s->avctx->intra_matrix)
886             s->intra_matrix[j] = s->avctx->intra_matrix[i];
887         if (s->avctx->inter_matrix)
888             s->inter_matrix[j] = s->avctx->inter_matrix[i];
889     }
890
891     /* precompute matrix */
892     /* for mjpeg, we do include qscale in the matrix */
893     if (s->out_format != FMT_MJPEG) {
894         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
895                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
896                           31, 1);
897         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
898                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
899                           31, 0);
900     }
901
902     if (ff_rate_control_init(s) < 0)
903         return -1;
904
905     return 0;
906 }
907
908 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
909 {
910     MpegEncContext *s = avctx->priv_data;
911
912     ff_rate_control_uninit(s);
913
914     ff_MPV_common_end(s);
915     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
916         s->out_format == FMT_MJPEG)
917         ff_mjpeg_encode_close(s);
918
919     av_freep(&avctx->extradata);
920
921     return 0;
922 }
923
/**
 * Sum of absolute errors between a 16x16 pixel block and a constant value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int sum = 0;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            sum += diff >= 0 ? diff : -diff;
        }
    }

    return sum;
}
937
938 static int get_intra_count(MpegEncContext *s, uint8_t *src,
939                            uint8_t *ref, int stride)
940 {
941     int x, y, w, h;
942     int acc = 0;
943
944     w = s->width  & ~15;
945     h = s->height & ~15;
946
947     for (y = 0; y < h; y += 16) {
948         for (x = 0; x < w; x += 16) {
949             int offset = x + y * stride;
950             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
951                                      16);
952             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
953             int sae  = get_sae(src + offset, mean, stride);
954
955             acc += sae + 500 < sad;
956         }
957     }
958     return acc;
959 }
960
961
962 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
963 {
964     AVFrame *pic = NULL;
965     int64_t pts;
966     int i;
967     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
968                                                  (s->low_delay ? 0 : 1);
969     int direct = 1;
970
971     if (pic_arg) {
972         pts = pic_arg->pts;
973         pic_arg->display_picture_number = s->input_picture_number++;
974
975         if (pts != AV_NOPTS_VALUE) {
976             if (s->user_specified_pts != AV_NOPTS_VALUE) {
977                 int64_t time = pts;
978                 int64_t last = s->user_specified_pts;
979
980                 if (time <= last) {
981                     av_log(s->avctx, AV_LOG_ERROR,
982                            "Error, Invalid timestamp=%"PRId64", "
983                            "last=%"PRId64"\n", pts, s->user_specified_pts);
984                     return -1;
985                 }
986
987                 if (!s->low_delay && pic_arg->display_picture_number == 1)
988                     s->dts_delta = time - last;
989             }
990             s->user_specified_pts = pts;
991         } else {
992             if (s->user_specified_pts != AV_NOPTS_VALUE) {
993                 s->user_specified_pts =
994                 pts = s->user_specified_pts + 1;
995                 av_log(s->avctx, AV_LOG_INFO,
996                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
997                        pts);
998             } else {
999                 pts = pic_arg->display_picture_number;
1000             }
1001         }
1002     }
1003
1004   if (pic_arg) {
1005     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
1006         direct = 0;
1007     if (pic_arg->linesize[0] != s->linesize)
1008         direct = 0;
1009     if (pic_arg->linesize[1] != s->uvlinesize)
1010         direct = 0;
1011     if (pic_arg->linesize[2] != s->uvlinesize)
1012         direct = 0;
1013
1014     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
1015     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
1016
1017     if (direct) {
1018         i = ff_find_unused_picture(s, 1);
1019         if (i < 0)
1020             return i;
1021
1022         pic = &s->picture[i].f;
1023         pic->reference = 3;
1024
1025         for (i = 0; i < 4; i++) {
1026             pic->data[i]     = pic_arg->data[i];
1027             pic->linesize[i] = pic_arg->linesize[i];
1028         }
1029         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1030             return -1;
1031         }
1032     } else {
1033         i = ff_find_unused_picture(s, 0);
1034         if (i < 0)
1035             return i;
1036
1037         pic = &s->picture[i].f;
1038         pic->reference = 3;
1039
1040         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1041             return -1;
1042         }
1043
1044         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1045             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1046             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1047             // empty
1048         } else {
1049             int h_chroma_shift, v_chroma_shift;
1050             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1051                                           &v_chroma_shift);
1052
1053             for (i = 0; i < 3; i++) {
1054                 int src_stride = pic_arg->linesize[i];
1055                 int dst_stride = i ? s->uvlinesize : s->linesize;
1056                 int h_shift = i ? h_chroma_shift : 0;
1057                 int v_shift = i ? v_chroma_shift : 0;
1058                 int w = s->width  >> h_shift;
1059                 int h = s->height >> v_shift;
1060                 uint8_t *src = pic_arg->data[i];
1061                 uint8_t *dst = pic->data[i];
1062
1063                 if(s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1064                     h= ((s->height+15)/16*16)>>v_shift;
1065                 }
1066
1067                 if (!s->avctx->rc_buffer_size)
1068                     dst += INPLACE_OFFSET;
1069
1070                 if (src_stride == dst_stride)
1071                     memcpy(dst, src, src_stride * h);
1072                 else {
1073                     while (h--) {
1074                         memcpy(dst, src, w);
1075                         dst += dst_stride;
1076                         src += src_stride;
1077                     }
1078                 }
1079             }
1080         }
1081     }
1082     copy_picture_attributes(s, pic, pic_arg);
1083     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1084   }
1085
1086     /* shift buffer entries */
1087     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1088         s->input_picture[i - 1] = s->input_picture[i];
1089
1090     s->input_picture[encoding_delay] = (Picture*) pic;
1091
1092     return 0;
1093 }
1094
1095 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1096 {
1097     int x, y, plane;
1098     int score = 0;
1099     int64_t score64 = 0;
1100
1101     for (plane = 0; plane < 3; plane++) {
1102         const int stride = p->f.linesize[plane];
1103         const int bw = plane ? 1 : 2;
1104         for (y = 0; y < s->mb_height * bw; y++) {
1105             for (x = 0; x < s->mb_width * bw; x++) {
1106                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1107                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1108                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1109                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1110
1111                 switch (s->avctx->frame_skip_exp) {
1112                 case 0: score    =  FFMAX(score, v);          break;
1113                 case 1: score   += FFABS(v);                  break;
1114                 case 2: score   += v * v;                     break;
1115                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1116                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1117                 }
1118             }
1119         }
1120     }
1121
1122     if (score)
1123         score64 = score;
1124
1125     if (score64 < s->avctx->frame_skip_threshold)
1126         return 1;
1127     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1128         return 1;
1129     return 0;
1130 }
1131
1132 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1133 {
1134     AVPacket pkt = { 0 };
1135     int ret, got_output;
1136
1137     av_init_packet(&pkt);
1138     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1139     if (ret < 0)
1140         return ret;
1141
1142     ret = pkt.size;
1143     av_free_packet(&pkt);
1144     return ret;
1145 }
1146
1147 static int estimate_best_b_count(MpegEncContext *s)
1148 {
1149     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1150     AVCodecContext *c = avcodec_alloc_context3(NULL);
1151     AVFrame input[FF_MAX_B_FRAMES + 2];
1152     const int scale = s->avctx->brd_scale;
1153     int i, j, out_size, p_lambda, b_lambda, lambda2;
1154     int64_t best_rd  = INT64_MAX;
1155     int best_b_count = -1;
1156
1157     av_assert0(scale >= 0 && scale <= 3);
1158
1159     //emms_c();
1160     //s->next_picture_ptr->quality;
1161     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1162     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1163     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1164     if (!b_lambda) // FIXME we should do this somewhere else
1165         b_lambda = p_lambda;
1166     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1167                FF_LAMBDA_SHIFT;
1168
1169     c->width        = s->width  >> scale;
1170     c->height       = s->height >> scale;
1171     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1172                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1173     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1174     c->mb_decision  = s->avctx->mb_decision;
1175     c->me_cmp       = s->avctx->me_cmp;
1176     c->mb_cmp       = s->avctx->mb_cmp;
1177     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1178     c->pix_fmt      = PIX_FMT_YUV420P;
1179     c->time_base    = s->avctx->time_base;
1180     c->max_b_frames = s->max_b_frames;
1181
1182     if (avcodec_open2(c, codec, NULL) < 0)
1183         return -1;
1184
1185     for (i = 0; i < s->max_b_frames + 2; i++) {
1186         int ysize = c->width * c->height;
1187         int csize = (c->width / 2) * (c->height / 2);
1188         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1189                                                 s->next_picture_ptr;
1190
1191         avcodec_get_frame_defaults(&input[i]);
1192         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1193         input[i].data[1]     = input[i].data[0] + ysize;
1194         input[i].data[2]     = input[i].data[1] + csize;
1195         input[i].linesize[0] = c->width;
1196         input[i].linesize[1] =
1197         input[i].linesize[2] = c->width / 2;
1198
1199         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1200             pre_input = *pre_input_ptr;
1201
1202             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1203                 pre_input.f.data[0] += INPLACE_OFFSET;
1204                 pre_input.f.data[1] += INPLACE_OFFSET;
1205                 pre_input.f.data[2] += INPLACE_OFFSET;
1206             }
1207
1208             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1209                                  pre_input.f.data[0], pre_input.f.linesize[0],
1210                                  c->width,      c->height);
1211             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1212                                  pre_input.f.data[1], pre_input.f.linesize[1],
1213                                  c->width >> 1, c->height >> 1);
1214             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1215                                  pre_input.f.data[2], pre_input.f.linesize[2],
1216                                  c->width >> 1, c->height >> 1);
1217         }
1218     }
1219
1220     for (j = 0; j < s->max_b_frames + 1; j++) {
1221         int64_t rd = 0;
1222
1223         if (!s->input_picture[j])
1224             break;
1225
1226         c->error[0] = c->error[1] = c->error[2] = 0;
1227
1228         input[0].pict_type = AV_PICTURE_TYPE_I;
1229         input[0].quality   = 1 * FF_QP2LAMBDA;
1230
1231         out_size = encode_frame(c, &input[0]);
1232
1233         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1234
1235         for (i = 0; i < s->max_b_frames + 1; i++) {
1236             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1237
1238             input[i + 1].pict_type = is_p ?
1239                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1240             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1241
1242             out_size = encode_frame(c, &input[i + 1]);
1243
1244             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1245         }
1246
1247         /* get the delayed frames */
1248         while (out_size) {
1249             out_size = encode_frame(c, NULL);
1250             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1251         }
1252
1253         rd += c->error[0] + c->error[1] + c->error[2];
1254
1255         if (rd < best_rd) {
1256             best_rd = rd;
1257             best_b_count = j;
1258         }
1259     }
1260
1261     avcodec_close(c);
1262     av_freep(&c);
1263
1264     for (i = 0; i < s->max_b_frames + 2; i++) {
1265         av_freep(&input[i].data[0]);
1266     }
1267
1268     return best_b_count;
1269 }
1270
1271 static int select_input_picture(MpegEncContext *s)
1272 {
1273     int i;
1274
1275     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1276         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1277     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1278
1279     /* set next picture type & ordering */
1280     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1281         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1282             s->next_picture_ptr == NULL || s->intra_only) {
1283             s->reordered_input_picture[0] = s->input_picture[0];
1284             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1285             s->reordered_input_picture[0]->f.coded_picture_number =
1286                 s->coded_picture_number++;
1287         } else {
1288             int b_frames;
1289
1290             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1291                 if (s->picture_in_gop_number < s->gop_size &&
1292                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1293                     // FIXME check that te gop check above is +-1 correct
1294                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1295                     //       s->input_picture[0]->f.data[0],
1296                     //       s->input_picture[0]->pts);
1297
1298                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1299                         for (i = 0; i < 4; i++)
1300                             s->input_picture[0]->f.data[i] = NULL;
1301                         s->input_picture[0]->f.type = 0;
1302                     } else {
1303                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1304                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1305
1306                         s->avctx->release_buffer(s->avctx,
1307                                                  &s->input_picture[0]->f);
1308                     }
1309
1310                     emms_c();
1311                     ff_vbv_update(s, 0);
1312
1313                     goto no_output_pic;
1314                 }
1315             }
1316
1317             if (s->flags & CODEC_FLAG_PASS2) {
1318                 for (i = 0; i < s->max_b_frames + 1; i++) {
1319                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1320
1321                     if (pict_num >= s->rc_context.num_entries)
1322                         break;
1323                     if (!s->input_picture[i]) {
1324                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1325                         break;
1326                     }
1327
1328                     s->input_picture[i]->f.pict_type =
1329                         s->rc_context.entry[pict_num].new_pict_type;
1330                 }
1331             }
1332
1333             if (s->avctx->b_frame_strategy == 0) {
1334                 b_frames = s->max_b_frames;
1335                 while (b_frames && !s->input_picture[b_frames])
1336                     b_frames--;
1337             } else if (s->avctx->b_frame_strategy == 1) {
1338                 for (i = 1; i < s->max_b_frames + 1; i++) {
1339                     if (s->input_picture[i] &&
1340                         s->input_picture[i]->b_frame_score == 0) {
1341                         s->input_picture[i]->b_frame_score =
1342                             get_intra_count(s,
1343                                             s->input_picture[i    ]->f.data[0],
1344                                             s->input_picture[i - 1]->f.data[0],
1345                                             s->linesize) + 1;
1346                     }
1347                 }
1348                 for (i = 0; i < s->max_b_frames + 1; i++) {
1349                     if (s->input_picture[i] == NULL ||
1350                         s->input_picture[i]->b_frame_score - 1 >
1351                             s->mb_num / s->avctx->b_sensitivity)
1352                         break;
1353                 }
1354
1355                 b_frames = FFMAX(0, i - 1);
1356
1357                 /* reset scores */
1358                 for (i = 0; i < b_frames + 1; i++) {
1359                     s->input_picture[i]->b_frame_score = 0;
1360                 }
1361             } else if (s->avctx->b_frame_strategy == 2) {
1362                 b_frames = estimate_best_b_count(s);
1363             } else {
1364                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1365                 b_frames = 0;
1366             }
1367
1368             emms_c();
1369             //static int b_count = 0;
1370             //b_count += b_frames;
1371             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1372
1373             for (i = b_frames - 1; i >= 0; i--) {
1374                 int type = s->input_picture[i]->f.pict_type;
1375                 if (type && type != AV_PICTURE_TYPE_B)
1376                     b_frames = i;
1377             }
1378             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1379                 b_frames == s->max_b_frames) {
1380                 av_log(s->avctx, AV_LOG_ERROR,
1381                        "warning, too many b frames in a row\n");
1382             }
1383
1384             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1385                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1386                     s->gop_size > s->picture_in_gop_number) {
1387                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1388                 } else {
1389                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1390                         b_frames = 0;
1391                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1392                 }
1393             }
1394
1395             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1396                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1397                 b_frames--;
1398
1399             s->reordered_input_picture[0] = s->input_picture[b_frames];
1400             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1401                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1402             s->reordered_input_picture[0]->f.coded_picture_number =
1403                 s->coded_picture_number++;
1404             for (i = 0; i < b_frames; i++) {
1405                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1406                 s->reordered_input_picture[i + 1]->f.pict_type =
1407                     AV_PICTURE_TYPE_B;
1408                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1409                     s->coded_picture_number++;
1410             }
1411         }
1412     }
1413 no_output_pic:
1414     if (s->reordered_input_picture[0]) {
1415         s->reordered_input_picture[0]->f.reference =
1416            s->reordered_input_picture[0]->f.pict_type !=
1417                AV_PICTURE_TYPE_B ? 3 : 0;
1418
1419         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1420
1421         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1422             s->avctx->rc_buffer_size) {
1423             // input is a shared pix, so we can't modifiy it -> alloc a new
1424             // one & ensure that the shared one is reuseable
1425
1426             Picture *pic;
1427             int i = ff_find_unused_picture(s, 0);
1428             if (i < 0)
1429                 return i;
1430             pic = &s->picture[i];
1431
1432             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1433             if (ff_alloc_picture(s, pic, 0) < 0) {
1434                 return -1;
1435             }
1436
1437             /* mark us unused / free shared pic */
1438             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1439                 s->avctx->release_buffer(s->avctx,
1440                                          &s->reordered_input_picture[0]->f);
1441             for (i = 0; i < 4; i++)
1442                 s->reordered_input_picture[0]->f.data[i] = NULL;
1443             s->reordered_input_picture[0]->f.type = 0;
1444
1445             copy_picture_attributes(s, &pic->f,
1446                                     &s->reordered_input_picture[0]->f);
1447
1448             s->current_picture_ptr = pic;
1449         } else {
1450             // input is not a shared pix -> reuse buffer for current_pix
1451
1452             assert(s->reordered_input_picture[0]->f.type ==
1453                        FF_BUFFER_TYPE_USER ||
1454                    s->reordered_input_picture[0]->f.type ==
1455                        FF_BUFFER_TYPE_INTERNAL);
1456
1457             s->current_picture_ptr = s->reordered_input_picture[0];
1458             for (i = 0; i < 4; i++) {
1459                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1460             }
1461         }
1462         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1463
1464         s->picture_number = s->new_picture.f.display_picture_number;
1465         //printf("dpn:%d\n", s->picture_number);
1466     } else {
1467         memset(&s->new_picture, 0, sizeof(Picture));
1468     }
1469     return 0;
1470 }
1471
1472 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1473                           AVFrame *pic_arg, int *got_packet)
1474 {
1475     MpegEncContext *s = avctx->priv_data;
1476     int i, stuffing_count, ret;
1477     int context_count = s->slice_context_count;
1478
1479     s->picture_in_gop_number++;
1480
1481     if (load_input_picture(s, pic_arg) < 0)
1482         return -1;
1483
1484     if (select_input_picture(s) < 0) {
1485         return -1;
1486     }
1487
1488     /* output? */
1489     if (s->new_picture.f.data[0]) {
1490         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1491             return ret;
1492         if (s->mb_info) {
1493             s->mb_info_ptr = av_packet_new_side_data(pkt,
1494                                  AV_PKT_DATA_H263_MB_INFO,
1495                                  s->mb_width*s->mb_height*12);
1496             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1497         }
1498
1499         for (i = 0; i < context_count; i++) {
1500             int start_y = s->thread_context[i]->start_mb_y;
1501             int   end_y = s->thread_context[i]->  end_mb_y;
1502             int h       = s->mb_height;
1503             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1504             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1505
1506             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1507         }
1508
1509         s->pict_type = s->new_picture.f.pict_type;
1510         //emms_c();
1511         //printf("qs:%f %f %d\n", s->new_picture.quality,
1512         //       s->current_picture.quality, s->qscale);
1513         ff_MPV_frame_start(s, avctx);
1514 vbv_retry:
1515         if (encode_picture(s, s->picture_number) < 0)
1516             return -1;
1517
1518         avctx->header_bits = s->header_bits;
1519         avctx->mv_bits     = s->mv_bits;
1520         avctx->misc_bits   = s->misc_bits;
1521         avctx->i_tex_bits  = s->i_tex_bits;
1522         avctx->p_tex_bits  = s->p_tex_bits;
1523         avctx->i_count     = s->i_count;
1524         // FIXME f/b_count in avctx
1525         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1526         avctx->skip_count  = s->skip_count;
1527
1528         ff_MPV_frame_end(s);
1529
1530         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1531             ff_mjpeg_encode_picture_trailer(s);
1532
1533         if (avctx->rc_buffer_size) {
1534             RateControlContext *rcc = &s->rc_context;
1535             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1536
1537             if (put_bits_count(&s->pb) > max_size &&
1538                 s->lambda < s->avctx->lmax) {
1539                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1540                                        (s->qscale + 1) / s->qscale);
1541                 if (s->adaptive_quant) {
1542                     int i;
1543                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1544                         s->lambda_table[i] =
1545                             FFMAX(s->lambda_table[i] + 1,
1546                                   s->lambda_table[i] * (s->qscale + 1) /
1547                                   s->qscale);
1548                 }
1549                 s->mb_skipped = 0;        // done in MPV_frame_start()
1550                 // done in encode_picture() so we must undo it
1551                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1552                     if (s->flipflop_rounding          ||
1553                         s->codec_id == AV_CODEC_ID_H263P ||
1554                         s->codec_id == AV_CODEC_ID_MPEG4)
1555                         s->no_rounding ^= 1;
1556                 }
1557                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1558                     s->time_base       = s->last_time_base;
1559                     s->last_non_b_time = s->time - s->pp_time;
1560                 }
1561                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1562                 for (i = 0; i < context_count; i++) {
1563                     PutBitContext *pb = &s->thread_context[i]->pb;
1564                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1565                 }
1566                 goto vbv_retry;
1567             }
1568
1569             assert(s->avctx->rc_max_rate);
1570         }
1571
1572         if (s->flags & CODEC_FLAG_PASS1)
1573             ff_write_pass1_stats(s);
1574
1575         for (i = 0; i < 4; i++) {
1576             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1577             avctx->error[i] += s->current_picture_ptr->f.error[i];
1578         }
1579
1580         if (s->flags & CODEC_FLAG_PASS1)
1581             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1582                    avctx->i_tex_bits + avctx->p_tex_bits ==
1583                        put_bits_count(&s->pb));
1584         flush_put_bits(&s->pb);
1585         s->frame_bits  = put_bits_count(&s->pb);
1586
1587         stuffing_count = ff_vbv_update(s, s->frame_bits);
1588         s->stuffing_bits = 8*stuffing_count;
1589         if (stuffing_count) {
1590             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1591                     stuffing_count + 50) {
1592                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1593                 return -1;
1594             }
1595
1596             switch (s->codec_id) {
1597             case AV_CODEC_ID_MPEG1VIDEO:
1598             case AV_CODEC_ID_MPEG2VIDEO:
1599                 while (stuffing_count--) {
1600                     put_bits(&s->pb, 8, 0);
1601                 }
1602             break;
1603             case AV_CODEC_ID_MPEG4:
1604                 put_bits(&s->pb, 16, 0);
1605                 put_bits(&s->pb, 16, 0x1C3);
1606                 stuffing_count -= 4;
1607                 while (stuffing_count--) {
1608                     put_bits(&s->pb, 8, 0xFF);
1609                 }
1610             break;
1611             default:
1612                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1613             }
1614             flush_put_bits(&s->pb);
1615             s->frame_bits  = put_bits_count(&s->pb);
1616         }
1617
1618         /* update mpeg1/2 vbv_delay for CBR */
1619         if (s->avctx->rc_max_rate                          &&
1620             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1621             s->out_format == FMT_MPEG1                     &&
1622             90000LL * (avctx->rc_buffer_size - 1) <=
1623                 s->avctx->rc_max_rate * 0xFFFFLL) {
1624             int vbv_delay, min_delay;
1625             double inbits  = s->avctx->rc_max_rate *
1626                              av_q2d(s->avctx->time_base);
1627             int    minbits = s->frame_bits - 8 *
1628                              (s->vbv_delay_ptr - s->pb.buf - 1);
1629             double bits    = s->rc_context.buffer_index + minbits - inbits;
1630
1631             if (bits < 0)
1632                 av_log(s->avctx, AV_LOG_ERROR,
1633                        "Internal error, negative bits\n");
1634
1635             assert(s->repeat_first_field == 0);
1636
1637             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1638             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1639                         s->avctx->rc_max_rate;
1640
1641             vbv_delay = FFMAX(vbv_delay, min_delay);
1642
1643             av_assert0(vbv_delay < 0xFFFF);
1644
1645             s->vbv_delay_ptr[0] &= 0xF8;
1646             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1647             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1648             s->vbv_delay_ptr[2] &= 0x07;
1649             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1650             avctx->vbv_delay     = vbv_delay * 300;
1651         }
1652         s->total_bits     += s->frame_bits;
1653         avctx->frame_bits  = s->frame_bits;
1654
1655         pkt->pts = s->current_picture.f.pts;
1656         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1657             if (!s->current_picture.f.coded_picture_number)
1658                 pkt->dts = pkt->pts - s->dts_delta;
1659             else
1660                 pkt->dts = s->reordered_pts;
1661             s->reordered_pts = pkt->pts;
1662         } else
1663             pkt->dts = pkt->pts;
1664         if (s->current_picture.f.key_frame)
1665             pkt->flags |= AV_PKT_FLAG_KEY;
1666         if (s->mb_info)
1667             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1668     } else {
1669         s->frame_bits = 0;
1670     }
1671     assert((s->frame_bits & 7) == 0);
1672
1673     pkt->size = s->frame_bits / 8;
1674     *got_packet = !!pkt->size;
1675     return 0;
1676 }
1677
1678 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1679                                                 int n, int threshold)
1680 {
1681     static const char tab[64] = {
1682         3, 2, 2, 1, 1, 1, 1, 1,
1683         1, 1, 1, 1, 1, 1, 1, 1,
1684         1, 1, 1, 1, 1, 1, 1, 1,
1685         0, 0, 0, 0, 0, 0, 0, 0,
1686         0, 0, 0, 0, 0, 0, 0, 0,
1687         0, 0, 0, 0, 0, 0, 0, 0,
1688         0, 0, 0, 0, 0, 0, 0, 0,
1689         0, 0, 0, 0, 0, 0, 0, 0
1690     };
1691     int score = 0;
1692     int run = 0;
1693     int i;
1694     DCTELEM *block = s->block[n];
1695     const int last_index = s->block_last_index[n];
1696     int skip_dc;
1697
1698     if (threshold < 0) {
1699         skip_dc = 0;
1700         threshold = -threshold;
1701     } else
1702         skip_dc = 1;
1703
1704     /* Are all we could set to zero already zero? */
1705     if (last_index <= skip_dc - 1)
1706         return;
1707
1708     for (i = 0; i <= last_index; i++) {
1709         const int j = s->intra_scantable.permutated[i];
1710         const int level = FFABS(block[j]);
1711         if (level == 1) {
1712             if (skip_dc && i == 0)
1713                 continue;
1714             score += tab[run];
1715             run = 0;
1716         } else if (level > 1) {
1717             return;
1718         } else {
1719             run++;
1720         }
1721     }
1722     if (score >= threshold)
1723         return;
1724     for (i = skip_dc; i <= last_index; i++) {
1725         const int j = s->intra_scantable.permutated[i];
1726         block[j] = 0;
1727     }
1728     if (block[0])
1729         s->block_last_index[n] = 0;
1730     else
1731         s->block_last_index[n] = -1;
1732 }
1733
1734 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1735                                int last_index)
1736 {
1737     int i;
1738     const int maxlevel = s->max_qcoeff;
1739     const int minlevel = s->min_qcoeff;
1740     int overflow = 0;
1741
1742     if (s->mb_intra) {
1743         i = 1; // skip clipping of intra dc
1744     } else
1745         i = 0;
1746
1747     for (; i <= last_index; i++) {
1748         const int j = s->intra_scantable.permutated[i];
1749         int level = block[j];
1750
1751         if (level > maxlevel) {
1752             level = maxlevel;
1753             overflow++;
1754         } else if (level < minlevel) {
1755             level = minlevel;
1756             overflow++;
1757         }
1758
1759         block[j] = level;
1760     }
1761
1762     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1763         av_log(s->avctx, AV_LOG_INFO,
1764                "warning, clipping %d dct coefficients to %d..%d\n",
1765                overflow, minlevel, maxlevel);
1766 }
1767
/**
 * Fill an 8x8 weight table from local pixel activity.
 *
 * For every pixel the neighbourhood reaching one pixel in each direction
 * (cut off at the 8x8 block border) is examined and the weight is set to
 * 36 * sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 area
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total   = 0;
            int squares = 0;
            int samples = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pixel = ptr[nx + ny * stride];

                    total   += pixel;
                    squares += pixel * pixel;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1791
1792 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1793                                                 int motion_x, int motion_y,
1794                                                 int mb_block_height,
1795                                                 int mb_block_count)
1796 {
1797     int16_t weight[8][64];
1798     DCTELEM orig[8][64];
1799     const int mb_x = s->mb_x;
1800     const int mb_y = s->mb_y;
1801     int i;
1802     int skip_dct[8];
1803     int dct_offset = s->linesize * 8; // default for progressive frames
1804     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1805     int wrap_y, wrap_c;
1806
1807     for (i = 0; i < mb_block_count; i++)
1808         skip_dct[i] = s->skipdct;
1809
1810     if (s->adaptive_quant) {
1811         const int last_qp = s->qscale;
1812         const int mb_xy = mb_x + mb_y * s->mb_stride;
1813
1814         s->lambda = s->lambda_table[mb_xy];
1815         update_qscale(s);
1816
1817         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1818             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1819             s->dquant = s->qscale - last_qp;
1820
1821             if (s->out_format == FMT_H263) {
1822                 s->dquant = av_clip(s->dquant, -2, 2);
1823
1824                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1825                     if (!s->mb_intra) {
1826                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1827                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1828                                 s->dquant = 0;
1829                         }
1830                         if (s->mv_type == MV_TYPE_8X8)
1831                             s->dquant = 0;
1832                     }
1833                 }
1834             }
1835         }
1836         ff_set_qscale(s, last_qp + s->dquant);
1837     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1838         ff_set_qscale(s, s->qscale + s->dquant);
1839
1840     wrap_y = s->linesize;
1841     wrap_c = s->uvlinesize;
1842     ptr_y  = s->new_picture.f.data[0] +
1843              (mb_y * 16 * wrap_y)              + mb_x * 16;
1844     ptr_cb = s->new_picture.f.data[1] +
1845              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1846     ptr_cr = s->new_picture.f.data[2] +
1847              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1848
1849     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1850         uint8_t *ebuf = s->edge_emu_buffer + 32;
1851         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1852                                 mb_y * 16, s->width, s->height);
1853         ptr_y = ebuf;
1854         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1855                                 mb_block_height, mb_x * 8, mb_y * 8,
1856                                 (s->width+1) >> 1, (s->height+1) >> 1);
1857         ptr_cb = ebuf + 18 * wrap_y;
1858         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1859                                 mb_block_height, mb_x * 8, mb_y * 8,
1860                                 (s->width+1) >> 1, (s->height+1) >> 1);
1861         ptr_cr = ebuf + 18 * wrap_y + 8;
1862     }
1863
1864     if (s->mb_intra) {
1865         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1866             int progressive_score, interlaced_score;
1867
1868             s->interlaced_dct = 0;
1869             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1870                                                     NULL, wrap_y, 8) +
1871                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1872                                                     NULL, wrap_y, 8) - 400;
1873
1874             if (progressive_score > 0) {
1875                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1876                                                        NULL, wrap_y * 2, 8) +
1877                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1878                                                        NULL, wrap_y * 2, 8);
1879                 if (progressive_score > interlaced_score) {
1880                     s->interlaced_dct = 1;
1881
1882                     dct_offset = wrap_y;
1883                     wrap_y <<= 1;
1884                     if (s->chroma_format == CHROMA_422)
1885                         wrap_c <<= 1;
1886                 }
1887             }
1888         }
1889
1890         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1891         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1892         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1893         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1894
1895         if (s->flags & CODEC_FLAG_GRAY) {
1896             skip_dct[4] = 1;
1897             skip_dct[5] = 1;
1898         } else {
1899             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1900             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1901             if (!s->chroma_y_shift) { /* 422 */
1902                 s->dsp.get_pixels(s->block[6],
1903                                   ptr_cb + (dct_offset >> 1), wrap_c);
1904                 s->dsp.get_pixels(s->block[7],
1905                                   ptr_cr + (dct_offset >> 1), wrap_c);
1906             }
1907         }
1908     } else {
1909         op_pixels_func (*op_pix)[4];
1910         qpel_mc_func (*op_qpix)[16];
1911         uint8_t *dest_y, *dest_cb, *dest_cr;
1912
1913         dest_y  = s->dest[0];
1914         dest_cb = s->dest[1];
1915         dest_cr = s->dest[2];
1916
1917         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1918             op_pix  = s->dsp.put_pixels_tab;
1919             op_qpix = s->dsp.put_qpel_pixels_tab;
1920         } else {
1921             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1922             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1923         }
1924
1925         if (s->mv_dir & MV_DIR_FORWARD) {
1926             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1927                           s->last_picture.f.data,
1928                           op_pix, op_qpix);
1929             op_pix  = s->dsp.avg_pixels_tab;
1930             op_qpix = s->dsp.avg_qpel_pixels_tab;
1931         }
1932         if (s->mv_dir & MV_DIR_BACKWARD) {
1933             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1934                           s->next_picture.f.data,
1935                           op_pix, op_qpix);
1936         }
1937
1938         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1939             int progressive_score, interlaced_score;
1940
1941             s->interlaced_dct = 0;
1942             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1943                                                     ptr_y,              wrap_y,
1944                                                     8) +
1945                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1946                                                     ptr_y + wrap_y * 8, wrap_y,
1947                                                     8) - 400;
1948
1949             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1950                 progressive_score -= 400;
1951
1952             if (progressive_score > 0) {
1953                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1954                                                        ptr_y,
1955                                                        wrap_y * 2, 8) +
1956                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1957                                                        ptr_y + wrap_y,
1958                                                        wrap_y * 2, 8);
1959
1960                 if (progressive_score > interlaced_score) {
1961                     s->interlaced_dct = 1;
1962
1963                     dct_offset = wrap_y;
1964                     wrap_y <<= 1;
1965                     if (s->chroma_format == CHROMA_422)
1966                         wrap_c <<= 1;
1967                 }
1968             }
1969         }
1970
1971         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1972         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1973         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1974                            dest_y + dct_offset, wrap_y);
1975         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1976                            dest_y + dct_offset + 8, wrap_y);
1977
1978         if (s->flags & CODEC_FLAG_GRAY) {
1979             skip_dct[4] = 1;
1980             skip_dct[5] = 1;
1981         } else {
1982             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1983             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1984             if (!s->chroma_y_shift) { /* 422 */
1985                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1986                                    dest_cb + (dct_offset >> 1), wrap_c);
1987                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1988                                    dest_cr + (dct_offset >> 1), wrap_c);
1989             }
1990         }
1991         /* pre quantization */
1992         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1993                 2 * s->qscale * s->qscale) {
1994             // FIXME optimize
1995             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1996                               wrap_y, 8) < 20 * s->qscale)
1997                 skip_dct[0] = 1;
1998             if (s->dsp.sad[1](NULL, ptr_y + 8,
1999                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2000                 skip_dct[1] = 1;
2001             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
2002                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
2003                 skip_dct[2] = 1;
2004             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
2005                               dest_y + dct_offset + 8,
2006                               wrap_y, 8) < 20 * s->qscale)
2007                 skip_dct[3] = 1;
2008             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
2009                               wrap_c, 8) < 20 * s->qscale)
2010                 skip_dct[4] = 1;
2011             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
2012                               wrap_c, 8) < 20 * s->qscale)
2013                 skip_dct[5] = 1;
2014             if (!s->chroma_y_shift) { /* 422 */
2015                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2016                                   dest_cb + (dct_offset >> 1),
2017                                   wrap_c, 8) < 20 * s->qscale)
2018                     skip_dct[6] = 1;
2019                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2020                                   dest_cr + (dct_offset >> 1),
2021                                   wrap_c, 8) < 20 * s->qscale)
2022                     skip_dct[7] = 1;
2023             }
2024         }
2025     }
2026
2027     if (s->quantizer_noise_shaping) {
2028         if (!skip_dct[0])
2029             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2030         if (!skip_dct[1])
2031             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2032         if (!skip_dct[2])
2033             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2034         if (!skip_dct[3])
2035             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2036         if (!skip_dct[4])
2037             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2038         if (!skip_dct[5])
2039             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2040         if (!s->chroma_y_shift) { /* 422 */
2041             if (!skip_dct[6])
2042                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2043                                   wrap_c);
2044             if (!skip_dct[7])
2045                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2046                                   wrap_c);
2047         }
2048         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
2049     }
2050
2051     /* DCT & quantize */
2052     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2053     {
2054         for (i = 0; i < mb_block_count; i++) {
2055             if (!skip_dct[i]) {
2056                 int overflow;
2057                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2058                 // FIXME we could decide to change to quantizer instead of
2059                 // clipping
2060                 // JS: I don't think that would be a good idea it could lower
2061                 //     quality instead of improve it. Just INTRADC clipping
2062                 //     deserves changes in quantizer
2063                 if (overflow)
2064                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2065             } else
2066                 s->block_last_index[i] = -1;
2067         }
2068         if (s->quantizer_noise_shaping) {
2069             for (i = 0; i < mb_block_count; i++) {
2070                 if (!skip_dct[i]) {
2071                     s->block_last_index[i] =
2072                         dct_quantize_refine(s, s->block[i], weight[i],
2073                                             orig[i], i, s->qscale);
2074                 }
2075             }
2076         }
2077
2078         if (s->luma_elim_threshold && !s->mb_intra)
2079             for (i = 0; i < 4; i++)
2080                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2081         if (s->chroma_elim_threshold && !s->mb_intra)
2082             for (i = 4; i < mb_block_count; i++)
2083                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2084
2085         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2086             for (i = 0; i < mb_block_count; i++) {
2087                 if (s->block_last_index[i] == -1)
2088                     s->coded_score[i] = INT_MAX / 256;
2089             }
2090         }
2091     }
2092
2093     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2094         s->block_last_index[4] =
2095         s->block_last_index[5] = 0;
2096         s->block[4][0] =
2097         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2098     }
2099
2100     // non c quantize code returns incorrect block_last_index FIXME
2101     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2102         for (i = 0; i < mb_block_count; i++) {
2103             int j;
2104             if (s->block_last_index[i] > 0) {
2105                 for (j = 63; j > 0; j--) {
2106                     if (s->block[i][s->intra_scantable.permutated[j]])
2107                         break;
2108                 }
2109                 s->block_last_index[i] = j;
2110             }
2111         }
2112     }
2113
2114     /* huffman encode */
2115     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2116     case AV_CODEC_ID_MPEG1VIDEO:
2117     case AV_CODEC_ID_MPEG2VIDEO:
2118         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2119             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2120         break;
2121     case AV_CODEC_ID_MPEG4:
2122         if (CONFIG_MPEG4_ENCODER)
2123             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2124         break;
2125     case AV_CODEC_ID_MSMPEG4V2:
2126     case AV_CODEC_ID_MSMPEG4V3:
2127     case AV_CODEC_ID_WMV1:
2128         if (CONFIG_MSMPEG4_ENCODER)
2129             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2130         break;
2131     case AV_CODEC_ID_WMV2:
2132         if (CONFIG_WMV2_ENCODER)
2133             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2134         break;
2135     case AV_CODEC_ID_H261:
2136         if (CONFIG_H261_ENCODER)
2137             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2138         break;
2139     case AV_CODEC_ID_H263:
2140     case AV_CODEC_ID_H263P:
2141     case AV_CODEC_ID_FLV1:
2142     case AV_CODEC_ID_RV10:
2143     case AV_CODEC_ID_RV20:
2144         if (CONFIG_H263_ENCODER)
2145             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2146         break;
2147     case AV_CODEC_ID_MJPEG:
2148     case AV_CODEC_ID_AMV:
2149         if (CONFIG_MJPEG_ENCODER)
2150             ff_mjpeg_encode_mb(s, s->block);
2151         break;
2152     default:
2153         av_assert1(0);
2154     }
2155 }
2156
2157 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2158 {
2159     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2160     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2161 }
2162
2163 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2164     int i;
2165
2166     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2167
2168     /* mpeg1 */
2169     d->mb_skip_run= s->mb_skip_run;
2170     for(i=0; i<3; i++)
2171         d->last_dc[i] = s->last_dc[i];
2172
2173     /* statistics */
2174     d->mv_bits= s->mv_bits;
2175     d->i_tex_bits= s->i_tex_bits;
2176     d->p_tex_bits= s->p_tex_bits;
2177     d->i_count= s->i_count;
2178     d->f_count= s->f_count;
2179     d->b_count= s->b_count;
2180     d->skip_count= s->skip_count;
2181     d->misc_bits= s->misc_bits;
2182     d->last_bits= 0;
2183
2184     d->mb_skipped= 0;
2185     d->qscale= s->qscale;
2186     d->dquant= s->dquant;
2187
2188     d->esc3_level_length= s->esc3_level_length;
2189 }
2190
2191 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2192     int i;
2193
2194     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2195     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2196
2197     /* mpeg1 */
2198     d->mb_skip_run= s->mb_skip_run;
2199     for(i=0; i<3; i++)
2200         d->last_dc[i] = s->last_dc[i];
2201
2202     /* statistics */
2203     d->mv_bits= s->mv_bits;
2204     d->i_tex_bits= s->i_tex_bits;
2205     d->p_tex_bits= s->p_tex_bits;
2206     d->i_count= s->i_count;
2207     d->f_count= s->f_count;
2208     d->b_count= s->b_count;
2209     d->skip_count= s->skip_count;
2210     d->misc_bits= s->misc_bits;
2211
2212     d->mb_intra= s->mb_intra;
2213     d->mb_skipped= s->mb_skipped;
2214     d->mv_type= s->mv_type;
2215     d->mv_dir= s->mv_dir;
2216     d->pb= s->pb;
2217     if(s->data_partitioning){
2218         d->pb2= s->pb2;
2219         d->tex_pb= s->tex_pb;
2220     }
2221     d->block= s->block;
2222     for(i=0; i<8; i++)
2223         d->block_last_index[i]= s->block_last_index[i];
2224     d->interlaced_dct= s->interlaced_dct;
2225     d->qscale= s->qscale;
2226
2227     d->esc3_level_length= s->esc3_level_length;
2228 }
2229
2230 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2231                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2232                            int *dmin, int *next_block, int motion_x, int motion_y)
2233 {
2234     int score;
2235     uint8_t *dest_backup[3];
2236
2237     copy_context_before_encode(s, backup, type);
2238
2239     s->block= s->blocks[*next_block];
2240     s->pb= pb[*next_block];
2241     if(s->data_partitioning){
2242         s->pb2   = pb2   [*next_block];
2243         s->tex_pb= tex_pb[*next_block];
2244     }
2245
2246     if(*next_block){
2247         memcpy(dest_backup, s->dest, sizeof(s->dest));
2248         s->dest[0] = s->rd_scratchpad;
2249         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2250         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2251         assert(s->linesize >= 32); //FIXME
2252     }
2253
2254     encode_mb(s, motion_x, motion_y);
2255
2256     score= put_bits_count(&s->pb);
2257     if(s->data_partitioning){
2258         score+= put_bits_count(&s->pb2);
2259         score+= put_bits_count(&s->tex_pb);
2260     }
2261
2262     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2263         ff_MPV_decode_mb(s, s->block);
2264
2265         score *= s->lambda2;
2266         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2267     }
2268
2269     if(*next_block){
2270         memcpy(s->dest, dest_backup, sizeof(s->dest));
2271     }
2272
2273     if(score<*dmin){
2274         *dmin= score;
2275         *next_block^=1;
2276
2277         copy_context_after_encode(best, s, type);
2278     }
2279 }
2280
2281 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2282     uint32_t *sq = ff_squareTbl + 256;
2283     int acc=0;
2284     int x,y;
2285
2286     if(w==16 && h==16)
2287         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2288     else if(w==8 && h==8)
2289         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2290
2291     for(y=0; y<h; y++){
2292         for(x=0; x<w; x++){
2293             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2294         }
2295     }
2296
2297     av_assert2(acc>=0);
2298
2299     return acc;
2300 }
2301
2302 static int sse_mb(MpegEncContext *s){
2303     int w= 16;
2304     int h= 16;
2305
2306     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2307     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2308
2309     if(w==16 && h==16)
2310       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2311         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2312                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2313                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2314       }else{
2315         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2316                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2317                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2318       }
2319     else
2320         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2321                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2322                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2323 }
2324
2325 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2326     MpegEncContext *s= *(void**)arg;
2327
2328
2329     s->me.pre_pass=1;
2330     s->me.dia_size= s->avctx->pre_dia_size;
2331     s->first_slice_line=1;
2332     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2333         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2334             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2335         }
2336         s->first_slice_line=0;
2337     }
2338
2339     s->me.pre_pass=0;
2340
2341     return 0;
2342 }
2343
2344 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2345     MpegEncContext *s= *(void**)arg;
2346
2347     ff_check_alignment();
2348
2349     s->me.dia_size= s->avctx->dia_size;
2350     s->first_slice_line=1;
2351     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2352         s->mb_x=0; //for block init below
2353         ff_init_block_index(s);
2354         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2355             s->block_index[0]+=2;
2356             s->block_index[1]+=2;
2357             s->block_index[2]+=2;
2358             s->block_index[3]+=2;
2359
2360             /* compute motion vector & mb_type and store in context */
2361             if(s->pict_type==AV_PICTURE_TYPE_B)
2362                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2363             else
2364                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2365         }
2366         s->first_slice_line=0;
2367     }
2368     return 0;
2369 }
2370
2371 static int mb_var_thread(AVCodecContext *c, void *arg){
2372     MpegEncContext *s= *(void**)arg;
2373     int mb_x, mb_y;
2374
2375     ff_check_alignment();
2376
2377     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2378         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2379             int xx = mb_x * 16;
2380             int yy = mb_y * 16;
2381             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2382             int varc;
2383             int sum = s->dsp.pix_sum(pix, s->linesize);
2384
2385             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2386
2387             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2388             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2389             s->me.mb_var_sum_temp    += varc;
2390         }
2391     }
2392     return 0;
2393 }
2394
2395 static void write_slice_end(MpegEncContext *s){
2396     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2397         if(s->partitioned_frame){
2398             ff_mpeg4_merge_partitions(s);
2399         }
2400
2401         ff_mpeg4_stuffing(&s->pb);
2402     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2403         ff_mjpeg_encode_stuffing(s);
2404     }
2405
2406     avpriv_align_put_bits(&s->pb);
2407     flush_put_bits(&s->pb);
2408
2409     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2410         s->misc_bits+= get_bits_diff(s);
2411 }
2412
2413 static void write_mb_info(MpegEncContext *s)
2414 {
2415     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2416     int offset = put_bits_count(&s->pb);
2417     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2418     int gobn = s->mb_y / s->gob_index;
2419     int pred_x, pred_y;
2420     if (CONFIG_H263_ENCODER)
2421         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2422     bytestream_put_le32(&ptr, offset);
2423     bytestream_put_byte(&ptr, s->qscale);
2424     bytestream_put_byte(&ptr, gobn);
2425     bytestream_put_le16(&ptr, mba);
2426     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2427     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2428     /* 4MV not implemented */
2429     bytestream_put_byte(&ptr, 0); /* hmv2 */
2430     bytestream_put_byte(&ptr, 0); /* vmv2 */
2431 }
2432
2433 static void update_mb_info(MpegEncContext *s, int startcode)
2434 {
2435     if (!s->mb_info)
2436         return;
2437     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2438         s->mb_info_size += 12;
2439         s->prev_mb_info = s->last_mb_info;
2440     }
2441     if (startcode) {
2442         s->prev_mb_info = put_bits_count(&s->pb)/8;
2443         /* This might have incremented mb_info_size above, and we return without
2444          * actually writing any info into that slot yet. But in that case,
2445          * this will be called again at the start of the after writing the
2446          * start code, actually writing the mb info. */
2447         return;
2448     }
2449
2450     s->last_mb_info = put_bits_count(&s->pb)/8;
2451     if (!s->mb_info_size)
2452         s->mb_info_size += 12;
2453     write_mb_info(s);
2454 }
2455
2456 static int encode_thread(AVCodecContext *c, void *arg){
2457     MpegEncContext *s= *(void**)arg;
2458     int mb_x, mb_y, pdif = 0;
2459     int chr_h= 16>>s->chroma_y_shift;
2460     int i, j;
2461     MpegEncContext best_s, backup_s;
2462     uint8_t bit_buf[2][MAX_MB_BYTES];
2463     uint8_t bit_buf2[2][MAX_MB_BYTES];
2464     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2465     PutBitContext pb[2], pb2[2], tex_pb[2];
2466 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2467
2468     ff_check_alignment();
2469
2470     for(i=0; i<2; i++){
2471         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2472         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2473         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2474     }
2475
2476     s->last_bits= put_bits_count(&s->pb);
2477     s->mv_bits=0;
2478     s->misc_bits=0;
2479     s->i_tex_bits=0;
2480     s->p_tex_bits=0;
2481     s->i_count=0;
2482     s->f_count=0;
2483     s->b_count=0;
2484     s->skip_count=0;
2485
2486     for(i=0; i<3; i++){
2487         /* init last dc values */
2488         /* note: quant matrix value (8) is implied here */
2489         s->last_dc[i] = 128 << s->intra_dc_precision;
2490
2491         s->current_picture.f.error[i] = 0;
2492     }
2493     if(s->codec_id==AV_CODEC_ID_AMV){
2494         s->last_dc[0] = 128*8/13;
2495         s->last_dc[1] = 128*8/14;
2496         s->last_dc[2] = 128*8/14;
2497     }
2498     s->mb_skip_run = 0;
2499     memset(s->last_mv, 0, sizeof(s->last_mv));
2500
2501     s->last_mv_dir = 0;
2502
2503     switch(s->codec_id){
2504     case AV_CODEC_ID_H263:
2505     case AV_CODEC_ID_H263P:
2506     case AV_CODEC_ID_FLV1:
2507         if (CONFIG_H263_ENCODER)
2508             s->gob_index = ff_h263_get_gob_height(s);
2509         break;
2510     case AV_CODEC_ID_MPEG4:
2511         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2512             ff_mpeg4_init_partitions(s);
2513         break;
2514     }
2515
2516     s->resync_mb_x=0;
2517     s->resync_mb_y=0;
2518     s->first_slice_line = 1;
2519     s->ptr_lastgob = s->pb.buf;
2520     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2521 //    printf("row %d at %X\n", s->mb_y, (int)s);
2522         s->mb_x=0;
2523         s->mb_y= mb_y;
2524
2525         ff_set_qscale(s, s->qscale);
2526         ff_init_block_index(s);
2527
2528         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2529             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2530             int mb_type= s->mb_type[xy];
2531 //            int d;
2532             int dmin= INT_MAX;
2533             int dir;
2534
2535             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2536                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2537                 return -1;
2538             }
2539             if(s->data_partitioning){
2540                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2541                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2542                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2543                     return -1;
2544                 }
2545             }
2546
2547             s->mb_x = mb_x;
2548             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2549             ff_update_block_index(s);
2550
2551             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2552                 ff_h261_reorder_mb_index(s);
2553                 xy= s->mb_y*s->mb_stride + s->mb_x;
2554                 mb_type= s->mb_type[xy];
2555             }
2556
2557             /* write gob / video packet header  */
2558             if(s->rtp_mode){
2559                 int current_packet_size, is_gob_start;
2560
2561                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2562
2563                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2564
2565                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2566
2567                 switch(s->codec_id){
2568                 case AV_CODEC_ID_H263:
2569                 case AV_CODEC_ID_H263P:
2570                     if(!s->h263_slice_structured)
2571                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2572                     break;
2573                 case AV_CODEC_ID_MPEG2VIDEO:
2574                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2575                 case AV_CODEC_ID_MPEG1VIDEO:
2576                     if(s->mb_skip_run) is_gob_start=0;
2577                     break;
2578                 case AV_CODEC_ID_MJPEG:
2579                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2580                     break;
2581                 }
2582
2583                 if(is_gob_start){
2584                     if(s->start_mb_y != mb_y || mb_x!=0){
2585                         write_slice_end(s);
2586                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2587                             ff_mpeg4_init_partitions(s);
2588                         }
2589                     }
2590
2591                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2592                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2593
2594                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2595                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2596                         int d= 100 / s->avctx->error_rate;
2597                         if(r % d == 0){
2598                             current_packet_size=0;
2599                             s->pb.buf_ptr= s->ptr_lastgob;
2600                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2601                         }
2602                     }
2603
2604                     if (s->avctx->rtp_callback){
2605                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2606                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2607                     }
2608                     update_mb_info(s, 1);
2609
2610                     switch(s->codec_id){
2611                     case AV_CODEC_ID_MPEG4:
2612                         if (CONFIG_MPEG4_ENCODER) {
2613                             ff_mpeg4_encode_video_packet_header(s);
2614                             ff_mpeg4_clean_buffers(s);
2615                         }
2616                     break;
2617                     case AV_CODEC_ID_MPEG1VIDEO:
2618                     case AV_CODEC_ID_MPEG2VIDEO:
2619                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2620                             ff_mpeg1_encode_slice_header(s);
2621                             ff_mpeg1_clean_buffers(s);
2622                         }
2623                     break;
2624                     case AV_CODEC_ID_H263:
2625                     case AV_CODEC_ID_H263P:
2626                         if (CONFIG_H263_ENCODER)
2627                             ff_h263_encode_gob_header(s, mb_y);
2628                     break;
2629                     }
2630
2631                     if(s->flags&CODEC_FLAG_PASS1){
2632                         int bits= put_bits_count(&s->pb);
2633                         s->misc_bits+= bits - s->last_bits;
2634                         s->last_bits= bits;
2635                     }
2636
2637                     s->ptr_lastgob += current_packet_size;
2638                     s->first_slice_line=1;
2639                     s->resync_mb_x=mb_x;
2640                     s->resync_mb_y=mb_y;
2641                 }
2642             }
2643
2644             if(  (s->resync_mb_x   == s->mb_x)
2645                && s->resync_mb_y+1 == s->mb_y){
2646                 s->first_slice_line=0;
2647             }
2648
2649             s->mb_skipped=0;
2650             s->dquant=0; //only for QP_RD
2651
2652             update_mb_info(s, 0);
2653
2654             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2655                 int next_block=0;
2656                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2657
2658                 copy_context_before_encode(&backup_s, s, -1);
2659                 backup_s.pb= s->pb;
2660                 best_s.data_partitioning= s->data_partitioning;
2661                 best_s.partitioned_frame= s->partitioned_frame;
2662                 if(s->data_partitioning){
2663                     backup_s.pb2= s->pb2;
2664                     backup_s.tex_pb= s->tex_pb;
2665                 }
2666
2667                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2668                     s->mv_dir = MV_DIR_FORWARD;
2669                     s->mv_type = MV_TYPE_16X16;
2670                     s->mb_intra= 0;
2671                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2672                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2673                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2674                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2675                 }
2676                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2677                     s->mv_dir = MV_DIR_FORWARD;
2678                     s->mv_type = MV_TYPE_FIELD;
2679                     s->mb_intra= 0;
2680                     for(i=0; i<2; i++){
2681                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2682                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2683                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2684                     }
2685                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2686                                  &dmin, &next_block, 0, 0);
2687                 }
2688                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2689                     s->mv_dir = MV_DIR_FORWARD;
2690                     s->mv_type = MV_TYPE_16X16;
2691                     s->mb_intra= 0;
2692                     s->mv[0][0][0] = 0;
2693                     s->mv[0][0][1] = 0;
2694                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2695                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2696                 }
2697                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2698                     s->mv_dir = MV_DIR_FORWARD;
2699                     s->mv_type = MV_TYPE_8X8;
2700                     s->mb_intra= 0;
2701                     for(i=0; i<4; i++){
2702                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2703                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2704                     }
2705                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2706                                  &dmin, &next_block, 0, 0);
2707                 }
2708                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2709                     s->mv_dir = MV_DIR_FORWARD;
2710                     s->mv_type = MV_TYPE_16X16;
2711                     s->mb_intra= 0;
2712                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2713                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2714                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2715                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2716                 }
2717                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2718                     s->mv_dir = MV_DIR_BACKWARD;
2719                     s->mv_type = MV_TYPE_16X16;
2720                     s->mb_intra= 0;
2721                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2722                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2723                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2724                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2725                 }
2726                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2727                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2728                     s->mv_type = MV_TYPE_16X16;
2729                     s->mb_intra= 0;
2730                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2731                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2732                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2733                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2734                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2735                                  &dmin, &next_block, 0, 0);
2736                 }
2737                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2738                     s->mv_dir = MV_DIR_FORWARD;
2739                     s->mv_type = MV_TYPE_FIELD;
2740                     s->mb_intra= 0;
2741                     for(i=0; i<2; i++){
2742                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2743                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2744                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2745                     }
2746                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2747                                  &dmin, &next_block, 0, 0);
2748                 }
2749                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2750                     s->mv_dir = MV_DIR_BACKWARD;
2751                     s->mv_type = MV_TYPE_FIELD;
2752                     s->mb_intra= 0;
2753                     for(i=0; i<2; i++){
2754                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2755                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2756                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2757                     }
2758                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2759                                  &dmin, &next_block, 0, 0);
2760                 }
2761                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2762                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2763                     s->mv_type = MV_TYPE_FIELD;
2764                     s->mb_intra= 0;
2765                     for(dir=0; dir<2; dir++){
2766                         for(i=0; i<2; i++){
2767                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2768                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2769                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2770                         }
2771                     }
2772                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2773                                  &dmin, &next_block, 0, 0);
2774                 }
2775                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2776                     s->mv_dir = 0;
2777                     s->mv_type = MV_TYPE_16X16;
2778                     s->mb_intra= 1;
2779                     s->mv[0][0][0] = 0;
2780                     s->mv[0][0][1] = 0;
2781                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2782                                  &dmin, &next_block, 0, 0);
2783                     if(s->h263_pred || s->h263_aic){
2784                         if(best_s.mb_intra)
2785                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2786                         else
2787                             ff_clean_intra_table_entries(s); //old mode?
2788                     }
2789                 }
2790
2791                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2792                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2793                         const int last_qp= backup_s.qscale;
2794                         int qpi, qp, dc[6];
2795                         DCTELEM ac[6][16];
2796                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2797                         static const int dquant_tab[4]={-1,1,-2,2};
2798
2799                         av_assert2(backup_s.dquant == 0);
2800
2801                         //FIXME intra
2802                         s->mv_dir= best_s.mv_dir;
2803                         s->mv_type = MV_TYPE_16X16;
2804                         s->mb_intra= best_s.mb_intra;
2805                         s->mv[0][0][0] = best_s.mv[0][0][0];
2806                         s->mv[0][0][1] = best_s.mv[0][0][1];
2807                         s->mv[1][0][0] = best_s.mv[1][0][0];
2808                         s->mv[1][0][1] = best_s.mv[1][0][1];
2809
2810                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2811                         for(; qpi<4; qpi++){
2812                             int dquant= dquant_tab[qpi];
2813                             qp= last_qp + dquant;
2814                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2815                                 continue;
2816                             backup_s.dquant= dquant;
2817                             if(s->mb_intra && s->dc_val[0]){
2818                                 for(i=0; i<6; i++){
2819                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2820                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2821                                 }
2822                             }
2823
2824                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2825                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2826                             if(best_s.qscale != qp){
2827                                 if(s->mb_intra && s->dc_val[0]){
2828                                     for(i=0; i<6; i++){
2829                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2830                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2831                                     }
2832                                 }
2833                             }
2834                         }
2835                     }
2836                 }
2837                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2838                     int mx= s->b_direct_mv_table[xy][0];
2839                     int my= s->b_direct_mv_table[xy][1];
2840
2841                     backup_s.dquant = 0;
2842                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2843                     s->mb_intra= 0;
2844                     ff_mpeg4_set_direct_mv(s, mx, my);
2845                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2846                                  &dmin, &next_block, mx, my);
2847                 }
2848                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2849                     backup_s.dquant = 0;
2850                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2851                     s->mb_intra= 0;
2852                     ff_mpeg4_set_direct_mv(s, 0, 0);
2853                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2854                                  &dmin, &next_block, 0, 0);
2855                 }
2856                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2857                     int coded=0;
2858                     for(i=0; i<6; i++)
2859                         coded |= s->block_last_index[i];
2860                     if(coded){
2861                         int mx,my;
2862                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2863                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2864                             mx=my=0; //FIXME find the one we actually used
2865                             ff_mpeg4_set_direct_mv(s, mx, my);
2866                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2867                             mx= s->mv[1][0][0];
2868                             my= s->mv[1][0][1];
2869                         }else{
2870                             mx= s->mv[0][0][0];
2871                             my= s->mv[0][0][1];
2872                         }
2873
2874                         s->mv_dir= best_s.mv_dir;
2875                         s->mv_type = best_s.mv_type;
2876                         s->mb_intra= 0;
2877 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2878                         s->mv[0][0][1] = best_s.mv[0][0][1];
2879                         s->mv[1][0][0] = best_s.mv[1][0][0];
2880                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2881                         backup_s.dquant= 0;
2882                         s->skipdct=1;
2883                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2884                                         &dmin, &next_block, mx, my);
2885                         s->skipdct=0;
2886                     }
2887                 }
2888
2889                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2890
2891                 copy_context_after_encode(s, &best_s, -1);
2892
2893                 pb_bits_count= put_bits_count(&s->pb);
2894                 flush_put_bits(&s->pb);
2895                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2896                 s->pb= backup_s.pb;
2897
2898                 if(s->data_partitioning){
2899                     pb2_bits_count= put_bits_count(&s->pb2);
2900                     flush_put_bits(&s->pb2);
2901                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2902                     s->pb2= backup_s.pb2;
2903
2904                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2905                     flush_put_bits(&s->tex_pb);
2906                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2907                     s->tex_pb= backup_s.tex_pb;
2908                 }
2909                 s->last_bits= put_bits_count(&s->pb);
2910
2911                 if (CONFIG_H263_ENCODER &&
2912                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2913                     ff_h263_update_motion_val(s);
2914
2915                 if(next_block==0){ //FIXME 16 vs linesize16
2916                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2917                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2918                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2919                 }
2920
2921                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2922                     ff_MPV_decode_mb(s, s->block);
2923             } else {
2924                 int motion_x = 0, motion_y = 0;
2925                 s->mv_type=MV_TYPE_16X16;
2926                 // only one MB-Type possible
2927
2928                 switch(mb_type){
2929                 case CANDIDATE_MB_TYPE_INTRA:
2930                     s->mv_dir = 0;
2931                     s->mb_intra= 1;
2932                     motion_x= s->mv[0][0][0] = 0;
2933                     motion_y= s->mv[0][0][1] = 0;
2934                     break;
2935                 case CANDIDATE_MB_TYPE_INTER:
2936                     s->mv_dir = MV_DIR_FORWARD;
2937                     s->mb_intra= 0;
2938                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2939                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2940                     break;
2941                 case CANDIDATE_MB_TYPE_INTER_I:
2942                     s->mv_dir = MV_DIR_FORWARD;
2943                     s->mv_type = MV_TYPE_FIELD;
2944                     s->mb_intra= 0;
2945                     for(i=0; i<2; i++){
2946                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2947                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2948                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2949                     }
2950                     break;
2951                 case CANDIDATE_MB_TYPE_INTER4V:
2952                     s->mv_dir = MV_DIR_FORWARD;
2953                     s->mv_type = MV_TYPE_8X8;
2954                     s->mb_intra= 0;
2955                     for(i=0; i<4; i++){
2956                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2957                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2958                     }
2959                     break;
2960                 case CANDIDATE_MB_TYPE_DIRECT:
2961                     if (CONFIG_MPEG4_ENCODER) {
2962                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2963                         s->mb_intra= 0;
2964                         motion_x=s->b_direct_mv_table[xy][0];
2965                         motion_y=s->b_direct_mv_table[xy][1];
2966                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2967                     }
2968                     break;
2969                 case CANDIDATE_MB_TYPE_DIRECT0:
2970                     if (CONFIG_MPEG4_ENCODER) {
2971                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2972                         s->mb_intra= 0;
2973                         ff_mpeg4_set_direct_mv(s, 0, 0);
2974                     }
2975                     break;
2976                 case CANDIDATE_MB_TYPE_BIDIR:
2977                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2978                     s->mb_intra= 0;
2979                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2980                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2981                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2982                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2983                     break;
2984                 case CANDIDATE_MB_TYPE_BACKWARD:
2985                     s->mv_dir = MV_DIR_BACKWARD;
2986                     s->mb_intra= 0;
2987                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2988                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2989                     break;
2990                 case CANDIDATE_MB_TYPE_FORWARD:
2991                     s->mv_dir = MV_DIR_FORWARD;
2992                     s->mb_intra= 0;
2993                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2994                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2995 //                    printf(" %d %d ", motion_x, motion_y);
2996                     break;
2997                 case CANDIDATE_MB_TYPE_FORWARD_I:
2998                     s->mv_dir = MV_DIR_FORWARD;
2999                     s->mv_type = MV_TYPE_FIELD;
3000                     s->mb_intra= 0;
3001                     for(i=0; i<2; i++){
3002                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3003                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3004                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3005                     }
3006                     break;
3007                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3008                     s->mv_dir = MV_DIR_BACKWARD;
3009                     s->mv_type = MV_TYPE_FIELD;
3010                     s->mb_intra= 0;
3011                     for(i=0; i<2; i++){
3012                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3013                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3014                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3015                     }
3016                     break;
3017                 case CANDIDATE_MB_TYPE_BIDIR_I:
3018                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3019                     s->mv_type = MV_TYPE_FIELD;
3020                     s->mb_intra= 0;
3021                     for(dir=0; dir<2; dir++){
3022                         for(i=0; i<2; i++){
3023                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3024                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3025                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3026                         }
3027                     }
3028                     break;
3029                 default:
3030                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3031                 }
3032
3033                 encode_mb(s, motion_x, motion_y);
3034
3035                 // RAL: Update last macroblock type
3036                 s->last_mv_dir = s->mv_dir;
3037
3038                 if (CONFIG_H263_ENCODER &&
3039                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3040                     ff_h263_update_motion_val(s);
3041
3042                 ff_MPV_decode_mb(s, s->block);
3043             }
3044
3045             /* clean the MV table in IPS frames for direct mode in B frames */
3046             if(s->mb_intra /* && I,P,S_TYPE */){
3047                 s->p_mv_table[xy][0]=0;
3048                 s->p_mv_table[xy][1]=0;
3049             }
3050
3051             if(s->flags&CODEC_FLAG_PSNR){
3052                 int w= 16;
3053                 int h= 16;
3054
3055                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3056                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3057
3058                 s->current_picture.f.error[0] += sse(
3059                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3060                     s->dest[0], w, h, s->linesize);
3061                 s->current_picture.f.error[1] += sse(
3062                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3063                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3064                 s->current_picture.f.error[2] += sse(
3065                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3066                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3067             }
3068             if(s->loop_filter){
3069                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3070                     ff_h263_loop_filter(s);
3071             }
3072 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3073         }
3074     }
3075
3076     //not beautiful here but we must write it before flushing so it has to be here
3077     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3078         ff_msmpeg4_encode_ext_header(s);
3079
3080     write_slice_end(s);
3081
3082     /* Send the last GOB if RTP */
3083     if (s->avctx->rtp_callback) {
3084         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3085         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3086         /* Call the RTP callback to send the last GOB */
3087         emms_c();
3088         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3089     }
3090
3091     return 0;
3092 }
3093
3094 #define MERGE(field) dst->field += src->field; src->field=0
3095 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3096     MERGE(me.scene_change_score);
3097     MERGE(me.mc_mb_var_sum_temp);
3098     MERGE(me.mb_var_sum_temp);
3099 }
3100
3101 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3102     int i;
3103
3104     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3105     MERGE(dct_count[1]);
3106     MERGE(mv_bits);
3107     MERGE(i_tex_bits);
3108     MERGE(p_tex_bits);
3109     MERGE(i_count);
3110     MERGE(f_count);
3111     MERGE(b_count);
3112     MERGE(skip_count);
3113     MERGE(misc_bits);
3114     MERGE(error_count);
3115     MERGE(padding_bug_score);
3116     MERGE(current_picture.f.error[0]);
3117     MERGE(current_picture.f.error[1]);
3118     MERGE(current_picture.f.error[2]);
3119
3120     if(dst->avctx->noise_reduction){
3121         for(i=0; i<64; i++){
3122             MERGE(dct_error_sum[0][i]);
3123             MERGE(dct_error_sum[1][i]);
3124         }
3125     }
3126
3127     assert(put_bits_count(&src->pb) % 8 ==0);
3128     assert(put_bits_count(&dst->pb) % 8 ==0);
3129     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3130     flush_put_bits(&dst->pb);
3131 }
3132
3133 static int estimate_qp(MpegEncContext *s, int dry_run){
3134     if (s->next_lambda){
3135         s->current_picture_ptr->f.quality =
3136         s->current_picture.f.quality = s->next_lambda;
3137         if(!dry_run) s->next_lambda= 0;
3138     } else if (!s->fixed_qscale) {
3139         s->current_picture_ptr->f.quality =
3140         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3141         if (s->current_picture.f.quality < 0)
3142             return -1;
3143     }
3144
3145     if(s->adaptive_quant){
3146         switch(s->codec_id){
3147         case AV_CODEC_ID_MPEG4:
3148             if (CONFIG_MPEG4_ENCODER)
3149                 ff_clean_mpeg4_qscales(s);
3150             break;
3151         case AV_CODEC_ID_H263:
3152         case AV_CODEC_ID_H263P:
3153         case AV_CODEC_ID_FLV1:
3154             if (CONFIG_H263_ENCODER)
3155                 ff_clean_h263_qscales(s);
3156             break;
3157         default:
3158             ff_init_qscale_tab(s);
3159         }
3160
3161         s->lambda= s->lambda_table[0];
3162         //FIXME broken
3163     }else
3164         s->lambda = s->current_picture.f.quality;
3165 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3166     update_qscale(s);
3167     return 0;
3168 }
3169
3170 /* must be called before writing the header */
3171 static void set_frame_distances(MpegEncContext * s){
3172     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3173     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3174
3175     if(s->pict_type==AV_PICTURE_TYPE_B){
3176         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3177         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3178     }else{
3179         s->pp_time= s->time - s->last_non_b_time;
3180         s->last_non_b_time= s->time;
3181         assert(s->picture_number==0 || s->pp_time > 0);
3182     }
3183 }
3184
/**
 * Encode the current picture into s->pb.
 *
 * Steps, in order: set up time/rounding/lambda state, run motion estimation
 * (or, for I-pictures without fixed qscale, the MB variance pass) on all
 * slice contexts, possibly turn a P-picture into an I-picture on a scene
 * change, select f_code/b_code, run the rate control (estimate_qp), write
 * the format-specific picture header, run encode_thread on all slice
 * contexts and merge their results back into s.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3185 static int encode_picture(MpegEncContext *s, int picture_number)
3186 {
3187     int i;
3188     int bits;
3189     int context_count = s->slice_context_count;
3190
3191     s->picture_number = picture_number;
3192
3193     /* Reset the average MB variance */
3194     s->me.mb_var_sum_temp    =
3195     s->me.mc_mb_var_sum_temp = 0;
3196
3197     /* we need to initialize some time vars before we can encode b-frames */
3198     // RAL: Condition added for MPEG1VIDEO
3199     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3200         set_frame_distances(s);
3201     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3202         ff_set_mpeg4_time(s);
3203
3204     s->me.scene_change_score=0;
3205
3206 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3207
         /* rounding mode: reset on I-pictures, toggled on the other non-B
          * pictures for codecs/configurations that alternate it */
3208     if(s->pict_type==AV_PICTURE_TYPE_I){
3209         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3210         else                        s->no_rounding=0;
3211     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3212         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3213             s->no_rounding ^= 1;
3214     }
3215
         /* lambda used during motion estimation: a dry run of the rate control
          * in the second pass, otherwise (unless qscale is fixed by flag) the
          * last lambda recorded for this class of picture */
3216     if(s->flags & CODEC_FLAG_PASS2){
3217         if (estimate_qp(s,1) < 0)
3218             return -1;
3219         ff_get_2pass_fcode(s);
3220     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3221         if(s->pict_type==AV_PICTURE_TYPE_B)
3222             s->lambda= s->last_lambda_for[s->pict_type];
3223         else
3224             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3225         update_qscale(s);
3226     }
3227
         /* every codec except AMV uses the luma intra matrices for chroma too;
          * chroma matrices that are distinct from the luma ones are freed first */
3228     if(s->codec_id != AV_CODEC_ID_AMV){
3229         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3230         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3231         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3232         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3233     }
3234
3235     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring slice contexts 1..context_count-1 up to date with s */
3236     for(i=1; i<context_count; i++){
3237         ff_update_duplicate_context(s->thread_context[i], s);
3238     }
3239
3240     if(ff_init_me(s)<0)
3241         return -1;
3242
3243     /* Estimate motion for every MB */
3244     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda/lambda2 by me_penalty_compensation/256 (rounded) */
3245         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3246         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3247         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3248             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3249                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3250             }
3251         }
3252
3253         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3254     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3255         /* I-Frame */
3256         for(i=0; i<s->mb_stride*s->mb_height; i++)
3257             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3258
3259         if(!s->fixed_qscale){
3260             /* finding spatial complexity for I-frame rate control */
3261             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3262         }
3263     }
         /* collect the ME sums of contexts 1..context_count-1 in s */
3264     for(i=1; i<context_count; i++){
3265         merge_context_after_me(s, s->thread_context[i]);
3266     }
3267     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3268     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3269     emms_c();
3270
         /* scene change: code a P-picture as I-picture when the accumulated
          * score exceeds the configured threshold */
3271     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3272         s->pict_type= AV_PICTURE_TYPE_I;
3273         for(i=0; i<s->mb_stride*s->mb_height; i++)
3274             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3275 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3276         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3277     }
3278
         /* without umvplus: pick f_code (and b_code for B-pictures) from the
          * estimated MV tables, then run ff_fix_long_mvs() with the chosen code
          * (presumably to deal with vectors that do not fit it; defined
          * elsewhere) */
3279     if(!s->umvplus){
3280         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3281             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3282
3283             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3284                 int a,b;
3285                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3286                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3287                 s->f_code= FFMAX3(s->f_code, a, b);
3288             }
3289
3290             ff_fix_long_p_mvs(s);
3291             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3292             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3293                 int j;
3294                 for(i=0; i<2; i++){
3295                     for(j=0; j<2; j++)
3296                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3297                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3298                 }
3299             }
3300         }
3301
3302         if(s->pict_type==AV_PICTURE_TYPE_B){
3303             int a, b;
3304
                 /* f_code covers the forward tables, b_code the backward ones */
3305             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3306             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3307             s->f_code = FFMAX(a, b);
3308
3309             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3310             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3311             s->b_code = FFMAX(a, b);
3312
3313             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3314             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3315             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3316             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3317             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3318                 int dir, j;
3319                 for(dir=0; dir<2; dir++){
3320                     for(i=0; i<2; i++){
3321                         for(j=0; j<2; j++){
3322                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3323                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3324                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3325                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3326                         }
3327                     }
3328                 }
3329             }
3330         }
3331     }
3332
         /* the real (non dry-run) rate control decision for this picture */
3333     if (estimate_qp(s, 0) < 0)
3334         return -1;
3335
3336     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3337         s->qscale= 3; //reduce clipping problems
3338
3339     if (s->out_format == FMT_MJPEG) {
3340         /* for mjpeg, we do include qscale in the matrix */
3341         for(i=1;i<64;i++){
3342             int j= s->dsp.idct_permutation[i];
3343
3344             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3345         }
3346         s->y_dc_scale_table=
3347         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3348         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3349         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3350                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so it is pinned to 8 */
3351         s->qscale= 8;
3352     }
3353     if(s->codec_id == AV_CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 luma, 14 chroma) and matrices
              * taken from sp5x_quant_table, separately for luma and chroma */
3354         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3355         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3356         for(i=1;i<64;i++){
3357             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3358
3359             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3360             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3361         }
3362         s->y_dc_scale_table= y;
3363         s->c_dc_scale_table= c;
3364         s->intra_matrix[0] = 13;
3365         s->chroma_intra_matrix[0] = 14;
3366         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3367                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3368         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3369                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3370         s->qscale= 8;
3371     }
3372
3373     //FIXME var duplication
3374     s->current_picture_ptr->f.key_frame =
3375     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3376     s->current_picture_ptr->f.pict_type =
3377     s->current_picture.f.pict_type = s->pict_type;
3378
3379     if (s->current_picture.f.key_frame)
3380         s->picture_in_gop_number=0;
3381
         /* write the picture header of the selected output format;
          * header_bits is the number of bits it added to s->pb */
3382     s->mb_x = s->mb_y = 0;
3383     s->last_bits= put_bits_count(&s->pb);
3384     switch(s->out_format) {
3385     case FMT_MJPEG:
3386         if (CONFIG_MJPEG_ENCODER)
3387             ff_mjpeg_encode_picture_header(s);
3388         break;
3389     case FMT_H261:
3390         if (CONFIG_H261_ENCODER)
3391             ff_h261_encode_picture_header(s, picture_number);
3392         break;
3393     case FMT_H263:
3394         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3395             ff_wmv2_encode_picture_header(s, picture_number);
3396         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3397             ff_msmpeg4_encode_picture_header(s, picture_number);
3398         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3399             ff_mpeg4_encode_picture_header(s, picture_number);
3400         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3401             ff_rv10_encode_picture_header(s, picture_number);
3402         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3403             ff_rv20_encode_picture_header(s, picture_number);
3404         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3405             ff_flv_encode_picture_header(s, picture_number);
3406         else if (CONFIG_H263_ENCODER)
3407             ff_h263_encode_picture_header(s, picture_number);
3408         break;
3409     case FMT_MPEG1:
3410         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3411             ff_mpeg1_encode_picture_header(s, picture_number);
3412         break;
3413     case FMT_H264:
3414         break;
3415     default:
3416         av_assert0(0);
3417     }
3418     bits= put_bits_count(&s->pb);
3419     s->header_bits= bits - s->last_bits;
3420
         /* update slice contexts 1..context_count-1 from s, encode on all
          * contexts, then merge the results of contexts 1.. back into s */
3421     for(i=1; i<context_count; i++){
3422         update_duplicate_context_after_me(s->thread_context[i], s);
3423     }
3424     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3425     for(i=1; i<context_count; i++){
3426         merge_context_after_encode(s, s->thread_context[i]);
3427     }
3428     emms_c();
3429     return 0;
3430 }
3431
3432 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3433     const int intra= s->mb_intra;
3434     int i;
3435
3436     s->dct_count[intra]++;
3437
3438     for(i=0; i<64; i++){
3439         int level= block[i];
3440
3441         if(level){
3442             if(level>0){
3443                 s->dct_error_sum[intra][i] += level;
3444                 level -= s->dct_offset[intra][i];
3445                 if(level<0) level=0;
3446             }else{
3447                 s->dct_error_sum[intra][i] -= level;
3448                 level += s->dct_offset[intra][i];
3449                 if(level>0) level=0;
3450             }
3451             block[i]= level;
3452         }
3453     }
3454 }
3455
/**
 * Rate-distortion optimized ("trellis") quantization of one block.
 *
 * Runs the forward DCT on block in place (plus the optional denoise step),
 * builds up to two candidate quantized levels per coefficient and then
 * searches the run/level combinations for the one minimizing
 * distortion + lambda * bits, using the AC VLC length tables of the context.
 * On return block holds the selected levels, and s->coded_score[n] the score
 * of the chosen solution.
 *
 * @param s        encoder context
 * @param block    64 coefficients: input samples, output quantized levels
 * @param n        block index; n < 4 selects the luma DC scale / intra matrix
 * @param qscale   quantizer scale
 * @param overflow set to non-zero if the OR of the candidate levels exceeds
 *                 s->max_qcoeff (an overflow might have happened)
 * @return index of the last non-zero coefficient in scan order, or a value
 *         below the first searched index (0 for intra, -1 for inter blocks)
 *         when no AC coefficient is coded
 */
3456 static int dct_quantize_trellis_c(MpegEncContext *s,
3457                                   DCTELEM *block, int n,
3458                                   int qscale, int *overflow){
3459     const int *qmat;
3460     const uint8_t *scantable= s->intra_scantable.scantable;
3461     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3462     int max=0;
3463     unsigned int threshold1, threshold2;
3464     int bias=0;
3465     int run_tab[65];
3466     int level_tab[65];
3467     int score_tab[65];
3468     int survivor[65];
3469     int survivor_count;
3470     int last_run=0;
3471     int last_level=0;
3472     int last_score= 0;
3473     int last_i;
3474     int coeff[2][64];
3475     int coeff_count[64];
3476     int qmul, qadd, start_i, last_non_zero, i, dc;
3477     const int esc_length= s->ac_esc_length;
3478     uint8_t * length;
3479     uint8_t * last_length;
3480     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3481
3482     s->dsp.fdct (block);
3483
3484     if(s->dct_error_sum)
3485         s->denoise_dct(s, block);
         /* reconstruction constants used below for the FMT_H263 case */
3486     qmul= qscale*16;
3487     qadd= ((qscale-1)|1)*8;
3488
3489     if (s->mb_intra) {
3490         int q;
3491         if (!s->h263_aic) {
3492             if (n < 4)
3493                 q = s->y_dc_scale;
3494             else
3495                 q = s->c_dc_scale;
3496             q = q << 3;
3497         } else{
3498             /* For AIC we skip quant/dequant of INTRADC */
3499             q = 1 << 3;
3500             qadd=0;
3501         }
3502
3503         /* note: block[0] is assumed to be positive */
3504         block[0] = (block[0] + (q >> 1)) / q;
             /* DC is quantized above, so the search covers indices 1..63 only */
3505         start_i = 1;
3506         last_non_zero = 0;
3507         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3508         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3509             bias= 1<<(QMAT_SHIFT-1);
3510         length     = s->intra_ac_vlc_length;
3511         last_length= s->intra_ac_vlc_last_length;
3512     } else {
3513         start_i = 0;
3514         last_non_zero = -1;
3515         qmat = s->q_inter_matrix[qscale];
3516         length     = s->inter_ac_vlc_length;
3517         last_length= s->inter_ac_vlc_last_length;
3518     }
3519     last_i= start_i;
3520
         /* (unsigned)(level+threshold1) > threshold2 holds exactly when
          * |level| > threshold1, i.e. when (bias + |level|) >> QMAT_SHIFT
          * is non-zero */
3521     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3522     threshold2= (threshold1<<1);
3523
         /* scan backwards for the last coefficient that quantizes to non-zero */
3524     for(i=63; i>=start_i; i--) {
3525         const int j = scantable[i];
3526         int level = block[j] * qmat[j];
3527
3528         if(((unsigned)(level+threshold1))>threshold2){
3529             last_non_zero = i;
3530             break;
3531         }
3532     }
3533
         /* candidate levels per coefficient: the quantized level and the one
          * closer to zero; coefficients below the threshold get a single
          * candidate of +1 or -1 according to their sign */
3534     for(i=start_i; i<=last_non_zero; i++) {
3535         const int j = scantable[i];
3536         int level = block[j] * qmat[j];
3537
3538 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3539 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3540         if(((unsigned)(level+threshold1))>threshold2){
3541             if(level>0){
3542                 level= (bias + level)>>QMAT_SHIFT;
3543                 coeff[0][i]= level;
3544                 coeff[1][i]= level-1;
3545 //                coeff[2][k]= level-2;
3546             }else{
3547                 level= (bias - level)>>QMAT_SHIFT;
3548                 coeff[0][i]= -level;
3549                 coeff[1][i]= -level+1;
3550 //                coeff[2][k]= -level+2;
3551             }
                 /* a level of 1 has only one non-zero candidate */
3552             coeff_count[i]= FFMIN(level, 2);
3553             av_assert2(coeff_count[i]);
3554             max |=level;
3555         }else{
3556             coeff[0][i]= (level>>31)|1;
3557             coeff_count[i]= 1;
3558         }
3559     }
3560
3561     *overflow= s->max_qcoeff < max; //overflow might have happened
3562
         /* nothing quantizes to non-zero: clear the searched range and stop */
3563     if(last_non_zero < start_i){
3564         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3565         return last_non_zero;
3566     }
3567
         /* score_tab[k] is the best score found for coding everything before
          * scan index k; survivor[] lists the indices still considered as
          * start of the next run */
3568     score_tab[start_i]= 0;
3569     survivor[0]= start_i;
3570     survivor_count= 1;
3571
3572     for(i=start_i; i<=last_non_zero; i++){
3573         int level_index, j, zero_distortion;
3574         int dct_coeff= FFABS(block[ scantable[i] ]);
3575         int best_score=256*256*256*120;
3576
             /* presumably compensates the scaling of the ff_fdct_ifast output
              * (ff_inv_aanscales is defined elsewhere) */
3577         if (s->dsp.fdct == ff_fdct_ifast)
3578             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3579         zero_distortion= dct_coeff*dct_coeff;
3580
3581         for(level_index=0; level_index < coeff_count[i]; level_index++){
3582             int distortion;
3583             int level= coeff[level_index][i];
3584             const int alevel= FFABS(level);
3585             int unquant_coeff;
3586
3587             av_assert2(level);
3588
                 /* reconstruct the value a decoder would get for this level */
3589             if(s->out_format == FMT_H263){
3590                 unquant_coeff= alevel*qmul + qadd;
3591             }else{ //MPEG1
3592                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3593                 if(s->mb_intra){
3594                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3595                         unquant_coeff =   (unquant_coeff - 1) | 1;
3596                 }else{
3597                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3598                         unquant_coeff =   (unquant_coeff - 1) | 1;
3599                 }
3600                 unquant_coeff<<= 3;
3601             }
3602
                 /* distortion relative to coding this coefficient as zero */
3603             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* levels in -64..63 are costed via the VLC length tables,
                  * all others with the escape length */
3604             level+=64;
3605             if((level&(~127)) == 0){
3606                 for(j=survivor_count-1; j>=0; j--){
3607                     int run= i - survivor[j];
3608                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3609                     score += score_tab[i-run];
3610
3611                     if(score < best_score){
3612                         best_score= score;
3613                         run_tab[i+1]= run;
3614                         level_tab[i+1]= level-64;
3615                     }
3616                 }
3617
                     /* for FMT_H263 the cost of ending the block here is
                      * tracked with the separate last_length table */
3618                 if(s->out_format == FMT_H263){
3619                     for(j=survivor_count-1; j>=0; j--){
3620                         int run= i - survivor[j];
3621                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3622                         score += score_tab[i-run];
3623                         if(score < last_score){
3624                             last_score= score;
3625                             last_run= run;
3626                             last_level= level-64;
3627                             last_i= i+1;
3628                         }
3629                     }
3630                 }
3631             }else{
3632                 distortion += esc_length*lambda;
3633                 for(j=survivor_count-1; j>=0; j--){
3634                     int run= i - survivor[j];
3635                     int score= distortion + score_tab[i-run];
3636
3637                     if(score < best_score){
3638                         best_score= score;
3639                         run_tab[i+1]= run;
3640                         level_tab[i+1]= level-64;
3641                     }
3642                 }
3643
3644                 if(s->out_format == FMT_H263){
3645                   for(j=survivor_count-1; j>=0; j--){
3646                         int run= i - survivor[j];
3647                         int score= distortion + score_tab[i-run];
3648                         if(score < last_score){
3649                             last_score= score;
3650                             last_run= run;
3651                             last_level= level-64;
3652                             last_i= i+1;
3653                         }
3654                     }
3655                 }
3656             }
3657         }
3658
3659         score_tab[i+1]= best_score;
3660
             /* drop survivors from the end of the list while their score is
              * worse than best_score (plus lambda when last_non_zero > 27) */
3661         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3662         if(last_non_zero <= 27){
3663             for(; survivor_count; survivor_count--){
3664                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3665                     break;
3666             }
3667         }else{
3668             for(; survivor_count; survivor_count--){
3669                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3670                     break;
3671             }
3672         }
3673
3674         survivor[ survivor_count++ ]= i+1;
3675     }
3676
         /* other formats: choose the end position afterwards from score_tab */
3677     if(s->out_format != FMT_H263){
3678         last_score= 256*256*256*120;
3679         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3680             int score= score_tab[i];
3681             if(i) score += lambda*2; //FIXME exacter?
3682
3683             if(score < last_score){
3684                 last_score= score;
3685                 last_i= i;
3686                 last_level= level_tab[i];
3687                 last_run= run_tab[i];
3688             }
3689         }
3690     }
3691
3692     s->coded_score[n] = last_score;
3693
         /* keep |block[0]| for the special case below, then clear the searched
          * range before the selected levels are written back */
3694     dc= FFABS(block[0]);
3695     last_non_zero= last_i - 1;
3696     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3697
3698     if(last_non_zero < start_i)
3699         return last_non_zero;
3700
         /* block searched from index 0 whose only coded coefficient is index 0:
          * decide again between its candidate levels and coding nothing */
3701     if(last_non_zero == 0 && start_i == 0){
3702         int best_level= 0;
3703         int best_score= dc * dc;
3704
3705         for(i=0; i<coeff_count[0]; i++){
3706             int level= coeff[i][0];
3707             int alevel= FFABS(level);
3708             int unquant_coeff, score, distortion;
3709
3710             if(s->out_format == FMT_H263){
3711                     unquant_coeff= (alevel*qmul + qadd)>>3;
3712             }else{ //MPEG1
3713                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3714                     unquant_coeff =   (unquant_coeff - 1) | 1;
3715             }
3716             unquant_coeff = (unquant_coeff + 4) >> 3;
3717             unquant_coeff<<= 3 + 3;
3718
3719             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3720             level+=64;
3721             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3722             else                    score= distortion + esc_length*lambda;
3723
3724             if(score < best_score){
3725                 best_score= score;
3726                 best_level= level - 64;
3727             }
3728         }
3729         block[0]= best_level;
3730         s->coded_score[n] = best_score - dc*dc;
3731         if(best_level == 0) return -1;
3732         else                return last_non_zero;
3733     }
3734
         /* write back the chosen path: last coefficient first, then follow
          * run_tab/level_tab backwards, storing at the permuted scan positions */
3735     i= last_i;
3736     av_assert2(last_level);
3737
3738     block[ perm_scantable[last_non_zero] ]= last_level;
3739     i -= last_run + 1;
3740
3741     for(; i>start_i; i -= run_tab[i] + 1){
3742         block[ perm_scantable[i-1] ]= level_tab[i];
3743     }
3744
3745     return last_non_zero;
3746 }
3747
3748 //#define REFINE_STATS 1
3749 static int16_t basis[64][64];
3750
3751 static void build_basis(uint8_t *perm){
3752     int i, j, x, y;
3753     emms_c();
3754     for(i=0; i<8; i++){
3755         for(j=0; j<8; j++){
3756             for(y=0; y<8; y++){
3757                 for(x=0; x<8; x++){
3758                     double s= 0.25*(1<<BASIS_SHIFT);
3759                     int index= 8*i + j;
3760                     int perm_index= perm[index];
3761                     if(i==0) s*= sqrt(0.5);
3762                     if(j==0) s*= sqrt(0.5);
3763                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3764                 }
3765             }
3766         }
3767     }
3768 }
3769
3770 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3771                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3772                         int n, int qscale){
3773     int16_t rem[64];
3774     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3775     const uint8_t *scantable= s->intra_scantable.scantable;
3776     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3777 //    unsigned int threshold1, threshold2;
3778 //    int bias=0;
3779     int run_tab[65];
3780     int prev_run=0;
3781     int prev_level=0;
3782     int qmul, qadd, start_i, last_non_zero, i, dc;
3783     uint8_t * length;
3784     uint8_t * last_length;
3785     int lambda;
3786     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3787 #ifdef REFINE_STATS
3788 static int count=0;
3789 static int after_last=0;
3790 static int to_zero=0;
3791 static int from_zero=0;
3792 static int raise=0;
3793 static int lower=0;
3794 static int messed_sign=0;
3795 #endif
3796
3797     if(basis[0][0] == 0)
3798         build_basis(s->dsp.idct_permutation);
3799
3800     qmul= qscale*2;
3801     qadd= (qscale-1)|1;
3802     if (s->mb_intra) {
3803         if (!s->h263_aic) {
3804             if (n < 4)
3805                 q = s->y_dc_scale;
3806             else
3807                 q = s->c_dc_scale;
3808         } else{
3809             /* For AIC we skip quant/dequant of INTRADC */
3810             q = 1;
3811             qadd=0;
3812         }
3813         q <<= RECON_SHIFT-3;
3814         /* note: block[0] is assumed to be positive */
3815         dc= block[0]*q;
3816 //        block[0] = (block[0] + (q >> 1)) / q;
3817         start_i = 1;
3818 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3819 //            bias= 1<<(QMAT_SHIFT-1);
3820         length     = s->intra_ac_vlc_length;
3821         last_length= s->intra_ac_vlc_last_length;
3822     } else {
3823         dc= 0;
3824         start_i = 0;
3825         length     = s->inter_ac_vlc_length;
3826         last_length= s->inter_ac_vlc_last_length;
3827     }
3828     last_non_zero = s->block_last_index[n];
3829
3830 #ifdef REFINE_STATS
3831 {START_TIMER
3832 #endif
3833     dc += (1<<(RECON_SHIFT-1));
3834     for(i=0; i<64; i++){
3835         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3836     }
3837 #ifdef REFINE_STATS
3838 STOP_TIMER("memset rem[]")}
3839 #endif
3840     sum=0;
3841     for(i=0; i<64; i++){
3842         int one= 36;
3843         int qns=4;
3844         int w;
3845
3846         w= FFABS(weight[i]) + qns*one;
3847         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3848
3849         weight[i] = w;
3850 //        w=weight[i] = (63*qns + (w/2)) / w;
3851
3852         av_assert2(w>0);
3853         av_assert2(w<(1<<6));
3854         sum += w*w;
3855     }
3856     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3857 #ifdef REFINE_STATS
3858 {START_TIMER
3859 #endif
3860     run=0;
3861     rle_index=0;
3862     for(i=start_i; i<=last_non_zero; i++){
3863         int j= perm_scantable[i];
3864         const int level= block[j];
3865         int coeff;
3866
3867         if(level){
3868             if(level<0) coeff= qmul*level - qadd;
3869             else        coeff= qmul*level + qadd;
3870             run_tab[rle_index++]=run;
3871             run=0;
3872
3873             s->dsp.add_8x8basis(rem, basis[j], coeff);
3874         }else{
3875             run++;
3876         }
3877     }
3878 #ifdef REFINE_STATS
3879 if(last_non_zero>0){
3880 STOP_TIMER("init rem[]")
3881 }
3882 }
3883
3884 {START_TIMER
3885 #endif
3886     for(;;){
3887         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3888         int best_coeff=0;
3889         int best_change=0;
3890         int run2, best_unquant_change=0, analyze_gradient;
3891 #ifdef REFINE_STATS
3892 {START_TIMER
3893 #endif
3894         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3895
3896         if(analyze_gradient){
3897 #ifdef REFINE_STATS
3898 {START_TIMER
3899 #endif
3900             for(i=0; i<64; i++){
3901                 int w= weight[i];
3902
3903                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3904             }
3905 #ifdef REFINE_STATS
3906 STOP_TIMER("rem*w*w")}
3907 {START_TIMER
3908 #endif
3909             s->dsp.fdct(d1);
3910 #ifdef REFINE_STATS
3911 STOP_TIMER("dct")}
3912 #endif
3913         }
3914
3915         if(start_i){
3916             const int level= block[0];
3917             int change, old_coeff;
3918
3919             av_assert2(s->mb_intra);
3920
3921             old_coeff= q*level;
3922
3923             for(change=-1; change<=1; change+=2){
3924                 int new_level= level + change;
3925                 int score, new_coeff;
3926
3927                 new_coeff= q*new_level;
3928                 if(new_coeff >= 2048 || new_coeff < 0)
3929                     continue;
3930
3931                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3932                 if(score<best_score){
3933                     best_score= score;
3934                     best_coeff= 0;
3935                     best_change= change;
3936                     best_unquant_change= new_coeff - old_coeff;
3937                 }
3938             }
3939         }
3940
3941         run=0;
3942         rle_index=0;
3943         run2= run_tab[rle_index++];
3944         prev_level=0;
3945         prev_run=0;
3946
3947         for(i=start_i; i<64; i++){
3948             int j= perm_scantable[i];
3949             const int level= block[j];
3950             int change, old_coeff;
3951
3952             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3953                 break;
3954
3955             if(level){
3956                 if(level<0) old_coeff= qmul*level - qadd;
3957                 else        old_coeff= qmul*level + qadd;
3958                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3959             }else{
3960                 old_coeff=0;
3961                 run2--;
3962                 av_assert2(run2>=0 || i >= last_non_zero );
3963             }
3964
3965             for(change=-1; change<=1; change+=2){
3966                 int new_level= level + change;
3967                 int score, new_coeff, unquant_change;
3968
3969                 score=0;
3970                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3971                    continue;
3972
3973                 if(new_level){
3974                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3975                     else            new_coeff= qmul*new_level + qadd;
3976                     if(new_coeff >= 2048 || new_coeff <= -2048)
3977                         continue;
3978                     //FIXME check for overflow
3979
3980                     if(level){
3981                         if(level < 63 && level > -63){
3982                             if(i < last_non_zero)
3983                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3984                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3985                             else
3986                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3987                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3988                         }
3989                     }else{
3990                         av_assert2(FFABS(new_level)==1);
3991
3992                         if(analyze_gradient){
3993                             int g= d1[ scantable[i] ];
3994                             if(g && (g^new_level) >= 0)
3995                                 continue;
3996                         }
3997
3998                         if(i < last_non_zero){
3999                             int next_i= i + run2 + 1;
4000                             int next_level= block[ perm_scantable[next_i] ] + 64;
4001
4002                             if(next_level&(~127))
4003                                 next_level= 0;
4004
4005                             if(next_i < last_non_zero)
4006                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4007                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4008                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4009                             else
4010                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4011                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4012                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4013                         }else{
4014                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4015                             if(prev_level){
4016                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4017                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4018                             }
4019                         }
4020                     }
4021                 }else{
4022                     new_coeff=0;
4023                     av_assert2(FFABS(level)==1);
4024
4025                     if(i < last_non_zero){
4026                         int next_i= i + run2 + 1;
4027                         int next_level= block[ perm_scantable[next_i] ] + 64;
4028
4029                         if(next_level&(~127))
4030                             next_level= 0;
4031
4032                         if(next_i < last_non_zero)
4033                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4034                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4035                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4036                         else
4037                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4038                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4039                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4040                     }else{
4041                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4042                         if(prev_level){
4043                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4044                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4045                         }
4046                     }
4047                 }
4048
4049                 score *= lambda;
4050
4051                 unquant_change= new_coeff - old_coeff;
4052                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4053
4054                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4055                 if(score<best_score){
4056                     best_score= score;
4057                     best_coeff= i;
4058                     best_change= change;
4059                     best_unquant_change= unquant_change;
4060                 }
4061             }
4062             if(level){
4063                 prev_level= level + 64;
4064                 if(prev_level&(~127))
4065                     prev_level= 0;
4066                 prev_run= run;
4067                 run=0;
4068             }else{
4069                 run++;
4070             }
4071         }
4072 #ifdef REFINE_STATS
4073 STOP_TIMER("iterative step")}
4074 #endif
4075
4076         if(best_change){
4077             int j= perm_scantable[ best_coeff ];
4078
4079             block[j] += best_change;
4080
4081             if(best_coeff > last_non_zero){
4082                 last_non_zero= best_coeff;
4083                 av_assert2(block[j]);
4084 #ifdef REFINE_STATS
4085 after_last++;
4086 #endif
4087             }else{
4088 #ifdef REFINE_STATS
4089 if(block[j]){
4090     if(block[j] - best_change){
4091         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4092             raise++;
4093         }else{
4094             lower++;
4095         }
4096     }else{
4097         from_zero++;
4098     }
4099 }else{
4100     to_zero++;
4101 }
4102 #endif
4103                 for(; last_non_zero>=start_i; last_non_zero--){
4104                     if(block[perm_scantable[last_non_zero]])
4105                         break;
4106                 }
4107             }
4108 #ifdef REFINE_STATS
4109 count++;
4110 if(256*256*256*64 % count == 0){
4111     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4112 }
4113 #endif
4114             run=0;
4115             rle_index=0;
4116             for(i=start_i; i<=last_non_zero; i++){
4117                 int j= perm_scantable[i];
4118                 const int level= block[j];
4119
4120                  if(level){
4121                      run_tab[rle_index++]=run;
4122                      run=0;
4123                  }else{
4124                      run++;
4125                  }
4126             }
4127
4128             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4129         }else{
4130             break;
4131         }
4132     }
4133 #ifdef REFINE_STATS
4134 if(last_non_zero>0){
4135 STOP_TIMER("iterative search")
4136 }
4137 }
4138 #endif
4139
4140     return last_non_zero;
4141 }
4142
4143 int ff_dct_quantize_c(MpegEncContext *s,
4144                         DCTELEM *block, int n,
4145                         int qscale, int *overflow)
4146 {
4147     int i, j, level, last_non_zero, q, start_i;
4148     const int *qmat;
4149     const uint8_t *scantable= s->intra_scantable.scantable;
4150     int bias;
4151     int max=0;
4152     unsigned int threshold1, threshold2;
4153
4154     s->dsp.fdct (block);
4155
4156     if(s->dct_error_sum)
4157         s->denoise_dct(s, block);
4158
4159     if (s->mb_intra) {
4160         if (!s->h263_aic) {
4161             if (n < 4)
4162                 q = s->y_dc_scale;
4163             else
4164                 q = s->c_dc_scale;
4165             q = q << 3;
4166         } else
4167             /* For AIC we skip quant/dequant of INTRADC */
4168             q = 1 << 3;
4169
4170         /* note: block[0] is assumed to be positive */
4171         block[0] = (block[0] + (q >> 1)) / q;
4172         start_i = 1;
4173         last_non_zero = 0;
4174         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4175         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4176     } else {
4177         start_i = 0;
4178         last_non_zero = -1;
4179         qmat = s->q_inter_matrix[qscale];
4180         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4181     }
4182     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4183     threshold2= (threshold1<<1);
4184     for(i=63;i>=start_i;i--) {
4185         j = scantable[i];
4186         level = block[j] * qmat[j];
4187
4188         if(((unsigned)(level+threshold1))>threshold2){
4189             last_non_zero = i;
4190             break;
4191         }else{
4192             block[j]=0;
4193         }
4194     }
4195     for(i=start_i; i<=last_non_zero; i++) {
4196         j = scantable[i];
4197         level = block[j] * qmat[j];
4198
4199 //        if(   bias+level >= (1<<QMAT_SHIFT)
4200 //           || bias-level >= (1<<QMAT_SHIFT)){
4201         if(((unsigned)(level+threshold1))>threshold2){
4202             if(level>0){
4203                 level= (bias + level)>>QMAT_SHIFT;
4204                 block[j]= level;
4205             }else{
4206                 level= (bias - level)>>QMAT_SHIFT;
4207                 block[j]= -level;
4208             }
4209             max |=level;
4210         }else{
4211             block[j]=0;
4212         }
4213     }
4214     *overflow= s->max_qcoeff < max; //overflow might have happened
4215
4216     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4217     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4218         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4219
4220     return last_non_zero;
4221 }
4222
4223 #define OFFSET(x) offsetof(MpegEncContext, x)
4224 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4225 static const AVOption h263_options[] = {
4226     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4227     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4228     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4229     FF_MPV_COMMON_OPTS
4230     { NULL },
4231 };
4232
4233 static const AVClass h263_class = {
4234     .class_name = "H.263 encoder",
4235     .item_name  = av_default_item_name,
4236     .option     = h263_options,
4237     .version    = LIBAVUTIL_VERSION_INT,
4238 };
4239
4240 AVCodec ff_h263_encoder = {
4241     .name           = "h263",
4242     .type           = AVMEDIA_TYPE_VIDEO,
4243     .id             = AV_CODEC_ID_H263,
4244     .priv_data_size = sizeof(MpegEncContext),
4245     .init           = ff_MPV_encode_init,
4246     .encode2        = ff_MPV_encode_picture,
4247     .close          = ff_MPV_encode_end,
4248     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4249     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4250     .priv_class     = &h263_class,
4251 };
4252
4253 static const AVOption h263p_options[] = {
4254     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4255     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4256     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4257     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4258     FF_MPV_COMMON_OPTS
4259     { NULL },
4260 };
4261 static const AVClass h263p_class = {
4262     .class_name = "H.263p encoder",
4263     .item_name  = av_default_item_name,
4264     .option     = h263p_options,
4265     .version    = LIBAVUTIL_VERSION_INT,
4266 };
4267
4268 AVCodec ff_h263p_encoder = {
4269     .name           = "h263p",
4270     .type           = AVMEDIA_TYPE_VIDEO,
4271     .id             = AV_CODEC_ID_H263P,
4272     .priv_data_size = sizeof(MpegEncContext),
4273     .init           = ff_MPV_encode_init,
4274     .encode2        = ff_MPV_encode_picture,
4275     .close          = ff_MPV_encode_end,
4276     .capabilities   = CODEC_CAP_SLICE_THREADS,
4277     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4278     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4279     .priv_class     = &h263p_class,
4280 };
4281
4282 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4283
4284 AVCodec ff_msmpeg4v2_encoder = {
4285     .name           = "msmpeg4v2",
4286     .type           = AVMEDIA_TYPE_VIDEO,
4287     .id             = AV_CODEC_ID_MSMPEG4V2,
4288     .priv_data_size = sizeof(MpegEncContext),
4289     .init           = ff_MPV_encode_init,
4290     .encode2        = ff_MPV_encode_picture,
4291     .close          = ff_MPV_encode_end,
4292     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4293     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4294     .priv_class     = &msmpeg4v2_class,
4295 };
4296
4297 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4298
4299 AVCodec ff_msmpeg4v3_encoder = {
4300     .name           = "msmpeg4",
4301     .type           = AVMEDIA_TYPE_VIDEO,
4302     .id             = AV_CODEC_ID_MSMPEG4V3,
4303     .priv_data_size = sizeof(MpegEncContext),
4304     .init           = ff_MPV_encode_init,
4305     .encode2        = ff_MPV_encode_picture,
4306     .close          = ff_MPV_encode_end,
4307     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4308     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4309     .priv_class     = &msmpeg4v3_class,
4310 };
4311
4312 FF_MPV_GENERIC_CLASS(wmv1)
4313
4314 AVCodec ff_wmv1_encoder = {
4315     .name           = "wmv1",
4316     .type           = AVMEDIA_TYPE_VIDEO,
4317     .id             = AV_CODEC_ID_WMV1,
4318     .priv_data_size = sizeof(MpegEncContext),
4319     .init           = ff_MPV_encode_init,
4320     .encode2        = ff_MPV_encode_picture,
4321     .close          = ff_MPV_encode_end,
4322     .pix_fmts       = (const enum PixelFormat[]){ PIX_FMT_YUV420P, PIX_FMT_NONE },
4323     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4324     .priv_class     = &wmv1_class,
4325 };