]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
lavc: make rc_buffer_aggressivity/rc_initial_cplx into private options of mpegvideo...
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
61 #define QUANT_BIAS_SHIFT 8
62
63 #define QMAT_SHIFT_MMX 16
64 #define QMAT_SHIFT 22
65
66 static int encode_picture(MpegEncContext *s, int picture_number);
67 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
68 static int sse_mb(MpegEncContext *s);
69 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
70 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
71
72 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
73 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
74
75 const AVOption ff_mpv_generic_options[] = {
76     FF_MPV_COMMON_OPTS
77     { NULL },
78 };
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
105                                         (qscale * quant_matrix[j]));
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 /* 16 <= qscale * quant_matrix[i] <= 7905
111                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
112                  *             19952 <=              x  <= 249205026
113                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
114                  *           3444240 >= (1 << 36) / (x) >= 275 */
115
116                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
117                                         (ff_aanscales[i] * qscale *
118                                          quant_matrix[j]));
119             }
120         } else {
121             for (i = 0; i < 64; i++) {
122                 const int j = s->idsp.idct_permutation[i];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
129                                         (qscale * quant_matrix[j]));
130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
131                 //                    (qscale * quant_matrix[i]);
132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
133                                        (qscale * quant_matrix[j]);
134
135                 if (qmat16[qscale][0][i] == 0 ||
136                     qmat16[qscale][0][i] == 128 * 256)
137                     qmat16[qscale][0][i] = 128 * 256 - 1;
138                 qmat16[qscale][1][i] =
139                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
140                                 qmat16[qscale][0][i]);
141             }
142         }
143
144         for (i = intra; i < 64; i++) {
145             int64_t max = 8191;
146             if (fdsp->fdct == ff_fdct_ifast) {
147                 max = (8191LL * ff_aanscales[i]) >> 14;
148             }
149             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
150                 shift++;
151             }
152         }
153     }
154     if (shift) {
155         av_log(NULL, AV_LOG_INFO,
156                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
157                QMAT_SHIFT - shift);
158     }
159 }
160
161 static inline void update_qscale(MpegEncContext *s)
162 {
163     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
164                 (FF_LAMBDA_SHIFT + 7);
165     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
166
167     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
168                  FF_LAMBDA_SHIFT;
169 }
170
171 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
172 {
173     int i;
174
175     if (matrix) {
176         put_bits(pb, 1, 1);
177         for (i = 0; i < 64; i++) {
178             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
179         }
180     } else
181         put_bits(pb, 1, 0);
182 }
183
184 /**
185  * init s->current_picture.qscale_table from s->lambda_table
186  */
187 void ff_init_qscale_tab(MpegEncContext *s)
188 {
189     int8_t * const qscale_table = s->current_picture.qscale_table;
190     int i;
191
192     for (i = 0; i < s->mb_num; i++) {
193         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
194         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
195         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
196                                                   s->avctx->qmax);
197     }
198 }
199
200 static void update_duplicate_context_after_me(MpegEncContext *dst,
201                                               MpegEncContext *src)
202 {
203 #define COPY(a) dst->a= src->a
204     COPY(pict_type);
205     COPY(current_picture);
206     COPY(f_code);
207     COPY(b_code);
208     COPY(qscale);
209     COPY(lambda);
210     COPY(lambda2);
211     COPY(picture_in_gop_number);
212     COPY(gop_picture_number);
213     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
214     COPY(progressive_frame);    // FIXME don't set in encode_header
215     COPY(partitioned_frame);    // FIXME don't set in encode_header
216 #undef COPY
217 }
218
219 /**
220  * Set the given MpegEncContext to defaults for encoding.
221  * the changed fields will not depend upon the prior state of the MpegEncContext.
222  */
223 static void mpv_encode_defaults(MpegEncContext *s)
224 {
225     int i;
226     ff_mpv_common_defaults(s);
227
228     for (i = -16; i < 16; i++) {
229         default_fcode_tab[i + MAX_MV] = 1;
230     }
231     s->me.mv_penalty = default_mv_penalty;
232     s->fcode_tab     = default_fcode_tab;
233
234     s->input_picture_number  = 0;
235     s->picture_in_gop_number = 0;
236 }
237
238 /* init video encoder */
239 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
240 {
241     MpegEncContext *s = avctx->priv_data;
242     int i, ret, format_supported;
243
244     mpv_encode_defaults(s);
245
246     switch (avctx->codec_id) {
247     case AV_CODEC_ID_MPEG2VIDEO:
248         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
249             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
250             av_log(avctx, AV_LOG_ERROR,
251                    "only YUV420 and YUV422 are supported\n");
252             return -1;
253         }
254         break;
255     case AV_CODEC_ID_MJPEG:
256         format_supported = 0;
257         /* JPEG color space */
258         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
259             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
260             (avctx->color_range == AVCOL_RANGE_JPEG &&
261              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
262               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
263             format_supported = 1;
264         /* MPEG color space */
265         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
266                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
267                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
268             format_supported = 1;
269
270         if (!format_supported) {
271             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
272             return -1;
273         }
274         break;
275     default:
276         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
277             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
278             return -1;
279         }
280     }
281
282     switch (avctx->pix_fmt) {
283     case AV_PIX_FMT_YUVJ422P:
284     case AV_PIX_FMT_YUV422P:
285         s->chroma_format = CHROMA_422;
286         break;
287     case AV_PIX_FMT_YUVJ420P:
288     case AV_PIX_FMT_YUV420P:
289     default:
290         s->chroma_format = CHROMA_420;
291         break;
292     }
293
294     s->bit_rate = avctx->bit_rate;
295     s->width    = avctx->width;
296     s->height   = avctx->height;
297     if (avctx->gop_size > 600 &&
298         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
299         av_log(avctx, AV_LOG_ERROR,
300                "Warning keyframe interval too large! reducing it ...\n");
301         avctx->gop_size = 600;
302     }
303     s->gop_size     = avctx->gop_size;
304     s->avctx        = avctx;
305     s->flags        = avctx->flags;
306     s->flags2       = avctx->flags2;
307     if (avctx->max_b_frames > MAX_B_FRAMES) {
308         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
309                "is %d.\n", MAX_B_FRAMES);
310     }
311     s->max_b_frames = avctx->max_b_frames;
312     s->codec_id     = avctx->codec->id;
313     s->strict_std_compliance = avctx->strict_std_compliance;
314     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
315     s->mpeg_quant         = avctx->mpeg_quant;
316     s->rtp_mode           = !!avctx->rtp_payload_size;
317     s->intra_dc_precision = avctx->intra_dc_precision;
318     s->user_specified_pts = AV_NOPTS_VALUE;
319
320     if (s->gop_size <= 1) {
321         s->intra_only = 1;
322         s->gop_size   = 12;
323     } else {
324         s->intra_only = 0;
325     }
326
327     s->me_method = avctx->me_method;
328
329     /* Fixed QSCALE */
330     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
331
332     s->adaptive_quant = (s->avctx->lumi_masking ||
333                          s->avctx->dark_masking ||
334                          s->avctx->temporal_cplx_masking ||
335                          s->avctx->spatial_cplx_masking  ||
336                          s->avctx->p_masking      ||
337                          s->avctx->border_masking ||
338                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
339                         !s->fixed_qscale;
340
341     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
342
343     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
344         av_log(avctx, AV_LOG_ERROR,
345                "a vbv buffer size is needed, "
346                "for encoding with a maximum bitrate\n");
347         return -1;
348     }
349
350     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
351         av_log(avctx, AV_LOG_INFO,
352                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
353     }
354
355     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
356         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
357         return -1;
358     }
359
360     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
361         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
362         return -1;
363     }
364
365     if (avctx->rc_max_rate &&
366         avctx->rc_max_rate == avctx->bit_rate &&
367         avctx->rc_max_rate != avctx->rc_min_rate) {
368         av_log(avctx, AV_LOG_INFO,
369                "impossible bitrate constraints, this will fail\n");
370     }
371
372     if (avctx->rc_buffer_size &&
373         avctx->bit_rate * (int64_t)avctx->time_base.num >
374             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
375         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
376         return -1;
377     }
378
379     if (!s->fixed_qscale &&
380         avctx->bit_rate * av_q2d(avctx->time_base) >
381             avctx->bit_rate_tolerance) {
382         av_log(avctx, AV_LOG_ERROR,
383                "bitrate tolerance too small for bitrate\n");
384         return -1;
385     }
386
387     if (s->avctx->rc_max_rate &&
388         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
389         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
390          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
391         90000LL * (avctx->rc_buffer_size - 1) >
392             s->avctx->rc_max_rate * 0xFFFFLL) {
393         av_log(avctx, AV_LOG_INFO,
394                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
395                "specified vbv buffer is too large for the given bitrate!\n");
396     }
397
398     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
399         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
400         s->codec_id != AV_CODEC_ID_FLV1) {
401         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
402         return -1;
403     }
404
405     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
406         av_log(avctx, AV_LOG_ERROR,
407                "OBMC is only supported with simple mb decision\n");
408         return -1;
409     }
410
411     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
412         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
413         return -1;
414     }
415
416     if (s->max_b_frames                    &&
417         s->codec_id != AV_CODEC_ID_MPEG4      &&
418         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
419         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
420         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
421         return -1;
422     }
423
424     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
425          s->codec_id == AV_CODEC_ID_H263  ||
426          s->codec_id == AV_CODEC_ID_H263P) &&
427         (avctx->sample_aspect_ratio.num > 255 ||
428          avctx->sample_aspect_ratio.den > 255)) {
429         av_log(avctx, AV_LOG_ERROR,
430                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
431                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
432         return -1;
433     }
434
435     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
436         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
437         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
438         return -1;
439     }
440
441     // FIXME mpeg2 uses that too
442     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
443         av_log(avctx, AV_LOG_ERROR,
444                "mpeg2 style quantization not supported by codec\n");
445         return -1;
446     }
447
448     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
449         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
450         return -1;
451     }
452
453     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
454         s->avctx->mb_decision != FF_MB_DECISION_RD) {
455         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
456         return -1;
457     }
458
459     if (s->avctx->scenechange_threshold < 1000000000 &&
460         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
461         av_log(avctx, AV_LOG_ERROR,
462                "closed gop with scene change detection are not supported yet, "
463                "set threshold to 1000000000\n");
464         return -1;
465     }
466
467     if (s->flags & CODEC_FLAG_LOW_DELAY) {
468         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
469             av_log(avctx, AV_LOG_ERROR,
470                   "low delay forcing is only available for mpeg2\n");
471             return -1;
472         }
473         if (s->max_b_frames != 0) {
474             av_log(avctx, AV_LOG_ERROR,
475                    "b frames cannot be used with low delay\n");
476             return -1;
477         }
478     }
479
480     if (s->q_scale_type == 1) {
481         if (avctx->qmax > 12) {
482             av_log(avctx, AV_LOG_ERROR,
483                    "non linear quant only supports qmax <= 12 currently\n");
484             return -1;
485         }
486     }
487
488     if (s->avctx->thread_count > 1         &&
489         s->codec_id != AV_CODEC_ID_MPEG4      &&
490         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
491         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
492         (s->codec_id != AV_CODEC_ID_H263P)) {
493         av_log(avctx, AV_LOG_ERROR,
494                "multi threaded encoding not supported by codec\n");
495         return -1;
496     }
497
498     if (s->avctx->thread_count < 1) {
499         av_log(avctx, AV_LOG_ERROR,
500                "automatic thread number detection not supported by codec,"
501                "patch welcome\n");
502         return -1;
503     }
504
505     if (s->avctx->thread_count > 1)
506         s->rtp_mode = 1;
507
508     if (!avctx->time_base.den || !avctx->time_base.num) {
509         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
510         return -1;
511     }
512
513     i = (INT_MAX / 2 + 128) >> 8;
514     if (avctx->mb_threshold >= i) {
515         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
516                i - 1);
517         return -1;
518     }
519
520     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
521         av_log(avctx, AV_LOG_INFO,
522                "notice: b_frame_strategy only affects the first pass\n");
523         avctx->b_frame_strategy = 0;
524     }
525
526     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
527     if (i > 1) {
528         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
529         avctx->time_base.den /= i;
530         avctx->time_base.num /= i;
531         //return -1;
532     }
533
534     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
535         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
536         // (a + x * 3 / 8) / x
537         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
538         s->inter_quant_bias = 0;
539     } else {
540         s->intra_quant_bias = 0;
541         // (a - x / 4) / x
542         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
543     }
544
545     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->intra_quant_bias = avctx->intra_quant_bias;
547     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
548         s->inter_quant_bias = avctx->inter_quant_bias;
549
550     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
551         s->avctx->time_base.den > (1 << 16) - 1) {
552         av_log(avctx, AV_LOG_ERROR,
553                "timebase %d/%d not supported by MPEG 4 standard, "
554                "the maximum admitted value for the timebase denominator "
555                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
556                (1 << 16) - 1);
557         return -1;
558     }
559     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
560
561     switch (avctx->codec->id) {
562     case AV_CODEC_ID_MPEG1VIDEO:
563         s->out_format = FMT_MPEG1;
564         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
565         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
566         break;
567     case AV_CODEC_ID_MPEG2VIDEO:
568         s->out_format = FMT_MPEG1;
569         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
570         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
571         s->rtp_mode   = 1;
572         break;
573     case AV_CODEC_ID_MJPEG:
574         s->out_format = FMT_MJPEG;
575         s->intra_only = 1; /* force intra only for jpeg */
576         if (!CONFIG_MJPEG_ENCODER ||
577             ff_mjpeg_encode_init(s) < 0)
578             return -1;
579         avctx->delay = 0;
580         s->low_delay = 1;
581         break;
582     case AV_CODEC_ID_H261:
583         if (!CONFIG_H261_ENCODER)
584             return -1;
585         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
586             av_log(avctx, AV_LOG_ERROR,
587                    "The specified picture size of %dx%d is not valid for the "
588                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
589                     s->width, s->height);
590             return -1;
591         }
592         s->out_format = FMT_H261;
593         avctx->delay  = 0;
594         s->low_delay  = 1;
595         break;
596     case AV_CODEC_ID_H263:
597         if (!CONFIG_H263_ENCODER)
598         return -1;
599         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
600                              s->width, s->height) == 8) {
601             av_log(avctx, AV_LOG_INFO,
602                    "The specified picture size of %dx%d is not valid for "
603                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
604                    "352x288, 704x576, and 1408x1152."
605                    "Try H.263+.\n", s->width, s->height);
606             return -1;
607         }
608         s->out_format = FMT_H263;
609         avctx->delay  = 0;
610         s->low_delay  = 1;
611         break;
612     case AV_CODEC_ID_H263P:
613         s->out_format = FMT_H263;
614         s->h263_plus  = 1;
615         /* Fx */
616         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
617         s->modified_quant  = s->h263_aic;
618         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
619         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
620
621         /* /Fx */
622         /* These are just to be sure */
623         avctx->delay = 0;
624         s->low_delay = 1;
625         break;
626     case AV_CODEC_ID_FLV1:
627         s->out_format      = FMT_H263;
628         s->h263_flv        = 2; /* format = 1; 11-bit codes */
629         s->unrestricted_mv = 1;
630         s->rtp_mode  = 0; /* don't allow GOB */
631         avctx->delay = 0;
632         s->low_delay = 1;
633         break;
634     case AV_CODEC_ID_RV10:
635         s->out_format = FMT_H263;
636         avctx->delay  = 0;
637         s->low_delay  = 1;
638         break;
639     case AV_CODEC_ID_RV20:
640         s->out_format      = FMT_H263;
641         avctx->delay       = 0;
642         s->low_delay       = 1;
643         s->modified_quant  = 1;
644         s->h263_aic        = 1;
645         s->h263_plus       = 1;
646         s->loop_filter     = 1;
647         s->unrestricted_mv = 0;
648         break;
649     case AV_CODEC_ID_MPEG4:
650         s->out_format      = FMT_H263;
651         s->h263_pred       = 1;
652         s->unrestricted_mv = 1;
653         s->low_delay       = s->max_b_frames ? 0 : 1;
654         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
655         break;
656     case AV_CODEC_ID_MSMPEG4V2:
657         s->out_format      = FMT_H263;
658         s->h263_pred       = 1;
659         s->unrestricted_mv = 1;
660         s->msmpeg4_version = 2;
661         avctx->delay       = 0;
662         s->low_delay       = 1;
663         break;
664     case AV_CODEC_ID_MSMPEG4V3:
665         s->out_format        = FMT_H263;
666         s->h263_pred         = 1;
667         s->unrestricted_mv   = 1;
668         s->msmpeg4_version   = 3;
669         s->flipflop_rounding = 1;
670         avctx->delay         = 0;
671         s->low_delay         = 1;
672         break;
673     case AV_CODEC_ID_WMV1:
674         s->out_format        = FMT_H263;
675         s->h263_pred         = 1;
676         s->unrestricted_mv   = 1;
677         s->msmpeg4_version   = 4;
678         s->flipflop_rounding = 1;
679         avctx->delay         = 0;
680         s->low_delay         = 1;
681         break;
682     case AV_CODEC_ID_WMV2:
683         s->out_format        = FMT_H263;
684         s->h263_pred         = 1;
685         s->unrestricted_mv   = 1;
686         s->msmpeg4_version   = 5;
687         s->flipflop_rounding = 1;
688         avctx->delay         = 0;
689         s->low_delay         = 1;
690         break;
691     default:
692         return -1;
693     }
694
695     avctx->has_b_frames = !s->low_delay;
696
697     s->encoding = 1;
698
699     s->progressive_frame    =
700     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
701                                                 CODEC_FLAG_INTERLACED_ME) ||
702                                 s->alternate_scan);
703
704     /* init */
705     ff_mpv_idct_init(s);
706     if (ff_mpv_common_init(s) < 0)
707         return -1;
708
709     if (ARCH_X86)
710         ff_mpv_encode_init_x86(s);
711
712     ff_fdctdsp_init(&s->fdsp, avctx);
713     ff_me_cmp_init(&s->mecc, avctx);
714     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
715     ff_pixblockdsp_init(&s->pdsp, avctx);
716     ff_qpeldsp_init(&s->qdsp);
717
718     s->avctx->coded_frame = s->current_picture.f;
719
720     if (s->msmpeg4_version) {
721         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
722                           2 * 2 * (MAX_LEVEL + 1) *
723                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
724     }
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
726
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
731     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
732                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
733     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
734                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
735
736     if (s->avctx->noise_reduction) {
737         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
738                           2 * 64 * sizeof(uint16_t), fail);
739     }
740
741     if (CONFIG_H263_ENCODER)
742         ff_h263dsp_init(&s->h263dsp);
743     if (!s->dct_quantize)
744         s->dct_quantize = ff_dct_quantize_c;
745     if (!s->denoise_dct)
746         s->denoise_dct  = denoise_dct_c;
747     s->fast_dct_quantize = s->dct_quantize;
748     if (avctx->trellis)
749         s->dct_quantize  = dct_quantize_trellis_c;
750
751     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
752         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
753
754     s->quant_precision = 5;
755
756     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
757     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
758
759     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
760         ff_h261_encode_init(s);
761     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
762         ff_h263_encode_init(s);
763     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
764         ff_msmpeg4_encode_init(s);
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->idsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811 #if FF_API_NORMALIZE_AQP
812     FF_DISABLE_DEPRECATION_WARNINGS
813     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
814         s->mpv_flags |= FF_MPV_FLAG_NAQ;
815     FF_ENABLE_DEPRECATION_WARNINGS;
816 #endif
817
818 #if FF_API_MV0
819     FF_DISABLE_DEPRECATION_WARNINGS
820     if (avctx->flags & CODEC_FLAG_MV0)
821         s->mpv_flags |= FF_MPV_FLAG_MV0;
822     FF_ENABLE_DEPRECATION_WARNINGS
823 #endif
824
825 #if FF_API_MPV_OPT
826     FF_DISABLE_DEPRECATION_WARNINGS
827     if (avctx->rc_qsquish != 0.0)
828         s->rc_qsquish = avctx->rc_qsquish;
829     if (avctx->rc_qmod_amp != 0.0)
830         s->rc_qmod_amp = avctx->rc_qmod_amp;
831     if (avctx->rc_qmod_freq)
832         s->rc_qmod_freq = avctx->rc_qmod_freq;
833     if (avctx->rc_buffer_aggressivity != 1.0)
834         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
835     if (avctx->rc_initial_cplx != 0.0)
836         s->rc_initial_cplx = avctx->rc_initial_cplx;
837
838     if (avctx->rc_eq) {
839         av_freep(&s->rc_eq);
840         s->rc_eq = av_strdup(avctx->rc_eq);
841         if (!s->rc_eq)
842             return AVERROR(ENOMEM);
843     }
844     FF_ENABLE_DEPRECATION_WARNINGS
845 #endif
846
847     if (avctx->b_frame_strategy == 2) {
848         for (i = 0; i < s->max_b_frames + 2; i++) {
849             s->tmp_frames[i] = av_frame_alloc();
850             if (!s->tmp_frames[i])
851                 return AVERROR(ENOMEM);
852
853             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
854             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
855             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
856
857             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
858             if (ret < 0)
859                 return ret;
860         }
861     }
862
863     return 0;
864 fail:
865     ff_mpv_encode_end(avctx);
866     return AVERROR_UNKNOWN;
867 }
868
869 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
870 {
871     MpegEncContext *s = avctx->priv_data;
872     int i;
873
874     ff_rate_control_uninit(s);
875
876     ff_mpv_common_end(s);
877     if (CONFIG_MJPEG_ENCODER &&
878         s->out_format == FMT_MJPEG)
879         ff_mjpeg_encode_close(s);
880
881     av_freep(&avctx->extradata);
882
883     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
884         av_frame_free(&s->tmp_frames[i]);
885
886     ff_free_picture_tables(&s->new_picture);
887     ff_mpeg_unref_picture(s, &s->new_picture);
888
889     av_freep(&s->avctx->stats_out);
890     av_freep(&s->ac_stats);
891
892     av_freep(&s->q_intra_matrix);
893     av_freep(&s->q_inter_matrix);
894     av_freep(&s->q_intra_matrix16);
895     av_freep(&s->q_inter_matrix16);
896     av_freep(&s->input_picture);
897     av_freep(&s->reordered_input_picture);
898     av_freep(&s->dct_offset);
899
900     return 0;
901 }
902
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block; the block is only read
 * @param ref    value every sample is compared with
 * @param stride offset between the first samples of two consecutive rows
 * @return sum over all 256 samples of |sample - ref|
 */
static int get_sae(const uint8_t *src, int ref, int stride)
{
    int x, y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        const uint8_t *row = src + y * stride;

        for (x = 0; x < 16; x++) {
            int diff = row[x] - ref;

            acc += diff < 0 ? -diff : diff;
        }
    }

    return acc;
}
916
917 static int get_intra_count(MpegEncContext *s, uint8_t *src,
918                            uint8_t *ref, int stride)
919 {
920     int x, y, w, h;
921     int acc = 0;
922
923     w = s->width  & ~15;
924     h = s->height & ~15;
925
926     for (y = 0; y < h; y += 16) {
927         for (x = 0; x < w; x += 16) {
928             int offset = x + y * stride;
929             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
930                                       stride, 16);
931             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
932             int sae  = get_sae(src + offset, mean, stride);
933
934             acc += sae + 500 < sad;
935         }
936     }
937     return acc;
938 }
939
940
941 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
942 {
943     Picture *pic = NULL;
944     int64_t pts;
945     int i, display_picture_number = 0, ret;
946     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
947                                                  (s->low_delay ? 0 : 1);
948     int direct = 1;
949
950     if (pic_arg) {
951         pts = pic_arg->pts;
952         display_picture_number = s->input_picture_number++;
953
954         if (pts != AV_NOPTS_VALUE) {
955             if (s->user_specified_pts != AV_NOPTS_VALUE) {
956                 int64_t time = pts;
957                 int64_t last = s->user_specified_pts;
958
959                 if (time <= last) {
960                     av_log(s->avctx, AV_LOG_ERROR,
961                            "Error, Invalid timestamp=%"PRId64", "
962                            "last=%"PRId64"\n", pts, s->user_specified_pts);
963                     return -1;
964                 }
965
966                 if (!s->low_delay && display_picture_number == 1)
967                     s->dts_delta = time - last;
968             }
969             s->user_specified_pts = pts;
970         } else {
971             if (s->user_specified_pts != AV_NOPTS_VALUE) {
972                 s->user_specified_pts =
973                 pts = s->user_specified_pts + 1;
974                 av_log(s->avctx, AV_LOG_INFO,
975                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
976                        pts);
977             } else {
978                 pts = display_picture_number;
979             }
980         }
981     }
982
983     if (pic_arg) {
984         if (!pic_arg->buf[0]);
985             direct = 0;
986         if (pic_arg->linesize[0] != s->linesize)
987             direct = 0;
988         if (pic_arg->linesize[1] != s->uvlinesize)
989             direct = 0;
990         if (pic_arg->linesize[2] != s->uvlinesize)
991             direct = 0;
992
993         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
994                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
995
996         if (direct) {
997             i = ff_find_unused_picture(s, 1);
998             if (i < 0)
999                 return i;
1000
1001             pic = &s->picture[i];
1002             pic->reference = 3;
1003
1004             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1005                 return ret;
1006             if (ff_alloc_picture(s, pic, 1) < 0) {
1007                 return -1;
1008             }
1009         } else {
1010             i = ff_find_unused_picture(s, 0);
1011             if (i < 0)
1012                 return i;
1013
1014             pic = &s->picture[i];
1015             pic->reference = 3;
1016
1017             if (ff_alloc_picture(s, pic, 0) < 0) {
1018                 return -1;
1019             }
1020
1021             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1022                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1023                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1024                 // empty
1025             } else {
1026                 int h_chroma_shift, v_chroma_shift;
1027                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1028                                                  &h_chroma_shift,
1029                                                  &v_chroma_shift);
1030
1031                 for (i = 0; i < 3; i++) {
1032                     int src_stride = pic_arg->linesize[i];
1033                     int dst_stride = i ? s->uvlinesize : s->linesize;
1034                     int h_shift = i ? h_chroma_shift : 0;
1035                     int v_shift = i ? v_chroma_shift : 0;
1036                     int w = s->width  >> h_shift;
1037                     int h = s->height >> v_shift;
1038                     uint8_t *src = pic_arg->data[i];
1039                     uint8_t *dst = pic->f->data[i];
1040
1041                     if (!s->avctx->rc_buffer_size)
1042                         dst += INPLACE_OFFSET;
1043
1044                     if (src_stride == dst_stride)
1045                         memcpy(dst, src, src_stride * h);
1046                     else {
1047                         while (h--) {
1048                             memcpy(dst, src, w);
1049                             dst += dst_stride;
1050                             src += src_stride;
1051                         }
1052                     }
1053                 }
1054             }
1055         }
1056         ret = av_frame_copy_props(pic->f, pic_arg);
1057         if (ret < 0)
1058             return ret;
1059
1060         pic->f->display_picture_number = display_picture_number;
1061         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1062     }
1063
1064     /* shift buffer entries */
1065     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1066         s->input_picture[i - 1] = s->input_picture[i];
1067
1068     s->input_picture[encoding_delay] = (Picture*) pic;
1069
1070     return 0;
1071 }
1072
1073 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1074 {
1075     int x, y, plane;
1076     int score = 0;
1077     int64_t score64 = 0;
1078
1079     for (plane = 0; plane < 3; plane++) {
1080         const int stride = p->f->linesize[plane];
1081         const int bw = plane ? 1 : 2;
1082         for (y = 0; y < s->mb_height * bw; y++) {
1083             for (x = 0; x < s->mb_width * bw; x++) {
1084                 int off = p->shared ? 0 : 16;
1085                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1086                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1087                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1088
1089                 switch (s->avctx->frame_skip_exp) {
1090                 case 0: score    =  FFMAX(score, v);          break;
1091                 case 1: score   += FFABS(v);                  break;
1092                 case 2: score   += v * v;                     break;
1093                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1094                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1095                 }
1096             }
1097         }
1098     }
1099
1100     if (score)
1101         score64 = score;
1102
1103     if (score64 < s->avctx->frame_skip_threshold)
1104         return 1;
1105     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1106         return 1;
1107     return 0;
1108 }
1109
1110 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1111 {
1112     AVPacket pkt = { 0 };
1113     int ret, got_output;
1114
1115     av_init_packet(&pkt);
1116     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1117     if (ret < 0)
1118         return ret;
1119
1120     ret = pkt.size;
1121     av_free_packet(&pkt);
1122     return ret;
1123 }
1124
1125 static int estimate_best_b_count(MpegEncContext *s)
1126 {
1127     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1128     AVCodecContext *c = avcodec_alloc_context3(NULL);
1129     const int scale = s->avctx->brd_scale;
1130     int i, j, out_size, p_lambda, b_lambda, lambda2;
1131     int64_t best_rd  = INT64_MAX;
1132     int best_b_count = -1;
1133
1134     assert(scale >= 0 && scale <= 3);
1135
1136     //emms_c();
1137     //s->next_picture_ptr->quality;
1138     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1139     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1140     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1141     if (!b_lambda) // FIXME we should do this somewhere else
1142         b_lambda = p_lambda;
1143     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1144                FF_LAMBDA_SHIFT;
1145
1146     c->width        = s->width  >> scale;
1147     c->height       = s->height >> scale;
1148     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1149     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1150     c->mb_decision  = s->avctx->mb_decision;
1151     c->me_cmp       = s->avctx->me_cmp;
1152     c->mb_cmp       = s->avctx->mb_cmp;
1153     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1154     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1155     c->time_base    = s->avctx->time_base;
1156     c->max_b_frames = s->max_b_frames;
1157
1158     if (avcodec_open2(c, codec, NULL) < 0)
1159         return -1;
1160
1161     for (i = 0; i < s->max_b_frames + 2; i++) {
1162         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1163                                                 s->next_picture_ptr;
1164
1165         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1166             pre_input = *pre_input_ptr;
1167
1168             if (!pre_input.shared && i) {
1169                 pre_input.f->data[0] += INPLACE_OFFSET;
1170                 pre_input.f->data[1] += INPLACE_OFFSET;
1171                 pre_input.f->data[2] += INPLACE_OFFSET;
1172             }
1173
1174             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1175                                        s->tmp_frames[i]->linesize[0],
1176                                        pre_input.f->data[0],
1177                                        pre_input.f->linesize[0],
1178                                        c->width, c->height);
1179             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1180                                        s->tmp_frames[i]->linesize[1],
1181                                        pre_input.f->data[1],
1182                                        pre_input.f->linesize[1],
1183                                        c->width >> 1, c->height >> 1);
1184             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1185                                        s->tmp_frames[i]->linesize[2],
1186                                        pre_input.f->data[2],
1187                                        pre_input.f->linesize[2],
1188                                        c->width >> 1, c->height >> 1);
1189         }
1190     }
1191
1192     for (j = 0; j < s->max_b_frames + 1; j++) {
1193         int64_t rd = 0;
1194
1195         if (!s->input_picture[j])
1196             break;
1197
1198         c->error[0] = c->error[1] = c->error[2] = 0;
1199
1200         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1201         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1202
1203         out_size = encode_frame(c, s->tmp_frames[0]);
1204
1205         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1206
1207         for (i = 0; i < s->max_b_frames + 1; i++) {
1208             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1209
1210             s->tmp_frames[i + 1]->pict_type = is_p ?
1211                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1212             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1213
1214             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1215
1216             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1217         }
1218
1219         /* get the delayed frames */
1220         while (out_size) {
1221             out_size = encode_frame(c, NULL);
1222             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1223         }
1224
1225         rd += c->error[0] + c->error[1] + c->error[2];
1226
1227         if (rd < best_rd) {
1228             best_rd = rd;
1229             best_b_count = j;
1230         }
1231     }
1232
1233     avcodec_close(c);
1234     av_freep(&c);
1235
1236     return best_b_count;
1237 }
1238
1239 static int select_input_picture(MpegEncContext *s)
1240 {
1241     int i, ret;
1242
1243     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1244         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1245     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1246
1247     /* set next picture type & ordering */
1248     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1249         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1250             !s->next_picture_ptr || s->intra_only) {
1251             s->reordered_input_picture[0] = s->input_picture[0];
1252             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1253             s->reordered_input_picture[0]->f->coded_picture_number =
1254                 s->coded_picture_number++;
1255         } else {
1256             int b_frames;
1257
1258             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1259                 if (s->picture_in_gop_number < s->gop_size &&
1260                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1261                     // FIXME check that te gop check above is +-1 correct
1262                     av_frame_unref(s->input_picture[0]->f);
1263
1264                     emms_c();
1265                     ff_vbv_update(s, 0);
1266
1267                     goto no_output_pic;
1268                 }
1269             }
1270
1271             if (s->flags & CODEC_FLAG_PASS2) {
1272                 for (i = 0; i < s->max_b_frames + 1; i++) {
1273                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1274
1275                     if (pict_num >= s->rc_context.num_entries)
1276                         break;
1277                     if (!s->input_picture[i]) {
1278                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1279                         break;
1280                     }
1281
1282                     s->input_picture[i]->f->pict_type =
1283                         s->rc_context.entry[pict_num].new_pict_type;
1284                 }
1285             }
1286
1287             if (s->avctx->b_frame_strategy == 0) {
1288                 b_frames = s->max_b_frames;
1289                 while (b_frames && !s->input_picture[b_frames])
1290                     b_frames--;
1291             } else if (s->avctx->b_frame_strategy == 1) {
1292                 for (i = 1; i < s->max_b_frames + 1; i++) {
1293                     if (s->input_picture[i] &&
1294                         s->input_picture[i]->b_frame_score == 0) {
1295                         s->input_picture[i]->b_frame_score =
1296                             get_intra_count(s,
1297                                             s->input_picture[i    ]->f->data[0],
1298                                             s->input_picture[i - 1]->f->data[0],
1299                                             s->linesize) + 1;
1300                     }
1301                 }
1302                 for (i = 0; i < s->max_b_frames + 1; i++) {
1303                     if (!s->input_picture[i] ||
1304                         s->input_picture[i]->b_frame_score - 1 >
1305                             s->mb_num / s->avctx->b_sensitivity)
1306                         break;
1307                 }
1308
1309                 b_frames = FFMAX(0, i - 1);
1310
1311                 /* reset scores */
1312                 for (i = 0; i < b_frames + 1; i++) {
1313                     s->input_picture[i]->b_frame_score = 0;
1314                 }
1315             } else if (s->avctx->b_frame_strategy == 2) {
1316                 b_frames = estimate_best_b_count(s);
1317             } else {
1318                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1319                 b_frames = 0;
1320             }
1321
1322             emms_c();
1323
1324             for (i = b_frames - 1; i >= 0; i--) {
1325                 int type = s->input_picture[i]->f->pict_type;
1326                 if (type && type != AV_PICTURE_TYPE_B)
1327                     b_frames = i;
1328             }
1329             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1330                 b_frames == s->max_b_frames) {
1331                 av_log(s->avctx, AV_LOG_ERROR,
1332                        "warning, too many b frames in a row\n");
1333             }
1334
1335             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1336                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1337                     s->gop_size > s->picture_in_gop_number) {
1338                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1339                 } else {
1340                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1341                         b_frames = 0;
1342                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1343                 }
1344             }
1345
1346             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1347                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1348                 b_frames--;
1349
1350             s->reordered_input_picture[0] = s->input_picture[b_frames];
1351             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1352                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1353             s->reordered_input_picture[0]->f->coded_picture_number =
1354                 s->coded_picture_number++;
1355             for (i = 0; i < b_frames; i++) {
1356                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1357                 s->reordered_input_picture[i + 1]->f->pict_type =
1358                     AV_PICTURE_TYPE_B;
1359                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1360                     s->coded_picture_number++;
1361             }
1362         }
1363     }
1364 no_output_pic:
1365     if (s->reordered_input_picture[0]) {
1366         s->reordered_input_picture[0]->reference =
1367            s->reordered_input_picture[0]->f->pict_type !=
1368                AV_PICTURE_TYPE_B ? 3 : 0;
1369
1370         ff_mpeg_unref_picture(s, &s->new_picture);
1371         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1372             return ret;
1373
1374         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1375             // input is a shared pix, so we can't modifiy it -> alloc a new
1376             // one & ensure that the shared one is reuseable
1377
1378             Picture *pic;
1379             int i = ff_find_unused_picture(s, 0);
1380             if (i < 0)
1381                 return i;
1382             pic = &s->picture[i];
1383
1384             pic->reference = s->reordered_input_picture[0]->reference;
1385             if (ff_alloc_picture(s, pic, 0) < 0) {
1386                 return -1;
1387             }
1388
1389             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1390             if (ret < 0)
1391                 return ret;
1392
1393             /* mark us unused / free shared pic */
1394             av_frame_unref(s->reordered_input_picture[0]->f);
1395             s->reordered_input_picture[0]->shared = 0;
1396
1397             s->current_picture_ptr = pic;
1398         } else {
1399             // input is not a shared pix -> reuse buffer for current_pix
1400             s->current_picture_ptr = s->reordered_input_picture[0];
1401             for (i = 0; i < 4; i++) {
1402                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1403             }
1404         }
1405         ff_mpeg_unref_picture(s, &s->current_picture);
1406         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1407                                        s->current_picture_ptr)) < 0)
1408             return ret;
1409
1410         s->picture_number = s->new_picture.f->display_picture_number;
1411     } else {
1412         ff_mpeg_unref_picture(s, &s->new_picture);
1413     }
1414     return 0;
1415 }
1416
1417 static void frame_end(MpegEncContext *s)
1418 {
1419     int i;
1420
1421     if (s->unrestricted_mv &&
1422         s->current_picture.reference &&
1423         !s->intra_only) {
1424         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1425         int hshift = desc->log2_chroma_w;
1426         int vshift = desc->log2_chroma_h;
1427         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1428                                 s->h_edge_pos, s->v_edge_pos,
1429                                 EDGE_WIDTH, EDGE_WIDTH,
1430                                 EDGE_TOP | EDGE_BOTTOM);
1431         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1432                                 s->h_edge_pos >> hshift,
1433                                 s->v_edge_pos >> vshift,
1434                                 EDGE_WIDTH >> hshift,
1435                                 EDGE_WIDTH >> vshift,
1436                                 EDGE_TOP | EDGE_BOTTOM);
1437         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1438                                 s->h_edge_pos >> hshift,
1439                                 s->v_edge_pos >> vshift,
1440                                 EDGE_WIDTH >> hshift,
1441                                 EDGE_WIDTH >> vshift,
1442                                 EDGE_TOP | EDGE_BOTTOM);
1443     }
1444
1445     emms_c();
1446
1447     s->last_pict_type                 = s->pict_type;
1448     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1449     if (s->pict_type!= AV_PICTURE_TYPE_B)
1450         s->last_non_b_pict_type = s->pict_type;
1451
1452     if (s->encoding) {
1453         /* release non-reference frames */
1454         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1455             if (!s->picture[i].reference)
1456                 ff_mpeg_unref_picture(s, &s->picture[i]);
1457         }
1458     }
1459
1460     s->avctx->coded_frame = s->current_picture_ptr->f;
1461
1462 }
1463
1464 static void update_noise_reduction(MpegEncContext *s)
1465 {
1466     int intra, i;
1467
1468     for (intra = 0; intra < 2; intra++) {
1469         if (s->dct_count[intra] > (1 << 16)) {
1470             for (i = 0; i < 64; i++) {
1471                 s->dct_error_sum[intra][i] >>= 1;
1472             }
1473             s->dct_count[intra] >>= 1;
1474         }
1475
1476         for (i = 0; i < 64; i++) {
1477             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1478                                        s->dct_count[intra] +
1479                                        s->dct_error_sum[intra][i] / 2) /
1480                                       (s->dct_error_sum[intra][i] + 1);
1481         }
1482     }
1483 }
1484
1485 static int frame_start(MpegEncContext *s)
1486 {
1487     int ret;
1488
1489     /* mark & release old frames */
1490     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1491         s->last_picture_ptr != s->next_picture_ptr &&
1492         s->last_picture_ptr->f->buf[0]) {
1493         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1494     }
1495
1496     s->current_picture_ptr->f->pict_type = s->pict_type;
1497     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1498
1499     ff_mpeg_unref_picture(s, &s->current_picture);
1500     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1501                                    s->current_picture_ptr)) < 0)
1502         return ret;
1503
1504     if (s->pict_type != AV_PICTURE_TYPE_B) {
1505         s->last_picture_ptr = s->next_picture_ptr;
1506         if (!s->droppable)
1507             s->next_picture_ptr = s->current_picture_ptr;
1508     }
1509
1510     if (s->last_picture_ptr) {
1511         ff_mpeg_unref_picture(s, &s->last_picture);
1512         if (s->last_picture_ptr->f->buf[0] &&
1513             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1514                                        s->last_picture_ptr)) < 0)
1515             return ret;
1516     }
1517     if (s->next_picture_ptr) {
1518         ff_mpeg_unref_picture(s, &s->next_picture);
1519         if (s->next_picture_ptr->f->buf[0] &&
1520             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1521                                        s->next_picture_ptr)) < 0)
1522             return ret;
1523     }
1524
1525     if (s->picture_structure!= PICT_FRAME) {
1526         int i;
1527         for (i = 0; i < 4; i++) {
1528             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1529                 s->current_picture.f->data[i] +=
1530                     s->current_picture.f->linesize[i];
1531             }
1532             s->current_picture.f->linesize[i] *= 2;
1533             s->last_picture.f->linesize[i]    *= 2;
1534             s->next_picture.f->linesize[i]    *= 2;
1535         }
1536     }
1537
1538     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1539         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1540         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1541     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1542         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1543         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1544     } else {
1545         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1546         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1547     }
1548
1549     if (s->dct_error_sum) {
1550         assert(s->avctx->noise_reduction && s->encoding);
1551         update_noise_reduction(s);
1552     }
1553
1554     return 0;
1555 }
1556
1557 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1558                           const AVFrame *pic_arg, int *got_packet)
1559 {
1560     MpegEncContext *s = avctx->priv_data;
1561     int i, stuffing_count, ret;
1562     int context_count = s->slice_context_count;
1563
1564     s->picture_in_gop_number++;
1565
1566     if (load_input_picture(s, pic_arg) < 0)
1567         return -1;
1568
1569     if (select_input_picture(s) < 0) {
1570         return -1;
1571     }
1572
1573     /* output? */
1574     if (s->new_picture.f->data[0]) {
1575         if (!pkt->data &&
1576             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1577             return ret;
1578         if (s->mb_info) {
1579             s->mb_info_ptr = av_packet_new_side_data(pkt,
1580                                  AV_PKT_DATA_H263_MB_INFO,
1581                                  s->mb_width*s->mb_height*12);
1582             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1583         }
1584
1585         for (i = 0; i < context_count; i++) {
1586             int start_y = s->thread_context[i]->start_mb_y;
1587             int   end_y = s->thread_context[i]->  end_mb_y;
1588             int h       = s->mb_height;
1589             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1590             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1591
1592             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1593         }
1594
1595         s->pict_type = s->new_picture.f->pict_type;
1596         //emms_c();
1597         ret = frame_start(s);
1598         if (ret < 0)
1599             return ret;
1600 vbv_retry:
1601         if (encode_picture(s, s->picture_number) < 0)
1602             return -1;
1603
1604         avctx->header_bits = s->header_bits;
1605         avctx->mv_bits     = s->mv_bits;
1606         avctx->misc_bits   = s->misc_bits;
1607         avctx->i_tex_bits  = s->i_tex_bits;
1608         avctx->p_tex_bits  = s->p_tex_bits;
1609         avctx->i_count     = s->i_count;
1610         // FIXME f/b_count in avctx
1611         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1612         avctx->skip_count  = s->skip_count;
1613
1614         frame_end(s);
1615
1616         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1617             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1618
1619         if (avctx->rc_buffer_size) {
1620             RateControlContext *rcc = &s->rc_context;
1621             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1622
1623             if (put_bits_count(&s->pb) > max_size &&
1624                 s->lambda < s->avctx->lmax) {
1625                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1626                                        (s->qscale + 1) / s->qscale);
1627                 if (s->adaptive_quant) {
1628                     int i;
1629                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1630                         s->lambda_table[i] =
1631                             FFMAX(s->lambda_table[i] + 1,
1632                                   s->lambda_table[i] * (s->qscale + 1) /
1633                                   s->qscale);
1634                 }
1635                 s->mb_skipped = 0;        // done in frame_start()
1636                 // done in encode_picture() so we must undo it
1637                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1638                     if (s->flipflop_rounding          ||
1639                         s->codec_id == AV_CODEC_ID_H263P ||
1640                         s->codec_id == AV_CODEC_ID_MPEG4)
1641                         s->no_rounding ^= 1;
1642                 }
1643                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1644                     s->time_base       = s->last_time_base;
1645                     s->last_non_b_time = s->time - s->pp_time;
1646                 }
1647                 for (i = 0; i < context_count; i++) {
1648                     PutBitContext *pb = &s->thread_context[i]->pb;
1649                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1650                 }
1651                 goto vbv_retry;
1652             }
1653
1654             assert(s->avctx->rc_max_rate);
1655         }
1656
1657         if (s->flags & CODEC_FLAG_PASS1)
1658             ff_write_pass1_stats(s);
1659
1660         for (i = 0; i < 4; i++) {
1661             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1662             avctx->error[i] += s->current_picture_ptr->f->error[i];
1663         }
1664
1665         if (s->flags & CODEC_FLAG_PASS1)
1666             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1667                    avctx->i_tex_bits + avctx->p_tex_bits ==
1668                        put_bits_count(&s->pb));
1669         flush_put_bits(&s->pb);
1670         s->frame_bits  = put_bits_count(&s->pb);
1671
1672         stuffing_count = ff_vbv_update(s, s->frame_bits);
1673         if (stuffing_count) {
1674             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1675                     stuffing_count + 50) {
1676                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1677                 return -1;
1678             }
1679
1680             switch (s->codec_id) {
1681             case AV_CODEC_ID_MPEG1VIDEO:
1682             case AV_CODEC_ID_MPEG2VIDEO:
1683                 while (stuffing_count--) {
1684                     put_bits(&s->pb, 8, 0);
1685                 }
1686             break;
1687             case AV_CODEC_ID_MPEG4:
1688                 put_bits(&s->pb, 16, 0);
1689                 put_bits(&s->pb, 16, 0x1C3);
1690                 stuffing_count -= 4;
1691                 while (stuffing_count--) {
1692                     put_bits(&s->pb, 8, 0xFF);
1693                 }
1694             break;
1695             default:
1696                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1697             }
1698             flush_put_bits(&s->pb);
1699             s->frame_bits  = put_bits_count(&s->pb);
1700         }
1701
1702         /* update mpeg1/2 vbv_delay for CBR */
1703         if (s->avctx->rc_max_rate                          &&
1704             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1705             s->out_format == FMT_MPEG1                     &&
1706             90000LL * (avctx->rc_buffer_size - 1) <=
1707                 s->avctx->rc_max_rate * 0xFFFFLL) {
1708             int vbv_delay, min_delay;
1709             double inbits  = s->avctx->rc_max_rate *
1710                              av_q2d(s->avctx->time_base);
1711             int    minbits = s->frame_bits - 8 *
1712                              (s->vbv_delay_ptr - s->pb.buf - 1);
1713             double bits    = s->rc_context.buffer_index + minbits - inbits;
1714
1715             if (bits < 0)
1716                 av_log(s->avctx, AV_LOG_ERROR,
1717                        "Internal error, negative bits\n");
1718
1719             assert(s->repeat_first_field == 0);
1720
1721             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1722             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1723                         s->avctx->rc_max_rate;
1724
1725             vbv_delay = FFMAX(vbv_delay, min_delay);
1726
1727             assert(vbv_delay < 0xFFFF);
1728
1729             s->vbv_delay_ptr[0] &= 0xF8;
1730             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1731             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1732             s->vbv_delay_ptr[2] &= 0x07;
1733             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1734             avctx->vbv_delay     = vbv_delay * 300;
1735         }
1736         s->total_bits     += s->frame_bits;
1737         avctx->frame_bits  = s->frame_bits;
1738
1739         pkt->pts = s->current_picture.f->pts;
1740         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1741             if (!s->current_picture.f->coded_picture_number)
1742                 pkt->dts = pkt->pts - s->dts_delta;
1743             else
1744                 pkt->dts = s->reordered_pts;
1745             s->reordered_pts = pkt->pts;
1746         } else
1747             pkt->dts = pkt->pts;
1748         if (s->current_picture.f->key_frame)
1749             pkt->flags |= AV_PKT_FLAG_KEY;
1750         if (s->mb_info)
1751             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1752     } else {
1753         s->frame_bits = 0;
1754     }
1755     assert((s->frame_bits & 7) == 0);
1756
1757     pkt->size = s->frame_bits / 8;
1758     *got_packet = !!pkt->size;
1759     return 0;
1760 }
1761
1762 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1763                                                 int n, int threshold)
1764 {
1765     static const char tab[64] = {
1766         3, 2, 2, 1, 1, 1, 1, 1,
1767         1, 1, 1, 1, 1, 1, 1, 1,
1768         1, 1, 1, 1, 1, 1, 1, 1,
1769         0, 0, 0, 0, 0, 0, 0, 0,
1770         0, 0, 0, 0, 0, 0, 0, 0,
1771         0, 0, 0, 0, 0, 0, 0, 0,
1772         0, 0, 0, 0, 0, 0, 0, 0,
1773         0, 0, 0, 0, 0, 0, 0, 0
1774     };
1775     int score = 0;
1776     int run = 0;
1777     int i;
1778     int16_t *block = s->block[n];
1779     const int last_index = s->block_last_index[n];
1780     int skip_dc;
1781
1782     if (threshold < 0) {
1783         skip_dc = 0;
1784         threshold = -threshold;
1785     } else
1786         skip_dc = 1;
1787
1788     /* Are all we could set to zero already zero? */
1789     if (last_index <= skip_dc - 1)
1790         return;
1791
1792     for (i = 0; i <= last_index; i++) {
1793         const int j = s->intra_scantable.permutated[i];
1794         const int level = FFABS(block[j]);
1795         if (level == 1) {
1796             if (skip_dc && i == 0)
1797                 continue;
1798             score += tab[run];
1799             run = 0;
1800         } else if (level > 1) {
1801             return;
1802         } else {
1803             run++;
1804         }
1805     }
1806     if (score >= threshold)
1807         return;
1808     for (i = skip_dc; i <= last_index; i++) {
1809         const int j = s->intra_scantable.permutated[i];
1810         block[j] = 0;
1811     }
1812     if (block[0])
1813         s->block_last_index[n] = 0;
1814     else
1815         s->block_last_index[n] = -1;
1816 }
1817
1818 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1819                                int last_index)
1820 {
1821     int i;
1822     const int maxlevel = s->max_qcoeff;
1823     const int minlevel = s->min_qcoeff;
1824     int overflow = 0;
1825
1826     if (s->mb_intra) {
1827         i = 1; // skip clipping of intra dc
1828     } else
1829         i = 0;
1830
1831     for (; i <= last_index; i++) {
1832         const int j = s->intra_scantable.permutated[i];
1833         int level = block[j];
1834
1835         if (level > maxlevel) {
1836             level = maxlevel;
1837             overflow++;
1838         } else if (level < minlevel) {
1839             level = minlevel;
1840             overflow++;
1841         }
1842
1843         block[j] = level;
1844     }
1845
1846     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1847         av_log(s->avctx, AV_LOG_INFO,
1848                "warning, clipping %d dct coefficients to %d..%d\n",
1849                overflow, minlevel, maxlevel);
1850 }
1851
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel activity.
 *
 * For each pixel the (up to) 3x3 neighbourhood, clipped to the 8x8 block,
 * is scanned and the weight is set to
 *     36 * ff_sqrt(count * sum(v^2) - sum(v)^2) / count
 * i.e. a value proportional to the spread of the neighbouring samples
 * (ff_sqrt is presumably an integer square root -- defined elsewhere).
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 source block
 * @param stride distance in bytes between two source rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int pix = line[nx];

                    total    += pix;
                    total_sq += pix * pix;
                    samples++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1875
1876 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1877                                                 int motion_x, int motion_y,
1878                                                 int mb_block_height,
1879                                                 int mb_block_count)
1880 {
1881     int16_t weight[8][64];
1882     int16_t orig[8][64];
1883     const int mb_x = s->mb_x;
1884     const int mb_y = s->mb_y;
1885     int i;
1886     int skip_dct[8];
1887     int dct_offset = s->linesize * 8; // default for progressive frames
1888     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1889     ptrdiff_t wrap_y, wrap_c;
1890
1891     for (i = 0; i < mb_block_count; i++)
1892         skip_dct[i] = s->skipdct;
1893
1894     if (s->adaptive_quant) {
1895         const int last_qp = s->qscale;
1896         const int mb_xy = mb_x + mb_y * s->mb_stride;
1897
1898         s->lambda = s->lambda_table[mb_xy];
1899         update_qscale(s);
1900
1901         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1902             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1903             s->dquant = s->qscale - last_qp;
1904
1905             if (s->out_format == FMT_H263) {
1906                 s->dquant = av_clip(s->dquant, -2, 2);
1907
1908                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1909                     if (!s->mb_intra) {
1910                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1911                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1912                                 s->dquant = 0;
1913                         }
1914                         if (s->mv_type == MV_TYPE_8X8)
1915                             s->dquant = 0;
1916                     }
1917                 }
1918             }
1919         }
1920         ff_set_qscale(s, last_qp + s->dquant);
1921     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1922         ff_set_qscale(s, s->qscale + s->dquant);
1923
1924     wrap_y = s->linesize;
1925     wrap_c = s->uvlinesize;
1926     ptr_y  = s->new_picture.f->data[0] +
1927              (mb_y * 16 * wrap_y)              + mb_x * 16;
1928     ptr_cb = s->new_picture.f->data[1] +
1929              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1930     ptr_cr = s->new_picture.f->data[2] +
1931              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1932
1933     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1934         uint8_t *ebuf = s->edge_emu_buffer + 32;
1935         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1936                                  wrap_y, wrap_y,
1937                                  16, 16, mb_x * 16, mb_y * 16,
1938                                  s->width, s->height);
1939         ptr_y = ebuf;
1940         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1941                                  wrap_c, wrap_c,
1942                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1943                                  s->width >> 1, s->height >> 1);
1944         ptr_cb = ebuf + 18 * wrap_y;
1945         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1946                                  wrap_c, wrap_c,
1947                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1948                                  s->width >> 1, s->height >> 1);
1949         ptr_cr = ebuf + 18 * wrap_y + 8;
1950     }
1951
1952     if (s->mb_intra) {
1953         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1954             int progressive_score, interlaced_score;
1955
1956             s->interlaced_dct = 0;
1957             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1958                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1959                                                      NULL, wrap_y, 8) - 400;
1960
1961             if (progressive_score > 0) {
1962                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1963                                                         NULL, wrap_y * 2, 8) +
1964                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1965                                                         NULL, wrap_y * 2, 8);
1966                 if (progressive_score > interlaced_score) {
1967                     s->interlaced_dct = 1;
1968
1969                     dct_offset = wrap_y;
1970                     wrap_y <<= 1;
1971                     if (s->chroma_format == CHROMA_422)
1972                         wrap_c <<= 1;
1973                 }
1974             }
1975         }
1976
1977         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1978         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1979         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1980         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1981
1982         if (s->flags & CODEC_FLAG_GRAY) {
1983             skip_dct[4] = 1;
1984             skip_dct[5] = 1;
1985         } else {
1986             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1987             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1988             if (!s->chroma_y_shift) { /* 422 */
1989                 s->pdsp.get_pixels(s->block[6],
1990                                    ptr_cb + (dct_offset >> 1), wrap_c);
1991                 s->pdsp.get_pixels(s->block[7],
1992                                    ptr_cr + (dct_offset >> 1), wrap_c);
1993             }
1994         }
1995     } else {
1996         op_pixels_func (*op_pix)[4];
1997         qpel_mc_func (*op_qpix)[16];
1998         uint8_t *dest_y, *dest_cb, *dest_cr;
1999
2000         dest_y  = s->dest[0];
2001         dest_cb = s->dest[1];
2002         dest_cr = s->dest[2];
2003
2004         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2005             op_pix  = s->hdsp.put_pixels_tab;
2006             op_qpix = s->qdsp.put_qpel_pixels_tab;
2007         } else {
2008             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2009             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2010         }
2011
2012         if (s->mv_dir & MV_DIR_FORWARD) {
2013             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2014                           s->last_picture.f->data,
2015                           op_pix, op_qpix);
2016             op_pix  = s->hdsp.avg_pixels_tab;
2017             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2018         }
2019         if (s->mv_dir & MV_DIR_BACKWARD) {
2020             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2021                           s->next_picture.f->data,
2022                           op_pix, op_qpix);
2023         }
2024
2025         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2026             int progressive_score, interlaced_score;
2027
2028             s->interlaced_dct = 0;
2029             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2030                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2031                                                      ptr_y + wrap_y * 8,
2032                                                      wrap_y, 8) - 400;
2033
2034             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2035                 progressive_score -= 400;
2036
2037             if (progressive_score > 0) {
2038                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2039                                                         wrap_y * 2, 8) +
2040                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2041                                                         ptr_y + wrap_y,
2042                                                         wrap_y * 2, 8);
2043
2044                 if (progressive_score > interlaced_score) {
2045                     s->interlaced_dct = 1;
2046
2047                     dct_offset = wrap_y;
2048                     wrap_y <<= 1;
2049                     if (s->chroma_format == CHROMA_422)
2050                         wrap_c <<= 1;
2051                 }
2052             }
2053         }
2054
2055         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2056         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2057         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2058                             dest_y + dct_offset, wrap_y);
2059         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2060                             dest_y + dct_offset + 8, wrap_y);
2061
2062         if (s->flags & CODEC_FLAG_GRAY) {
2063             skip_dct[4] = 1;
2064             skip_dct[5] = 1;
2065         } else {
2066             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2067             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2068             if (!s->chroma_y_shift) { /* 422 */
2069                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2070                                     dest_cb + (dct_offset >> 1), wrap_c);
2071                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2072                                     dest_cr + (dct_offset >> 1), wrap_c);
2073             }
2074         }
2075         /* pre quantization */
2076         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2077                 2 * s->qscale * s->qscale) {
2078             // FIXME optimize
2079             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2080                 skip_dct[0] = 1;
2081             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2082                 skip_dct[1] = 1;
2083             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2084                                wrap_y, 8) < 20 * s->qscale)
2085                 skip_dct[2] = 1;
2086             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2087                                wrap_y, 8) < 20 * s->qscale)
2088                 skip_dct[3] = 1;
2089             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2090                 skip_dct[4] = 1;
2091             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2092                 skip_dct[5] = 1;
2093             if (!s->chroma_y_shift) { /* 422 */
2094                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2095                                    dest_cb + (dct_offset >> 1),
2096                                    wrap_c, 8) < 20 * s->qscale)
2097                     skip_dct[6] = 1;
2098                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2099                                    dest_cr + (dct_offset >> 1),
2100                                    wrap_c, 8) < 20 * s->qscale)
2101                     skip_dct[7] = 1;
2102             }
2103         }
2104     }
2105
2106     if (s->quantizer_noise_shaping) {
2107         if (!skip_dct[0])
2108             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2109         if (!skip_dct[1])
2110             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2111         if (!skip_dct[2])
2112             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2113         if (!skip_dct[3])
2114             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2115         if (!skip_dct[4])
2116             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2117         if (!skip_dct[5])
2118             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2119         if (!s->chroma_y_shift) { /* 422 */
2120             if (!skip_dct[6])
2121                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2122                                   wrap_c);
2123             if (!skip_dct[7])
2124                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2125                                   wrap_c);
2126         }
2127         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2128     }
2129
2130     /* DCT & quantize */
2131     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2132     {
2133         for (i = 0; i < mb_block_count; i++) {
2134             if (!skip_dct[i]) {
2135                 int overflow;
2136                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2137                 // FIXME we could decide to change to quantizer instead of
2138                 // clipping
2139                 // JS: I don't think that would be a good idea it could lower
2140                 //     quality instead of improve it. Just INTRADC clipping
2141                 //     deserves changes in quantizer
2142                 if (overflow)
2143                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2144             } else
2145                 s->block_last_index[i] = -1;
2146         }
2147         if (s->quantizer_noise_shaping) {
2148             for (i = 0; i < mb_block_count; i++) {
2149                 if (!skip_dct[i]) {
2150                     s->block_last_index[i] =
2151                         dct_quantize_refine(s, s->block[i], weight[i],
2152                                             orig[i], i, s->qscale);
2153                 }
2154             }
2155         }
2156
2157         if (s->luma_elim_threshold && !s->mb_intra)
2158             for (i = 0; i < 4; i++)
2159                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2160         if (s->chroma_elim_threshold && !s->mb_intra)
2161             for (i = 4; i < mb_block_count; i++)
2162                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2163
2164         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2165             for (i = 0; i < mb_block_count; i++) {
2166                 if (s->block_last_index[i] == -1)
2167                     s->coded_score[i] = INT_MAX / 256;
2168             }
2169         }
2170     }
2171
2172     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2173         s->block_last_index[4] =
2174         s->block_last_index[5] = 0;
2175         s->block[4][0] =
2176         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2177     }
2178
2179     // non c quantize code returns incorrect block_last_index FIXME
2180     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2181         for (i = 0; i < mb_block_count; i++) {
2182             int j;
2183             if (s->block_last_index[i] > 0) {
2184                 for (j = 63; j > 0; j--) {
2185                     if (s->block[i][s->intra_scantable.permutated[j]])
2186                         break;
2187                 }
2188                 s->block_last_index[i] = j;
2189             }
2190         }
2191     }
2192
2193     /* huffman encode */
2194     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2195     case AV_CODEC_ID_MPEG1VIDEO:
2196     case AV_CODEC_ID_MPEG2VIDEO:
2197         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2198             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2199         break;
2200     case AV_CODEC_ID_MPEG4:
2201         if (CONFIG_MPEG4_ENCODER)
2202             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2203         break;
2204     case AV_CODEC_ID_MSMPEG4V2:
2205     case AV_CODEC_ID_MSMPEG4V3:
2206     case AV_CODEC_ID_WMV1:
2207         if (CONFIG_MSMPEG4_ENCODER)
2208             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2209         break;
2210     case AV_CODEC_ID_WMV2:
2211         if (CONFIG_WMV2_ENCODER)
2212             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2213         break;
2214     case AV_CODEC_ID_H261:
2215         if (CONFIG_H261_ENCODER)
2216             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2217         break;
2218     case AV_CODEC_ID_H263:
2219     case AV_CODEC_ID_H263P:
2220     case AV_CODEC_ID_FLV1:
2221     case AV_CODEC_ID_RV10:
2222     case AV_CODEC_ID_RV20:
2223         if (CONFIG_H263_ENCODER)
2224             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2225         break;
2226     case AV_CODEC_ID_MJPEG:
2227         if (CONFIG_MJPEG_ENCODER)
2228             ff_mjpeg_encode_mb(s, s->block);
2229         break;
2230     default:
2231         assert(0);
2232     }
2233 }
2234
2235 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2236 {
2237     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2238     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2239 }
2240
2241 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2242     int i;
2243
2244     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2245
2246     /* mpeg1 */
2247     d->mb_skip_run= s->mb_skip_run;
2248     for(i=0; i<3; i++)
2249         d->last_dc[i] = s->last_dc[i];
2250
2251     /* statistics */
2252     d->mv_bits= s->mv_bits;
2253     d->i_tex_bits= s->i_tex_bits;
2254     d->p_tex_bits= s->p_tex_bits;
2255     d->i_count= s->i_count;
2256     d->f_count= s->f_count;
2257     d->b_count= s->b_count;
2258     d->skip_count= s->skip_count;
2259     d->misc_bits= s->misc_bits;
2260     d->last_bits= 0;
2261
2262     d->mb_skipped= 0;
2263     d->qscale= s->qscale;
2264     d->dquant= s->dquant;
2265
2266     d->esc3_level_length= s->esc3_level_length;
2267 }
2268
2269 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2270     int i;
2271
2272     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2273     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2274
2275     /* mpeg1 */
2276     d->mb_skip_run= s->mb_skip_run;
2277     for(i=0; i<3; i++)
2278         d->last_dc[i] = s->last_dc[i];
2279
2280     /* statistics */
2281     d->mv_bits= s->mv_bits;
2282     d->i_tex_bits= s->i_tex_bits;
2283     d->p_tex_bits= s->p_tex_bits;
2284     d->i_count= s->i_count;
2285     d->f_count= s->f_count;
2286     d->b_count= s->b_count;
2287     d->skip_count= s->skip_count;
2288     d->misc_bits= s->misc_bits;
2289
2290     d->mb_intra= s->mb_intra;
2291     d->mb_skipped= s->mb_skipped;
2292     d->mv_type= s->mv_type;
2293     d->mv_dir= s->mv_dir;
2294     d->pb= s->pb;
2295     if(s->data_partitioning){
2296         d->pb2= s->pb2;
2297         d->tex_pb= s->tex_pb;
2298     }
2299     d->block= s->block;
2300     for(i=0; i<8; i++)
2301         d->block_last_index[i]= s->block_last_index[i];
2302     d->interlaced_dct= s->interlaced_dct;
2303     d->qscale= s->qscale;
2304
2305     d->esc3_level_length= s->esc3_level_length;
2306 }
2307
2308 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2309                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2310                            int *dmin, int *next_block, int motion_x, int motion_y)
2311 {
2312     int score;
2313     uint8_t *dest_backup[3];
2314
2315     copy_context_before_encode(s, backup, type);
2316
2317     s->block= s->blocks[*next_block];
2318     s->pb= pb[*next_block];
2319     if(s->data_partitioning){
2320         s->pb2   = pb2   [*next_block];
2321         s->tex_pb= tex_pb[*next_block];
2322     }
2323
2324     if(*next_block){
2325         memcpy(dest_backup, s->dest, sizeof(s->dest));
2326         s->dest[0] = s->rd_scratchpad;
2327         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2328         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2329         assert(s->linesize >= 32); //FIXME
2330     }
2331
2332     encode_mb(s, motion_x, motion_y);
2333
2334     score= put_bits_count(&s->pb);
2335     if(s->data_partitioning){
2336         score+= put_bits_count(&s->pb2);
2337         score+= put_bits_count(&s->tex_pb);
2338     }
2339
2340     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2341         ff_mpv_decode_mb(s, s->block);
2342
2343         score *= s->lambda2;
2344         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2345     }
2346
2347     if(*next_block){
2348         memcpy(s->dest, dest_backup, sizeof(s->dest));
2349     }
2350
2351     if(score<*dmin){
2352         *dmin= score;
2353         *next_block^=1;
2354
2355         copy_context_after_encode(best, s, type);
2356     }
2357 }
2358
2359 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2360     uint32_t *sq = ff_square_tab + 256;
2361     int acc=0;
2362     int x,y;
2363
2364     if(w==16 && h==16)
2365         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2366     else if(w==8 && h==8)
2367         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2368
2369     for(y=0; y<h; y++){
2370         for(x=0; x<w; x++){
2371             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2372         }
2373     }
2374
2375     assert(acc>=0);
2376
2377     return acc;
2378 }
2379
2380 static int sse_mb(MpegEncContext *s){
2381     int w= 16;
2382     int h= 16;
2383
2384     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2385     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2386
2387     if(w==16 && h==16)
2388       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2389         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2390                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2391                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2392       }else{
2393         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2394                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2395                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2396       }
2397     else
2398         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2399                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2400                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2401 }
2402
2403 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2404     MpegEncContext *s= *(void**)arg;
2405
2406
2407     s->me.pre_pass=1;
2408     s->me.dia_size= s->avctx->pre_dia_size;
2409     s->first_slice_line=1;
2410     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2411         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2412             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2413         }
2414         s->first_slice_line=0;
2415     }
2416
2417     s->me.pre_pass=0;
2418
2419     return 0;
2420 }
2421
2422 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2423     MpegEncContext *s= *(void**)arg;
2424
2425     s->me.dia_size= s->avctx->dia_size;
2426     s->first_slice_line=1;
2427     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2428         s->mb_x=0; //for block init below
2429         ff_init_block_index(s);
2430         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2431             s->block_index[0]+=2;
2432             s->block_index[1]+=2;
2433             s->block_index[2]+=2;
2434             s->block_index[3]+=2;
2435
2436             /* compute motion vector & mb_type and store in context */
2437             if(s->pict_type==AV_PICTURE_TYPE_B)
2438                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2439             else
2440                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2441         }
2442         s->first_slice_line=0;
2443     }
2444     return 0;
2445 }
2446
2447 static int mb_var_thread(AVCodecContext *c, void *arg){
2448     MpegEncContext *s= *(void**)arg;
2449     int mb_x, mb_y;
2450
2451     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2452         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2453             int xx = mb_x * 16;
2454             int yy = mb_y * 16;
2455             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2456             int varc;
2457             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2458
2459             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2460                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2461
2462             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2463             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2464             s->me.mb_var_sum_temp    += varc;
2465         }
2466     }
2467     return 0;
2468 }
2469
2470 static void write_slice_end(MpegEncContext *s){
2471     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2472         if(s->partitioned_frame){
2473             ff_mpeg4_merge_partitions(s);
2474         }
2475
2476         ff_mpeg4_stuffing(&s->pb);
2477     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2478         ff_mjpeg_encode_stuffing(&s->pb);
2479     }
2480
2481     avpriv_align_put_bits(&s->pb);
2482     flush_put_bits(&s->pb);
2483
2484     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2485         s->misc_bits+= get_bits_diff(s);
2486 }
2487
2488 static void write_mb_info(MpegEncContext *s)
2489 {
2490     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2491     int offset = put_bits_count(&s->pb);
2492     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2493     int gobn = s->mb_y / s->gob_index;
2494     int pred_x, pred_y;
2495     if (CONFIG_H263_ENCODER)
2496         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2497     bytestream_put_le32(&ptr, offset);
2498     bytestream_put_byte(&ptr, s->qscale);
2499     bytestream_put_byte(&ptr, gobn);
2500     bytestream_put_le16(&ptr, mba);
2501     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2502     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2503     /* 4MV not implemented */
2504     bytestream_put_byte(&ptr, 0); /* hmv2 */
2505     bytestream_put_byte(&ptr, 0); /* vmv2 */
2506 }
2507
2508 static void update_mb_info(MpegEncContext *s, int startcode)
2509 {
2510     if (!s->mb_info)
2511         return;
2512     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2513         s->mb_info_size += 12;
2514         s->prev_mb_info = s->last_mb_info;
2515     }
2516     if (startcode) {
2517         s->prev_mb_info = put_bits_count(&s->pb)/8;
2518         /* This might have incremented mb_info_size above, and we return without
2519          * actually writing any info into that slot yet. But in that case,
2520          * this will be called again at the start of the after writing the
2521          * start code, actually writing the mb info. */
2522         return;
2523     }
2524
2525     s->last_mb_info = put_bits_count(&s->pb)/8;
2526     if (!s->mb_info_size)
2527         s->mb_info_size += 12;
2528     write_mb_info(s);
2529 }
2530
2531 static int encode_thread(AVCodecContext *c, void *arg){
2532     MpegEncContext *s= *(void**)arg;
2533     int mb_x, mb_y, pdif = 0;
2534     int chr_h= 16>>s->chroma_y_shift;
2535     int i, j;
2536     MpegEncContext best_s, backup_s;
2537     uint8_t bit_buf[2][MAX_MB_BYTES];
2538     uint8_t bit_buf2[2][MAX_MB_BYTES];
2539     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2540     PutBitContext pb[2], pb2[2], tex_pb[2];
2541
2542     for(i=0; i<2; i++){
2543         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2544         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2545         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2546     }
2547
2548     s->last_bits= put_bits_count(&s->pb);
2549     s->mv_bits=0;
2550     s->misc_bits=0;
2551     s->i_tex_bits=0;
2552     s->p_tex_bits=0;
2553     s->i_count=0;
2554     s->f_count=0;
2555     s->b_count=0;
2556     s->skip_count=0;
2557
2558     for(i=0; i<3; i++){
2559         /* init last dc values */
2560         /* note: quant matrix value (8) is implied here */
2561         s->last_dc[i] = 128 << s->intra_dc_precision;
2562
2563         s->current_picture.f->error[i] = 0;
2564     }
2565     s->mb_skip_run = 0;
2566     memset(s->last_mv, 0, sizeof(s->last_mv));
2567
2568     s->last_mv_dir = 0;
2569
2570     switch(s->codec_id){
2571     case AV_CODEC_ID_H263:
2572     case AV_CODEC_ID_H263P:
2573     case AV_CODEC_ID_FLV1:
2574         if (CONFIG_H263_ENCODER)
2575             s->gob_index = ff_h263_get_gob_height(s);
2576         break;
2577     case AV_CODEC_ID_MPEG4:
2578         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2579             ff_mpeg4_init_partitions(s);
2580         break;
2581     }
2582
2583     s->resync_mb_x=0;
2584     s->resync_mb_y=0;
2585     s->first_slice_line = 1;
2586     s->ptr_lastgob = s->pb.buf;
2587     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2588         s->mb_x=0;
2589         s->mb_y= mb_y;
2590
2591         ff_set_qscale(s, s->qscale);
2592         ff_init_block_index(s);
2593
2594         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2595             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2596             int mb_type= s->mb_type[xy];
2597 //            int d;
2598             int dmin= INT_MAX;
2599             int dir;
2600
2601             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2602                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2603                 return -1;
2604             }
2605             if(s->data_partitioning){
2606                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2607                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2608                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2609                     return -1;
2610                 }
2611             }
2612
2613             s->mb_x = mb_x;
2614             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2615             ff_update_block_index(s);
2616
2617             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2618                 ff_h261_reorder_mb_index(s);
2619                 xy= s->mb_y*s->mb_stride + s->mb_x;
2620                 mb_type= s->mb_type[xy];
2621             }
2622
2623             /* write gob / video packet header  */
2624             if(s->rtp_mode){
2625                 int current_packet_size, is_gob_start;
2626
2627                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2628
2629                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2630
2631                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2632
2633                 switch(s->codec_id){
2634                 case AV_CODEC_ID_H263:
2635                 case AV_CODEC_ID_H263P:
2636                     if(!s->h263_slice_structured)
2637                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2638                     break;
2639                 case AV_CODEC_ID_MPEG2VIDEO:
2640                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2641                 case AV_CODEC_ID_MPEG1VIDEO:
2642                     if(s->mb_skip_run) is_gob_start=0;
2643                     break;
2644                 }
2645
2646                 if(is_gob_start){
2647                     if(s->start_mb_y != mb_y || mb_x!=0){
2648                         write_slice_end(s);
2649
2650                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2651                             ff_mpeg4_init_partitions(s);
2652                         }
2653                     }
2654
2655                     assert((put_bits_count(&s->pb)&7) == 0);
2656                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2657
2658                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2659                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2660                         int d = 100 / s->error_rate;
2661                         if(r % d == 0){
2662                             current_packet_size=0;
2663                             s->pb.buf_ptr= s->ptr_lastgob;
2664                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2665                         }
2666                     }
2667
2668                     if (s->avctx->rtp_callback){
2669                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2670                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2671                     }
2672                     update_mb_info(s, 1);
2673
2674                     switch(s->codec_id){
2675                     case AV_CODEC_ID_MPEG4:
2676                         if (CONFIG_MPEG4_ENCODER) {
2677                             ff_mpeg4_encode_video_packet_header(s);
2678                             ff_mpeg4_clean_buffers(s);
2679                         }
2680                     break;
2681                     case AV_CODEC_ID_MPEG1VIDEO:
2682                     case AV_CODEC_ID_MPEG2VIDEO:
2683                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2684                             ff_mpeg1_encode_slice_header(s);
2685                             ff_mpeg1_clean_buffers(s);
2686                         }
2687                     break;
2688                     case AV_CODEC_ID_H263:
2689                     case AV_CODEC_ID_H263P:
2690                         if (CONFIG_H263_ENCODER)
2691                             ff_h263_encode_gob_header(s, mb_y);
2692                     break;
2693                     }
2694
2695                     if(s->flags&CODEC_FLAG_PASS1){
2696                         int bits= put_bits_count(&s->pb);
2697                         s->misc_bits+= bits - s->last_bits;
2698                         s->last_bits= bits;
2699                     }
2700
2701                     s->ptr_lastgob += current_packet_size;
2702                     s->first_slice_line=1;
2703                     s->resync_mb_x=mb_x;
2704                     s->resync_mb_y=mb_y;
2705                 }
2706             }
2707
2708             if(  (s->resync_mb_x   == s->mb_x)
2709                && s->resync_mb_y+1 == s->mb_y){
2710                 s->first_slice_line=0;
2711             }
2712
2713             s->mb_skipped=0;
2714             s->dquant=0; //only for QP_RD
2715
2716             update_mb_info(s, 0);
2717
2718             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2719                 int next_block=0;
2720                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2721
2722                 copy_context_before_encode(&backup_s, s, -1);
2723                 backup_s.pb= s->pb;
2724                 best_s.data_partitioning= s->data_partitioning;
2725                 best_s.partitioned_frame= s->partitioned_frame;
2726                 if(s->data_partitioning){
2727                     backup_s.pb2= s->pb2;
2728                     backup_s.tex_pb= s->tex_pb;
2729                 }
2730
2731                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2732                     s->mv_dir = MV_DIR_FORWARD;
2733                     s->mv_type = MV_TYPE_16X16;
2734                     s->mb_intra= 0;
2735                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2736                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2737                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2738                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2739                 }
2740                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2741                     s->mv_dir = MV_DIR_FORWARD;
2742                     s->mv_type = MV_TYPE_FIELD;
2743                     s->mb_intra= 0;
2744                     for(i=0; i<2; i++){
2745                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2746                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2747                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2748                     }
2749                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2750                                  &dmin, &next_block, 0, 0);
2751                 }
2752                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2753                     s->mv_dir = MV_DIR_FORWARD;
2754                     s->mv_type = MV_TYPE_16X16;
2755                     s->mb_intra= 0;
2756                     s->mv[0][0][0] = 0;
2757                     s->mv[0][0][1] = 0;
2758                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2759                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2760                 }
2761                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2762                     s->mv_dir = MV_DIR_FORWARD;
2763                     s->mv_type = MV_TYPE_8X8;
2764                     s->mb_intra= 0;
2765                     for(i=0; i<4; i++){
2766                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2767                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2768                     }
2769                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2770                                  &dmin, &next_block, 0, 0);
2771                 }
2772                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2773                     s->mv_dir = MV_DIR_FORWARD;
2774                     s->mv_type = MV_TYPE_16X16;
2775                     s->mb_intra= 0;
2776                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2777                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2778                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2779                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2780                 }
2781                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2782                     s->mv_dir = MV_DIR_BACKWARD;
2783                     s->mv_type = MV_TYPE_16X16;
2784                     s->mb_intra= 0;
2785                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2786                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2787                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2788                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2789                 }
2790                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2791                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2792                     s->mv_type = MV_TYPE_16X16;
2793                     s->mb_intra= 0;
2794                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2795                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2796                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2797                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2798                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2799                                  &dmin, &next_block, 0, 0);
2800                 }
2801                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2802                     s->mv_dir = MV_DIR_FORWARD;
2803                     s->mv_type = MV_TYPE_FIELD;
2804                     s->mb_intra= 0;
2805                     for(i=0; i<2; i++){
2806                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2807                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2808                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2809                     }
2810                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2811                                  &dmin, &next_block, 0, 0);
2812                 }
2813                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2814                     s->mv_dir = MV_DIR_BACKWARD;
2815                     s->mv_type = MV_TYPE_FIELD;
2816                     s->mb_intra= 0;
2817                     for(i=0; i<2; i++){
2818                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2819                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2820                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2821                     }
2822                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2823                                  &dmin, &next_block, 0, 0);
2824                 }
2825                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2826                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2827                     s->mv_type = MV_TYPE_FIELD;
2828                     s->mb_intra= 0;
2829                     for(dir=0; dir<2; dir++){
2830                         for(i=0; i<2; i++){
2831                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2832                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2833                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2834                         }
2835                     }
2836                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2837                                  &dmin, &next_block, 0, 0);
2838                 }
2839                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2840                     s->mv_dir = 0;
2841                     s->mv_type = MV_TYPE_16X16;
2842                     s->mb_intra= 1;
2843                     s->mv[0][0][0] = 0;
2844                     s->mv[0][0][1] = 0;
2845                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2846                                  &dmin, &next_block, 0, 0);
2847                     if(s->h263_pred || s->h263_aic){
2848                         if(best_s.mb_intra)
2849                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2850                         else
2851                             ff_clean_intra_table_entries(s); //old mode?
2852                     }
2853                 }
2854
2855                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2856                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2857                         const int last_qp= backup_s.qscale;
2858                         int qpi, qp, dc[6];
2859                         int16_t ac[6][16];
2860                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2861                         static const int dquant_tab[4]={-1,1,-2,2};
2862
2863                         assert(backup_s.dquant == 0);
2864
2865                         //FIXME intra
2866                         s->mv_dir= best_s.mv_dir;
2867                         s->mv_type = MV_TYPE_16X16;
2868                         s->mb_intra= best_s.mb_intra;
2869                         s->mv[0][0][0] = best_s.mv[0][0][0];
2870                         s->mv[0][0][1] = best_s.mv[0][0][1];
2871                         s->mv[1][0][0] = best_s.mv[1][0][0];
2872                         s->mv[1][0][1] = best_s.mv[1][0][1];
2873
2874                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2875                         for(; qpi<4; qpi++){
2876                             int dquant= dquant_tab[qpi];
2877                             qp= last_qp + dquant;
2878                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2879                                 continue;
2880                             backup_s.dquant= dquant;
2881                             if(s->mb_intra && s->dc_val[0]){
2882                                 for(i=0; i<6; i++){
2883                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2884                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2885                                 }
2886                             }
2887
2888                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2889                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2890                             if(best_s.qscale != qp){
2891                                 if(s->mb_intra && s->dc_val[0]){
2892                                     for(i=0; i<6; i++){
2893                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2894                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2895                                     }
2896                                 }
2897                             }
2898                         }
2899                     }
2900                 }
2901                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2902                     int mx= s->b_direct_mv_table[xy][0];
2903                     int my= s->b_direct_mv_table[xy][1];
2904
2905                     backup_s.dquant = 0;
2906                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2907                     s->mb_intra= 0;
2908                     ff_mpeg4_set_direct_mv(s, mx, my);
2909                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2910                                  &dmin, &next_block, mx, my);
2911                 }
2912                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2913                     backup_s.dquant = 0;
2914                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2915                     s->mb_intra= 0;
2916                     ff_mpeg4_set_direct_mv(s, 0, 0);
2917                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2918                                  &dmin, &next_block, 0, 0);
2919                 }
2920                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2921                     int coded=0;
2922                     for(i=0; i<6; i++)
2923                         coded |= s->block_last_index[i];
2924                     if(coded){
2925                         int mx,my;
2926                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2927                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2928                             mx=my=0; //FIXME find the one we actually used
2929                             ff_mpeg4_set_direct_mv(s, mx, my);
2930                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2931                             mx= s->mv[1][0][0];
2932                             my= s->mv[1][0][1];
2933                         }else{
2934                             mx= s->mv[0][0][0];
2935                             my= s->mv[0][0][1];
2936                         }
2937
2938                         s->mv_dir= best_s.mv_dir;
2939                         s->mv_type = best_s.mv_type;
2940                         s->mb_intra= 0;
2941 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2942                         s->mv[0][0][1] = best_s.mv[0][0][1];
2943                         s->mv[1][0][0] = best_s.mv[1][0][0];
2944                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2945                         backup_s.dquant= 0;
2946                         s->skipdct=1;
2947                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2948                                         &dmin, &next_block, mx, my);
2949                         s->skipdct=0;
2950                     }
2951                 }
2952
2953                 s->current_picture.qscale_table[xy] = best_s.qscale;
2954
2955                 copy_context_after_encode(s, &best_s, -1);
2956
2957                 pb_bits_count= put_bits_count(&s->pb);
2958                 flush_put_bits(&s->pb);
2959                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2960                 s->pb= backup_s.pb;
2961
2962                 if(s->data_partitioning){
2963                     pb2_bits_count= put_bits_count(&s->pb2);
2964                     flush_put_bits(&s->pb2);
2965                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2966                     s->pb2= backup_s.pb2;
2967
2968                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2969                     flush_put_bits(&s->tex_pb);
2970                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2971                     s->tex_pb= backup_s.tex_pb;
2972                 }
2973                 s->last_bits= put_bits_count(&s->pb);
2974
2975                 if (CONFIG_H263_ENCODER &&
2976                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2977                     ff_h263_update_motion_val(s);
2978
2979                 if(next_block==0){ //FIXME 16 vs linesize16
2980                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2981                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2982                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2983                 }
2984
2985                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2986                     ff_mpv_decode_mb(s, s->block);
2987             } else {
2988                 int motion_x = 0, motion_y = 0;
2989                 s->mv_type=MV_TYPE_16X16;
2990                 // only one MB-Type possible
2991
2992                 switch(mb_type){
2993                 case CANDIDATE_MB_TYPE_INTRA:
2994                     s->mv_dir = 0;
2995                     s->mb_intra= 1;
2996                     motion_x= s->mv[0][0][0] = 0;
2997                     motion_y= s->mv[0][0][1] = 0;
2998                     break;
2999                 case CANDIDATE_MB_TYPE_INTER:
3000                     s->mv_dir = MV_DIR_FORWARD;
3001                     s->mb_intra= 0;
3002                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3003                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3004                     break;
3005                 case CANDIDATE_MB_TYPE_INTER_I:
3006                     s->mv_dir = MV_DIR_FORWARD;
3007                     s->mv_type = MV_TYPE_FIELD;
3008                     s->mb_intra= 0;
3009                     for(i=0; i<2; i++){
3010                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3011                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3012                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3013                     }
3014                     break;
3015                 case CANDIDATE_MB_TYPE_INTER4V:
3016                     s->mv_dir = MV_DIR_FORWARD;
3017                     s->mv_type = MV_TYPE_8X8;
3018                     s->mb_intra= 0;
3019                     for(i=0; i<4; i++){
3020                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3021                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3022                     }
3023                     break;
3024                 case CANDIDATE_MB_TYPE_DIRECT:
3025                     if (CONFIG_MPEG4_ENCODER) {
3026                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3027                         s->mb_intra= 0;
3028                         motion_x=s->b_direct_mv_table[xy][0];
3029                         motion_y=s->b_direct_mv_table[xy][1];
3030                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3031                     }
3032                     break;
3033                 case CANDIDATE_MB_TYPE_DIRECT0:
3034                     if (CONFIG_MPEG4_ENCODER) {
3035                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3036                         s->mb_intra= 0;
3037                         ff_mpeg4_set_direct_mv(s, 0, 0);
3038                     }
3039                     break;
3040                 case CANDIDATE_MB_TYPE_BIDIR:
3041                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3042                     s->mb_intra= 0;
3043                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3044                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3045                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3046                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3047                     break;
3048                 case CANDIDATE_MB_TYPE_BACKWARD:
3049                     s->mv_dir = MV_DIR_BACKWARD;
3050                     s->mb_intra= 0;
3051                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3052                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3053                     break;
3054                 case CANDIDATE_MB_TYPE_FORWARD:
3055                     s->mv_dir = MV_DIR_FORWARD;
3056                     s->mb_intra= 0;
3057                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3058                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3059                     break;
3060                 case CANDIDATE_MB_TYPE_FORWARD_I:
3061                     s->mv_dir = MV_DIR_FORWARD;
3062                     s->mv_type = MV_TYPE_FIELD;
3063                     s->mb_intra= 0;
3064                     for(i=0; i<2; i++){
3065                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3066                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3067                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3068                     }
3069                     break;
3070                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3071                     s->mv_dir = MV_DIR_BACKWARD;
3072                     s->mv_type = MV_TYPE_FIELD;
3073                     s->mb_intra= 0;
3074                     for(i=0; i<2; i++){
3075                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3076                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3077                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3078                     }
3079                     break;
3080                 case CANDIDATE_MB_TYPE_BIDIR_I:
3081                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3082                     s->mv_type = MV_TYPE_FIELD;
3083                     s->mb_intra= 0;
3084                     for(dir=0; dir<2; dir++){
3085                         for(i=0; i<2; i++){
3086                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3087                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3088                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3089                         }
3090                     }
3091                     break;
3092                 default:
3093                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3094                 }
3095
3096                 encode_mb(s, motion_x, motion_y);
3097
3098                 // RAL: Update last macroblock type
3099                 s->last_mv_dir = s->mv_dir;
3100
3101                 if (CONFIG_H263_ENCODER &&
3102                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3103                     ff_h263_update_motion_val(s);
3104
3105                 ff_mpv_decode_mb(s, s->block);
3106             }
3107
3108             /* clean the MV table in IPS frames for direct mode in B frames */
3109             if(s->mb_intra /* && I,P,S_TYPE */){
3110                 s->p_mv_table[xy][0]=0;
3111                 s->p_mv_table[xy][1]=0;
3112             }
3113
3114             if(s->flags&CODEC_FLAG_PSNR){
3115                 int w= 16;
3116                 int h= 16;
3117
3118                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3119                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3120
3121                 s->current_picture.f->error[0] += sse(
3122                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3123                     s->dest[0], w, h, s->linesize);
3124                 s->current_picture.f->error[1] += sse(
3125                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3126                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3127                 s->current_picture.f->error[2] += sse(
3128                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3129                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3130             }
3131             if(s->loop_filter){
3132                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3133                     ff_h263_loop_filter(s);
3134             }
3135             av_dlog(s->avctx, "MB %d %d bits\n",
3136                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3137         }
3138     }
3139
3140     //not beautiful here but we must write it before flushing so it has to be here
3141     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3142         ff_msmpeg4_encode_ext_header(s);
3143
3144     write_slice_end(s);
3145
3146     /* Send the last GOB if RTP */
3147     if (s->avctx->rtp_callback) {
3148         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3149         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3150         /* Call the RTP callback to send the last GOB */
3151         emms_c();
3152         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3153     }
3154
3155     return 0;
3156 }
3157
/*
 * Add src->field into dst->field, then clear src->field so that a later
 * merge cannot count the same value twice.
 *
 * Expects pointers named `dst` and `src` to be in scope at the point of use.
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement; the previous two-statement form would execute the reset
 * unconditionally when used as the body of an unbraced if/else or loop.
 * Invoke it as a statement: MERGE(member);
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3159 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3160     MERGE(me.scene_change_score);
3161     MERGE(me.mc_mb_var_sum_temp);
3162     MERGE(me.mb_var_sum_temp);
3163 }
3164
3165 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3166     int i;
3167
3168     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3169     MERGE(dct_count[1]);
3170     MERGE(mv_bits);
3171     MERGE(i_tex_bits);
3172     MERGE(p_tex_bits);
3173     MERGE(i_count);
3174     MERGE(f_count);
3175     MERGE(b_count);
3176     MERGE(skip_count);
3177     MERGE(misc_bits);
3178     MERGE(er.error_count);
3179     MERGE(padding_bug_score);
3180     MERGE(current_picture.f->error[0]);
3181     MERGE(current_picture.f->error[1]);
3182     MERGE(current_picture.f->error[2]);
3183
3184     if(dst->avctx->noise_reduction){
3185         for(i=0; i<64; i++){
3186             MERGE(dct_error_sum[0][i]);
3187             MERGE(dct_error_sum[1][i]);
3188         }
3189     }
3190
3191     assert(put_bits_count(&src->pb) % 8 ==0);
3192     assert(put_bits_count(&dst->pb) % 8 ==0);
3193     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3194     flush_put_bits(&dst->pb);
3195 }
3196
3197 static int estimate_qp(MpegEncContext *s, int dry_run){
3198     if (s->next_lambda){
3199         s->current_picture_ptr->f->quality =
3200         s->current_picture.f->quality = s->next_lambda;
3201         if(!dry_run) s->next_lambda= 0;
3202     } else if (!s->fixed_qscale) {
3203         s->current_picture_ptr->f->quality =
3204         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3205         if (s->current_picture.f->quality < 0)
3206             return -1;
3207     }
3208
3209     if(s->adaptive_quant){
3210         switch(s->codec_id){
3211         case AV_CODEC_ID_MPEG4:
3212             if (CONFIG_MPEG4_ENCODER)
3213                 ff_clean_mpeg4_qscales(s);
3214             break;
3215         case AV_CODEC_ID_H263:
3216         case AV_CODEC_ID_H263P:
3217         case AV_CODEC_ID_FLV1:
3218             if (CONFIG_H263_ENCODER)
3219                 ff_clean_h263_qscales(s);
3220             break;
3221         default:
3222             ff_init_qscale_tab(s);
3223         }
3224
3225         s->lambda= s->lambda_table[0];
3226         //FIXME broken
3227     }else
3228         s->lambda = s->current_picture.f->quality;
3229     update_qscale(s);
3230     return 0;
3231 }
3232
3233 /* must be called before writing the header */
3234 static void set_frame_distances(MpegEncContext * s){
3235     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3236     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3237
3238     if(s->pict_type==AV_PICTURE_TYPE_B){
3239         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3240         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3241     }else{
3242         s->pp_time= s->time - s->last_non_b_time;
3243         s->last_non_b_time= s->time;
3244         assert(s->picture_number==0 || s->pp_time > 0);
3245     }
3246 }
3247
/**
 * Encode one picture.
 *
 * The function runs, in order: timing/rounding setup, an initial lambda
 * choice, motion estimation (or spatial variance analysis for I pictures),
 * scene change handling, f_code/b_code selection, the final quantizer
 * estimate, the codec specific picture header and finally the slice encoding
 * through avctx->execute(). Statistics from the additional slice contexts are
 * merged back into s after the analysis and after the encoding pass.
 *
 * @param s              encoder context; s->thread_context[] is handed to
 *                       avctx->execute() with s->slice_context_count entries
 * @param picture_number stored in s->picture_number and passed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3248 static int encode_picture(MpegEncContext *s, int picture_number)
3249 {
3250     int i, ret;
3251     int bits;
3252     int context_count = s->slice_context_count;
3253
3254     s->picture_number = picture_number;
3255
3256     /* Reset the average MB variance */
3257     s->me.mb_var_sum_temp    =
3258     s->me.mc_mb_var_sum_temp = 0;
3259
3260     /* we need to initialize some time vars before we can encode b-frames */
3261     // RAL: Condition added for MPEG1VIDEO
3262     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3263         set_frame_distances(s);
3264     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3265         ff_set_mpeg4_time(s);
3266
3267     s->me.scene_change_score=0;
3268
3269 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3270
         /* I pictures reset no_rounding (1 for msmpeg4_version >= 3, else 0);
          * other non-B pictures toggle it when flipflop rounding is enabled
          * or the codec is H263P or MPEG4 */
3271     if(s->pict_type==AV_PICTURE_TYPE_I){
3272         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3273         else                        s->no_rounding=0;
3274     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3275         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3276             s->no_rounding ^= 1;
3277     }
3278
         /* lambda used during motion estimation: in the second pass take it
          * from a dry-run quantizer estimate; otherwise, unless
          * CODEC_FLAG_QSCALE is set, reuse the last lambda stored for this
          * picture type (B) or for the last non-B picture type */
3279     if(s->flags & CODEC_FLAG_PASS2){
3280         if (estimate_qp(s,1) < 0)
3281             return -1;
3282         ff_get_2pass_fcode(s);
3283     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3284         if(s->pict_type==AV_PICTURE_TYPE_B)
3285             s->lambda= s->last_lambda_for[s->pict_type];
3286         else
3287             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3288         update_qscale(s);
3289     }
3290
3291     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* slice contexts 1..context_count-1 are updated from s here
          * (presumably a state copy; see ff_update_duplicate_context()) */
3292     for(i=1; i<context_count; i++){
3293         ret = ff_update_duplicate_context(s->thread_context[i], s);
3294         if (ret < 0)
3295             return ret;
3296     }
3297
3298     if(ff_init_me(s)<0)
3299         return -1;
3300
3301     /* Estimate motion for every MB */
3302     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256,
              * rounded to nearest */
3303         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3304         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* optional pre-pass for non-B pictures: always with pre_me == 2,
              * otherwise only when the last non-B picture was an I picture */
3305         if (s->pict_type != AV_PICTURE_TYPE_B) {
3306             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3307                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3308             }
3309         }
3310
3311         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3312     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3313         /* I-Frame */
3314         for(i=0; i<s->mb_stride*s->mb_height; i++)
3315             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3316
3317         if(!s->fixed_qscale){
3318             /* finding spatial complexity for I-frame rate control */
3319             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3320         }
3321     }
         /* collect the analysis statistics of the other slice contexts in s */
3322     for(i=1; i<context_count; i++){
3323         merge_context_after_me(s, s->thread_context[i]);
3324     }
3325     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3326     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3327     emms_c();
3328
         /* a P picture whose scene change score exceeds the threshold is
          * re-typed as I and every macroblock is marked intra */
3329     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3330         s->pict_type= AV_PICTURE_TYPE_I;
3331         for(i=0; i<s->mb_stride*s->mb_height; i++)
3332             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3333         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3334                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3335     }
3336
         /* without umvplus: pick f_code (and b_code for B pictures) from the
          * motion vector tables, then let ff_fix_long_mvs() process each table
          * with the chosen code (NOTE(review): presumably clips or re-types
          * vectors that do not fit -- confirm in motion_est.c) */
3337     if(!s->umvplus){
3338         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3339             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3340
3341             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3342                 int a,b;
3343                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3344                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3345                 s->f_code= FFMAX3(s->f_code, a, b);
3346             }
3347
3348             ff_fix_long_p_mvs(s);
3349             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3350             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3351                 int j;
3352                 for(i=0; i<2; i++){
3353                     for(j=0; j<2; j++)
3354                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3355                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3356                 }
3357             }
3358         }
3359
3360         if(s->pict_type==AV_PICTURE_TYPE_B){
3361             int a, b;
3362
                 /* f_code covers the forward and bidir-forward tables,
                  * b_code the backward and bidir-backward tables */
3363             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3364             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3365             s->f_code = FFMAX(a, b);
3366
3367             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3368             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3369             s->b_code = FFMAX(a, b);
3370
3371             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3372             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3373             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3374             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3375             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3376                 int dir, j;
3377                 for(dir=0; dir<2; dir++){
3378                     for(i=0; i<2; i++){
3379                         for(j=0; j<2; j++){
3380                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3381                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3382                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3383                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3384                         }
3385                     }
3386                 }
3387             }
3388         }
3389     }
3390
         /* final (non dry-run) quantizer decision for this picture */
3391     if (estimate_qp(s, 0) < 0)
3392         return -1;
3393
3394     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3395         s->qscale= 3; //reduce clipping problems
3396
3397     if (s->out_format == FMT_MJPEG) {
3398         /* for mjpeg, we do include qscale in the matrix */
3399         for(i=1;i<64;i++){
3400             int j = s->idsp.idct_permutation[i];
3401
3402             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3403         }
3404         s->y_dc_scale_table=
3405         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3406         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3407         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3408                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so it is pinned to 8 */
3409         s->qscale= 8;
3410     }
3411
3412     //FIXME var duplication
3413     s->current_picture_ptr->f->key_frame =
3414     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3415     s->current_picture_ptr->f->pict_type =
3416     s->current_picture.f->pict_type = s->pict_type;
3417
3418     if (s->current_picture.f->key_frame)
3419         s->picture_in_gop_number=0;
3420
         /* write the codec specific picture header; last_bits and the bit
          * count after the header give header_bits */
3421     s->last_bits= put_bits_count(&s->pb);
3422     switch(s->out_format) {
3423     case FMT_MJPEG:
3424         if (CONFIG_MJPEG_ENCODER)
3425             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3426                                            s->intra_matrix);
3427         break;
3428     case FMT_H261:
3429         if (CONFIG_H261_ENCODER)
3430             ff_h261_encode_picture_header(s, picture_number);
3431         break;
3432     case FMT_H263:
3433         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3434             ff_wmv2_encode_picture_header(s, picture_number);
3435         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3436             ff_msmpeg4_encode_picture_header(s, picture_number);
3437         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3438             ff_mpeg4_encode_picture_header(s, picture_number);
3439         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3440             ff_rv10_encode_picture_header(s, picture_number);
3441         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3442             ff_rv20_encode_picture_header(s, picture_number);
3443         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3444             ff_flv_encode_picture_header(s, picture_number);
3445         else if (CONFIG_H263_ENCODER)
3446             ff_h263_encode_picture_header(s, picture_number);
3447         break;
3448     case FMT_MPEG1:
3449         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3450             ff_mpeg1_encode_picture_header(s, picture_number);
3451         break;
3452     default:
3453         assert(0);
3454     }
3455     bits= put_bits_count(&s->pb);
3456     s->header_bits= bits - s->last_bits;
3457
         /* update the other slice contexts from s, encode all slices, then
          * merge their statistics and bitstreams back into s */
3458     for(i=1; i<context_count; i++){
3459         update_duplicate_context_after_me(s->thread_context[i], s);
3460     }
3461     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3462     for(i=1; i<context_count; i++){
3463         merge_context_after_encode(s, s->thread_context[i]);
3464     }
3465     emms_c();
3466     return 0;
3467 }
3468
3469 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3470     const int intra= s->mb_intra;
3471     int i;
3472
3473     s->dct_count[intra]++;
3474
3475     for(i=0; i<64; i++){
3476         int level= block[i];
3477
3478         if(level){
3479             if(level>0){
3480                 s->dct_error_sum[intra][i] += level;
3481                 level -= s->dct_offset[intra][i];
3482                 if(level<0) level=0;
3483             }else{
3484                 s->dct_error_sum[intra][i] -= level;
3485                 level += s->dct_offset[intra][i];
3486                 if(level>0) level=0;
3487             }
3488             block[i]= level;
3489         }
3490     }
3491 }
3492
/**
 * Forward DCT followed by rate-distortion optimized quantization of one
 * block.
 *
 * After s->fdsp.fdct() (and the optional denoiser) up to two candidate levels
 * are collected per coefficient. A dynamic program over the scan positions
 * then picks, per position, the candidate level and run that minimize
 * distortion + lambda * bits, using the VLC length tables of the context.
 * The chosen levels are finally written back into block.
 *
 * @param s        encoder context
 * @param block    in: data handed to s->fdsp.fdct(); out: quantized levels
 *                 (for intra blocks block[0] holds the quantized DC)
 * @param n        block number; n < 4 selects y_dc_scale, otherwise
 *                 c_dc_scale, for the intra DC and indexes s->coded_score[]
 * @param qscale   quantizer, selects q_intra_matrix[]/q_inter_matrix[]
 * @param overflow set to non-zero when s->max_qcoeff is smaller than the
 *                 OR-ed magnitude of the quantized levels
 * @return index (in scan order) of the last non-zero coefficient; a value
 *         below the first coded position (0 for intra, -1 for inter) means
 *         that no AC/inter coefficient is coded
 */
3493 static int dct_quantize_trellis_c(MpegEncContext *s,
3494                                   int16_t *block, int n,
3495                                   int qscale, int *overflow){
3496     const int *qmat;
3497     const uint8_t *scantable= s->intra_scantable.scantable;
3498     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3499     int max=0;
3500     unsigned int threshold1, threshold2;
3501     int bias=0;
3502     int run_tab[65];
3503     int level_tab[65];
3504     int score_tab[65];
3505     int survivor[65];
3506     int survivor_count;
3507     int last_run=0;
3508     int last_level=0;
3509     int last_score= 0;
3510     int last_i;
3511     int coeff[2][64];
3512     int coeff_count[64];
3513     int qmul, qadd, start_i, last_non_zero, i, dc;
3514     const int esc_length= s->ac_esc_length;
3515     uint8_t * length;
3516     uint8_t * last_length;
3517     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3518
3519     s->fdsp.fdct(block);
3520
3521     if(s->dct_error_sum)
3522         s->denoise_dct(s, block);
         /* dequantization multiplier/offset used for FMT_H263 below; both are
          * pre-multiplied by 8, the MPEG-1 path applies the same factor with
          * unquant_coeff <<= 3 */
3523     qmul= qscale*16;
3524     qadd= ((qscale-1)|1)*8;
3525
3526     if (s->mb_intra) {
3527         int q;
3528         if (!s->h263_aic) {
3529             if (n < 4)
3530                 q = s->y_dc_scale;
3531             else
3532                 q = s->c_dc_scale;
3533             q = q << 3;
3534         } else{
3535             /* For AIC we skip quant/dequant of INTRADC */
3536             q = 1 << 3;
3537             qadd=0;
3538         }
3539
3540         /* note: block[0] is assumed to be positive */
3541         block[0] = (block[0] + (q >> 1)) / q;
3542         start_i = 1;
3543         last_non_zero = 0;
3544         qmat = s->q_intra_matrix[qscale];
3545         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3546             bias= 1<<(QMAT_SHIFT-1);
3547         length     = s->intra_ac_vlc_length;
3548         last_length= s->intra_ac_vlc_last_length;
3549     } else {
3550         start_i = 0;
3551         last_non_zero = -1;
3552         qmat = s->q_inter_matrix[qscale];
3553         length     = s->inter_ac_vlc_length;
3554         last_length= s->inter_ac_vlc_last_length;
3555     }
3556     last_i= start_i;
3557
         /* (unsigned)(level + threshold1) > threshold2 is a single-compare
          * test for level < -threshold1 || level > threshold1 */
3558     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3559     threshold2= (threshold1<<1);
3560
         /* scan backwards for the last coefficient above the threshold */
3561     for(i=63; i>=start_i; i--) {
3562         const int j = scantable[i];
3563         int level = block[j] * qmat[j];
3564
3565         if(((unsigned)(level+threshold1))>threshold2){
3566             last_non_zero = i;
3567             break;
3568         }
3569     }
3570
         /* collect the candidate levels: coeff[0][i] is the quantized level,
          * coeff[1][i] the level one step closer to zero (only considered
          * when the magnitude is at least 2). Coefficients below the
          * threshold get the single candidate +1 or -1, by sign. */
3571     for(i=start_i; i<=last_non_zero; i++) {
3572         const int j = scantable[i];
3573         int level = block[j] * qmat[j];
3574
3575 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3576 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3577         if(((unsigned)(level+threshold1))>threshold2){
3578             if(level>0){
3579                 level= (bias + level)>>QMAT_SHIFT;
3580                 coeff[0][i]= level;
3581                 coeff[1][i]= level-1;
3582 //                coeff[2][k]= level-2;
3583             }else{
3584                 level= (bias - level)>>QMAT_SHIFT;
3585                 coeff[0][i]= -level;
3586                 coeff[1][i]= -level+1;
3587 //                coeff[2][k]= -level+2;
3588             }
3589             coeff_count[i]= FFMIN(level, 2);
3590             assert(coeff_count[i]);
3591             max |=level;
3592         }else{
3593             coeff[0][i]= (level>>31)|1;
3594             coeff_count[i]= 1;
3595         }
3596     }
3597
3598     *overflow= s->max_qcoeff < max; //overflow might have happened
3599
         /* nothing above the threshold: clear the block and stop */
3600     if(last_non_zero < start_i){
3601         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3602         return last_non_zero;
3603     }
3604
         /* dynamic program: score_tab[i] is the best score found for coding
          * everything before scan position i; survivor[] holds the
          * predecessor positions that are still tried as run start */
3605     score_tab[start_i]= 0;
3606     survivor[0]= start_i;
3607     survivor_count= 1;
3608
3609     for(i=start_i; i<=last_non_zero; i++){
3610         int level_index, j, zero_distortion;
3611         int dct_coeff= FFABS(block[ scantable[i] ]);
3612         int best_score=256*256*256*120;
3613
             /* coefficients produced by ff_fdct_ifast are rescaled with
              * ff_inv_aanscales before distortions are measured */
3614         if (s->fdsp.fdct == ff_fdct_ifast)
3615             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3616         zero_distortion= dct_coeff*dct_coeff;
3617
3618         for(level_index=0; level_index < coeff_count[i]; level_index++){
3619             int distortion;
3620             int level= coeff[level_index][i];
3621             const int alevel= FFABS(level);
3622             int unquant_coeff;
3623
3624             assert(level);
3625
                 /* reconstruct the value this level would dequantize to */
3626             if(s->out_format == FMT_H263){
3627                 unquant_coeff= alevel*qmul + qadd;
3628             }else{ //MPEG1
3629                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3630                 if(s->mb_intra){
3631                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3632                         unquant_coeff =   (unquant_coeff - 1) | 1;
3633                 }else{
3634                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3635                         unquant_coeff =   (unquant_coeff - 1) | 1;
3636                 }
3637                 unquant_coeff<<= 3;
3638             }
3639
                 /* distortion relative to coding this coefficient as zero */
3640             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* bias the level by 64: values 0..127 can be looked up through
                  * UNI_AC_ENC_INDEX, everything else is costed as an escape */
3641             level+=64;
3642             if((level&(~127)) == 0){
3643                 for(j=survivor_count-1; j>=0; j--){
3644                     int run= i - survivor[j];
3645                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3646                     score += score_tab[i-run];
3647
3648                     if(score < best_score){
3649                         best_score= score;
3650                         run_tab[i+1]= run;
3651                         level_tab[i+1]= level-64;
3652                     }
3653                 }
3654
                     /* FMT_H263: additionally track the best score with this
                      * coefficient coded as the last one (last_length table) */
3655                 if(s->out_format == FMT_H263){
3656                     for(j=survivor_count-1; j>=0; j--){
3657                         int run= i - survivor[j];
3658                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3659                         score += score_tab[i-run];
3660                         if(score < last_score){
3661                             last_score= score;
3662                             last_run= run;
3663                             last_level= level-64;
3664                             last_i= i+1;
3665                         }
3666                     }
3667                 }
3668             }else{
                     /* escape coded: the cost does not depend on the run */
3669                 distortion += esc_length*lambda;
3670                 for(j=survivor_count-1; j>=0; j--){
3671                     int run= i - survivor[j];
3672                     int score= distortion + score_tab[i-run];
3673
3674                     if(score < best_score){
3675                         best_score= score;
3676                         run_tab[i+1]= run;
3677                         level_tab[i+1]= level-64;
3678                     }
3679                 }
3680
3681                 if(s->out_format == FMT_H263){
3682                   for(j=survivor_count-1; j>=0; j--){
3683                         int run= i - survivor[j];
3684                         int score= distortion + score_tab[i-run];
3685                         if(score < last_score){
3686                             last_score= score;
3687                             last_run= run;
3688                             last_level= level-64;
3689                             last_i= i+1;
3690                         }
3691                     }
3692                 }
3693             }
3694         }
3695
3696         score_tab[i+1]= best_score;
3697
             /* drop survivors whose score is worse than the new best score
              * (plus lambda of slack when last_non_zero > 27) */
3698         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3699         if(last_non_zero <= 27){
3700             for(; survivor_count; survivor_count--){
3701                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3702                     break;
3703             }
3704         }else{
3705             for(; survivor_count; survivor_count--){
3706                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3707                     break;
3708             }
3709         }
3710
3711         survivor[ survivor_count++ ]= i+1;
3712     }
3713
         /* formats other than FMT_H263 choose the end position afterwards:
          * the best score_tab entry, plus 2*lambda for every i != 0 */
3714     if(s->out_format != FMT_H263){
3715         last_score= 256*256*256*120;
3716         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3717             int score= score_tab[i];
3718             if(i) score += lambda*2; //FIXME exacter?
3719
3720             if(score < last_score){
3721                 last_score= score;
3722                 last_i= i;
3723                 last_level= level_tab[i];
3724                 last_run= run_tab[i];
3725             }
3726         }
3727     }
3728
3729     s->coded_score[n] = last_score;
3730
         /* remember |block[0]| before the coefficients are cleared */
3731     dc= FFABS(block[0]);
3732     last_non_zero= last_i - 1;
3733     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3734
3735     if(last_non_zero < start_i)
3736         return last_non_zero;
3737
         /* inter block with only the first coefficient left: re-decide
          * between its candidate levels and coding nothing at all (-1) */
3738     if(last_non_zero == 0 && start_i == 0){
3739         int best_level= 0;
3740         int best_score= dc * dc;
3741
3742         for(i=0; i<coeff_count[0]; i++){
3743             int level= coeff[i][0];
3744             int alevel= FFABS(level);
3745             int unquant_coeff, score, distortion;
3746
3747             if(s->out_format == FMT_H263){
3748                     unquant_coeff= (alevel*qmul + qadd)>>3;
3749             }else{ //MPEG1
3750                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3751                     unquant_coeff =   (unquant_coeff - 1) | 1;
3752             }
3753             unquant_coeff = (unquant_coeff + 4) >> 3;
3754             unquant_coeff<<= 3 + 3;
3755
3756             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3757             level+=64;
3758             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3759             else                    score= distortion + esc_length*lambda;
3760
3761             if(score < best_score){
3762                 best_score= score;
3763                 best_level= level - 64;
3764             }
3765         }
3766         block[0]= best_level;
3767         s->coded_score[n] = best_score - dc*dc;
3768         if(best_level == 0) return -1;
3769         else                return last_non_zero;
3770     }
3771
         /* backtrack: write the last level, then follow run_tab/level_tab
          * towards start_i, storing levels in permuted scan order */
3772     i= last_i;
3773     assert(last_level);
3774
3775     block[ perm_scantable[last_non_zero] ]= last_level;
3776     i -= last_run + 1;
3777
3778     for(; i>start_i; i -= run_tab[i] + 1){
3779         block[ perm_scantable[i-1] ]= level_tab[i];
3780     }
3781
3782     return last_non_zero;
3783 }
3784
3785 //#define REFINE_STATS 1
3786 static int16_t basis[64][64];
3787
3788 static void build_basis(uint8_t *perm){
3789     int i, j, x, y;
3790     emms_c();
3791     for(i=0; i<8; i++){
3792         for(j=0; j<8; j++){
3793             for(y=0; y<8; y++){
3794                 for(x=0; x<8; x++){
3795                     double s= 0.25*(1<<BASIS_SHIFT);
3796                     int index= 8*i + j;
3797                     int perm_index= perm[index];
3798                     if(i==0) s*= sqrt(0.5);
3799                     if(j==0) s*= sqrt(0.5);
3800                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3801                 }
3802             }
3803         }
3804     }
3805 }
3806
3807 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3808                         int16_t *block, int16_t *weight, int16_t *orig,
3809                         int n, int qscale){
3810     int16_t rem[64];
3811     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3812     const uint8_t *scantable= s->intra_scantable.scantable;
3813     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3814 //    unsigned int threshold1, threshold2;
3815 //    int bias=0;
3816     int run_tab[65];
3817     int prev_run=0;
3818     int prev_level=0;
3819     int qmul, qadd, start_i, last_non_zero, i, dc;
3820     uint8_t * length;
3821     uint8_t * last_length;
3822     int lambda;
3823     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3824 #ifdef REFINE_STATS
3825 static int count=0;
3826 static int after_last=0;
3827 static int to_zero=0;
3828 static int from_zero=0;
3829 static int raise=0;
3830 static int lower=0;
3831 static int messed_sign=0;
3832 #endif
3833
3834     if(basis[0][0] == 0)
3835         build_basis(s->idsp.idct_permutation);
3836
3837     qmul= qscale*2;
3838     qadd= (qscale-1)|1;
3839     if (s->mb_intra) {
3840         if (!s->h263_aic) {
3841             if (n < 4)
3842                 q = s->y_dc_scale;
3843             else
3844                 q = s->c_dc_scale;
3845         } else{
3846             /* For AIC we skip quant/dequant of INTRADC */
3847             q = 1;
3848             qadd=0;
3849         }
3850         q <<= RECON_SHIFT-3;
3851         /* note: block[0] is assumed to be positive */
3852         dc= block[0]*q;
3853 //        block[0] = (block[0] + (q >> 1)) / q;
3854         start_i = 1;
3855 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3856 //            bias= 1<<(QMAT_SHIFT-1);
3857         length     = s->intra_ac_vlc_length;
3858         last_length= s->intra_ac_vlc_last_length;
3859     } else {
3860         dc= 0;
3861         start_i = 0;
3862         length     = s->inter_ac_vlc_length;
3863         last_length= s->inter_ac_vlc_last_length;
3864     }
3865     last_non_zero = s->block_last_index[n];
3866
3867 #ifdef REFINE_STATS
3868 {START_TIMER
3869 #endif
3870     dc += (1<<(RECON_SHIFT-1));
3871     for(i=0; i<64; i++){
3872         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3873     }
3874 #ifdef REFINE_STATS
3875 STOP_TIMER("memset rem[]")}
3876 #endif
3877     sum=0;
3878     for(i=0; i<64; i++){
3879         int one= 36;
3880         int qns=4;
3881         int w;
3882
3883         w= FFABS(weight[i]) + qns*one;
3884         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3885
3886         weight[i] = w;
3887 //        w=weight[i] = (63*qns + (w/2)) / w;
3888
3889         assert(w>0);
3890         assert(w<(1<<6));
3891         sum += w*w;
3892     }
3893     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3894 #ifdef REFINE_STATS
3895 {START_TIMER
3896 #endif
3897     run=0;
3898     rle_index=0;
3899     for(i=start_i; i<=last_non_zero; i++){
3900         int j= perm_scantable[i];
3901         const int level= block[j];
3902         int coeff;
3903
3904         if(level){
3905             if(level<0) coeff= qmul*level - qadd;
3906             else        coeff= qmul*level + qadd;
3907             run_tab[rle_index++]=run;
3908             run=0;
3909
3910             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3911         }else{
3912             run++;
3913         }
3914     }
3915 #ifdef REFINE_STATS
3916 if(last_non_zero>0){
3917 STOP_TIMER("init rem[]")
3918 }
3919 }
3920
3921 {START_TIMER
3922 #endif
3923     for(;;){
3924         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3925         int best_coeff=0;
3926         int best_change=0;
3927         int run2, best_unquant_change=0, analyze_gradient;
3928 #ifdef REFINE_STATS
3929 {START_TIMER
3930 #endif
3931         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3932
3933         if(analyze_gradient){
3934 #ifdef REFINE_STATS
3935 {START_TIMER
3936 #endif
3937             for(i=0; i<64; i++){
3938                 int w= weight[i];
3939
3940                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3941             }
3942 #ifdef REFINE_STATS
3943 STOP_TIMER("rem*w*w")}
3944 {START_TIMER
3945 #endif
3946             s->fdsp.fdct(d1);
3947 #ifdef REFINE_STATS
3948 STOP_TIMER("dct")}
3949 #endif
3950         }
3951
3952         if(start_i){
3953             const int level= block[0];
3954             int change, old_coeff;
3955
3956             assert(s->mb_intra);
3957
3958             old_coeff= q*level;
3959
3960             for(change=-1; change<=1; change+=2){
3961                 int new_level= level + change;
3962                 int score, new_coeff;
3963
3964                 new_coeff= q*new_level;
3965                 if(new_coeff >= 2048 || new_coeff < 0)
3966                     continue;
3967
3968                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3969                                                   new_coeff - old_coeff);
3970                 if(score<best_score){
3971                     best_score= score;
3972                     best_coeff= 0;
3973                     best_change= change;
3974                     best_unquant_change= new_coeff - old_coeff;
3975                 }
3976             }
3977         }
3978
3979         run=0;
3980         rle_index=0;
3981         run2= run_tab[rle_index++];
3982         prev_level=0;
3983         prev_run=0;
3984
3985         for(i=start_i; i<64; i++){
3986             int j= perm_scantable[i];
3987             const int level= block[j];
3988             int change, old_coeff;
3989
3990             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3991                 break;
3992
3993             if(level){
3994                 if(level<0) old_coeff= qmul*level - qadd;
3995                 else        old_coeff= qmul*level + qadd;
3996                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3997             }else{
3998                 old_coeff=0;
3999                 run2--;
4000                 assert(run2>=0 || i >= last_non_zero );
4001             }
4002
4003             for(change=-1; change<=1; change+=2){
4004                 int new_level= level + change;
4005                 int score, new_coeff, unquant_change;
4006
4007                 score=0;
4008                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4009                    continue;
4010
4011                 if(new_level){
4012                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4013                     else            new_coeff= qmul*new_level + qadd;
4014                     if(new_coeff >= 2048 || new_coeff <= -2048)
4015                         continue;
4016                     //FIXME check for overflow
4017
4018                     if(level){
4019                         if(level < 63 && level > -63){
4020                             if(i < last_non_zero)
4021                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4022                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4023                             else
4024                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4025                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4026                         }
4027                     }else{
4028                         assert(FFABS(new_level)==1);
4029
4030                         if(analyze_gradient){
4031                             int g= d1[ scantable[i] ];
4032                             if(g && (g^new_level) >= 0)
4033                                 continue;
4034                         }
4035
4036                         if(i < last_non_zero){
4037                             int next_i= i + run2 + 1;
4038                             int next_level= block[ perm_scantable[next_i] ] + 64;
4039
4040                             if(next_level&(~127))
4041                                 next_level= 0;
4042
4043                             if(next_i < last_non_zero)
4044                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4045                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4046                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4047                             else
4048                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4049                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4050                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4051                         }else{
4052                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4053                             if(prev_level){
4054                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4055                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4056                             }
4057                         }
4058                     }
4059                 }else{
4060                     new_coeff=0;
4061                     assert(FFABS(level)==1);
4062
4063                     if(i < last_non_zero){
4064                         int next_i= i + run2 + 1;
4065                         int next_level= block[ perm_scantable[next_i] ] + 64;
4066
4067                         if(next_level&(~127))
4068                             next_level= 0;
4069
4070                         if(next_i < last_non_zero)
4071                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4072                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4073                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4074                         else
4075                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4076                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4077                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4078                     }else{
4079                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4080                         if(prev_level){
4081                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4082                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4083                         }
4084                     }
4085                 }
4086
4087                 score *= lambda;
4088
4089                 unquant_change= new_coeff - old_coeff;
4090                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4091
4092                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4093                                                    unquant_change);
4094                 if(score<best_score){
4095                     best_score= score;
4096                     best_coeff= i;
4097                     best_change= change;
4098                     best_unquant_change= unquant_change;
4099                 }
4100             }
4101             if(level){
4102                 prev_level= level + 64;
4103                 if(prev_level&(~127))
4104                     prev_level= 0;
4105                 prev_run= run;
4106                 run=0;
4107             }else{
4108                 run++;
4109             }
4110         }
4111 #ifdef REFINE_STATS
4112 STOP_TIMER("iterative step")}
4113 #endif
4114
4115         if(best_change){
4116             int j= perm_scantable[ best_coeff ];
4117
4118             block[j] += best_change;
4119
4120             if(best_coeff > last_non_zero){
4121                 last_non_zero= best_coeff;
4122                 assert(block[j]);
4123 #ifdef REFINE_STATS
4124 after_last++;
4125 #endif
4126             }else{
4127 #ifdef REFINE_STATS
4128 if(block[j]){
4129     if(block[j] - best_change){
4130         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4131             raise++;
4132         }else{
4133             lower++;
4134         }
4135     }else{
4136         from_zero++;
4137     }
4138 }else{
4139     to_zero++;
4140 }
4141 #endif
4142                 for(; last_non_zero>=start_i; last_non_zero--){
4143                     if(block[perm_scantable[last_non_zero]])
4144                         break;
4145                 }
4146             }
4147 #ifdef REFINE_STATS
4148 count++;
4149 if(256*256*256*64 % count == 0){
4150     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4151 }
4152 #endif
4153             run=0;
4154             rle_index=0;
4155             for(i=start_i; i<=last_non_zero; i++){
4156                 int j= perm_scantable[i];
4157                 const int level= block[j];
4158
4159                  if(level){
4160                      run_tab[rle_index++]=run;
4161                      run=0;
4162                  }else{
4163                      run++;
4164                  }
4165             }
4166
4167             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4168         }else{
4169             break;
4170         }
4171     }
4172 #ifdef REFINE_STATS
4173 if(last_non_zero>0){
4174 STOP_TIMER("iterative search")
4175 }
4176 }
4177 #endif
4178
4179     return last_non_zero;
4180 }
4181
4182 int ff_dct_quantize_c(MpegEncContext *s,
4183                         int16_t *block, int n,
4184                         int qscale, int *overflow)
4185 {
4186     int i, j, level, last_non_zero, q, start_i;
4187     const int *qmat;
4188     const uint8_t *scantable= s->intra_scantable.scantable;
4189     int bias;
4190     int max=0;
4191     unsigned int threshold1, threshold2;
4192
4193     s->fdsp.fdct(block);
4194
4195     if(s->dct_error_sum)
4196         s->denoise_dct(s, block);
4197
4198     if (s->mb_intra) {
4199         if (!s->h263_aic) {
4200             if (n < 4)
4201                 q = s->y_dc_scale;
4202             else
4203                 q = s->c_dc_scale;
4204             q = q << 3;
4205         } else
4206             /* For AIC we skip quant/dequant of INTRADC */
4207             q = 1 << 3;
4208
4209         /* note: block[0] is assumed to be positive */
4210         block[0] = (block[0] + (q >> 1)) / q;
4211         start_i = 1;
4212         last_non_zero = 0;
4213         qmat = s->q_intra_matrix[qscale];
4214         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4215     } else {
4216         start_i = 0;
4217         last_non_zero = -1;
4218         qmat = s->q_inter_matrix[qscale];
4219         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4220     }
4221     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4222     threshold2= (threshold1<<1);
4223     for(i=63;i>=start_i;i--) {
4224         j = scantable[i];
4225         level = block[j] * qmat[j];
4226
4227         if(((unsigned)(level+threshold1))>threshold2){
4228             last_non_zero = i;
4229             break;
4230         }else{
4231             block[j]=0;
4232         }
4233     }
4234     for(i=start_i; i<=last_non_zero; i++) {
4235         j = scantable[i];
4236         level = block[j] * qmat[j];
4237
4238 //        if(   bias+level >= (1<<QMAT_SHIFT)
4239 //           || bias-level >= (1<<QMAT_SHIFT)){
4240         if(((unsigned)(level+threshold1))>threshold2){
4241             if(level>0){
4242                 level= (bias + level)>>QMAT_SHIFT;
4243                 block[j]= level;
4244             }else{
4245                 level= (bias - level)>>QMAT_SHIFT;
4246                 block[j]= -level;
4247             }
4248             max |=level;
4249         }else{
4250             block[j]=0;
4251         }
4252     }
4253     *overflow= s->max_qcoeff < max; //overflow might have happened
4254
4255     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4256     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4257         ff_block_permute(block, s->idsp.idct_permutation,
4258                          scantable, last_non_zero);
4259
4260     return last_non_zero;
4261 }
4262
4263 #define OFFSET(x) offsetof(MpegEncContext, x)
4264 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4265 static const AVOption h263_options[] = {
4266     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4267     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4268     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4269     FF_MPV_COMMON_OPTS
4270     { NULL },
4271 };
4272
4273 static const AVClass h263_class = {
4274     .class_name = "H.263 encoder",
4275     .item_name  = av_default_item_name,
4276     .option     = h263_options,
4277     .version    = LIBAVUTIL_VERSION_INT,
4278 };
4279
4280 AVCodec ff_h263_encoder = {
4281     .name           = "h263",
4282     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4283     .type           = AVMEDIA_TYPE_VIDEO,
4284     .id             = AV_CODEC_ID_H263,
4285     .priv_data_size = sizeof(MpegEncContext),
4286     .init           = ff_mpv_encode_init,
4287     .encode2        = ff_mpv_encode_picture,
4288     .close          = ff_mpv_encode_end,
4289     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4290     .priv_class     = &h263_class,
4291 };
4292
4293 static const AVOption h263p_options[] = {
4294     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4295     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4296     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4297     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4298     FF_MPV_COMMON_OPTS
4299     { NULL },
4300 };
4301 static const AVClass h263p_class = {
4302     .class_name = "H.263p encoder",
4303     .item_name  = av_default_item_name,
4304     .option     = h263p_options,
4305     .version    = LIBAVUTIL_VERSION_INT,
4306 };
4307
4308 AVCodec ff_h263p_encoder = {
4309     .name           = "h263p",
4310     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4311     .type           = AVMEDIA_TYPE_VIDEO,
4312     .id             = AV_CODEC_ID_H263P,
4313     .priv_data_size = sizeof(MpegEncContext),
4314     .init           = ff_mpv_encode_init,
4315     .encode2        = ff_mpv_encode_picture,
4316     .close          = ff_mpv_encode_end,
4317     .capabilities   = CODEC_CAP_SLICE_THREADS,
4318     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4319     .priv_class     = &h263p_class,
4320 };
4321
4322 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4323
4324 AVCodec ff_msmpeg4v2_encoder = {
4325     .name           = "msmpeg4v2",
4326     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4327     .type           = AVMEDIA_TYPE_VIDEO,
4328     .id             = AV_CODEC_ID_MSMPEG4V2,
4329     .priv_data_size = sizeof(MpegEncContext),
4330     .init           = ff_mpv_encode_init,
4331     .encode2        = ff_mpv_encode_picture,
4332     .close          = ff_mpv_encode_end,
4333     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4334     .priv_class     = &msmpeg4v2_class,
4335 };
4336
4337 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4338
4339 AVCodec ff_msmpeg4v3_encoder = {
4340     .name           = "msmpeg4",
4341     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4342     .type           = AVMEDIA_TYPE_VIDEO,
4343     .id             = AV_CODEC_ID_MSMPEG4V3,
4344     .priv_data_size = sizeof(MpegEncContext),
4345     .init           = ff_mpv_encode_init,
4346     .encode2        = ff_mpv_encode_picture,
4347     .close          = ff_mpv_encode_end,
4348     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4349     .priv_class     = &msmpeg4v3_class,
4350 };
4351
4352 FF_MPV_GENERIC_CLASS(wmv1)
4353
4354 AVCodec ff_wmv1_encoder = {
4355     .name           = "wmv1",
4356     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4357     .type           = AVMEDIA_TYPE_VIDEO,
4358     .id             = AV_CODEC_ID_WMV1,
4359     .priv_data_size = sizeof(MpegEncContext),
4360     .init           = ff_mpv_encode_init,
4361     .encode2        = ff_mpv_encode_picture,
4362     .close          = ff_mpv_encode_end,
4363     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4364     .priv_class     = &wmv1_class,
4365 };