]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
lavc: make rc_eq into private options of mpegvideo encoders
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
/* Number of fractional bits in the quantiser bias values: the default biases
 * in ff_mpv_encode_init() are built as fractions of 1 << QUANT_BIAS_SHIFT and
 * ff_convert_matrix() rescales them by 16 - QUANT_BIAS_SHIFT bits. */
61 #define QUANT_BIAS_SHIFT 8
62
/* Fixed-point scale of the reciprocal tables built by ff_convert_matrix():
 * QMAT_SHIFT_MMX for the 16-bit qmat16 tables, QMAT_SHIFT for the int qmat
 * tables. */
63 #define QMAT_SHIFT_MMX 16
64 #define QMAT_SHIFT 22
65
/* Forward declarations of file-local (static) helpers. */
66 static int encode_picture(MpegEncContext *s, int picture_number);
67 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
68 static int sse_mb(MpegEncContext *s);
69 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
70 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
71
/* Shared tables that mpv_encode_defaults() installs as s->me.mv_penalty and
 * s->fcode_tab. */
72 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
73 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
74
/* Option table made of FF_MPV_COMMON_OPTS followed by the terminating
 * { NULL } entry. */
75 const AVOption ff_mpv_generic_options[] = {
76     FF_MPV_COMMON_OPTS
77     { NULL },
78 };
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
105                                         (qscale * quant_matrix[j]));
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 /* 16 <= qscale * quant_matrix[i] <= 7905
111                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
112                  *             19952 <=              x  <= 249205026
113                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
114                  *           3444240 >= (1 << 36) / (x) >= 275 */
115
116                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
117                                         (ff_aanscales[i] * qscale *
118                                          quant_matrix[j]));
119             }
120         } else {
121             for (i = 0; i < 64; i++) {
122                 const int j = s->idsp.idct_permutation[i];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
129                                         (qscale * quant_matrix[j]));
130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
131                 //                    (qscale * quant_matrix[i]);
132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
133                                        (qscale * quant_matrix[j]);
134
135                 if (qmat16[qscale][0][i] == 0 ||
136                     qmat16[qscale][0][i] == 128 * 256)
137                     qmat16[qscale][0][i] = 128 * 256 - 1;
138                 qmat16[qscale][1][i] =
139                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
140                                 qmat16[qscale][0][i]);
141             }
142         }
143
144         for (i = intra; i < 64; i++) {
145             int64_t max = 8191;
146             if (fdsp->fdct == ff_fdct_ifast) {
147                 max = (8191LL * ff_aanscales[i]) >> 14;
148             }
149             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
150                 shift++;
151             }
152         }
153     }
154     if (shift) {
155         av_log(NULL, AV_LOG_INFO,
156                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
157                QMAT_SHIFT - shift);
158     }
159 }
160
161 static inline void update_qscale(MpegEncContext *s)
162 {
163     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
164                 (FF_LAMBDA_SHIFT + 7);
165     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
166
167     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
168                  FF_LAMBDA_SHIFT;
169 }
170
171 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
172 {
173     int i;
174
175     if (matrix) {
176         put_bits(pb, 1, 1);
177         for (i = 0; i < 64; i++) {
178             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
179         }
180     } else
181         put_bits(pb, 1, 0);
182 }
183
184 /**
185  * init s->current_picture.qscale_table from s->lambda_table
186  */
187 void ff_init_qscale_tab(MpegEncContext *s)
188 {
189     int8_t * const qscale_table = s->current_picture.qscale_table;
190     int i;
191
192     for (i = 0; i < s->mb_num; i++) {
193         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
194         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
195         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
196                                                   s->avctx->qmax);
197     }
198 }
199
200 static void update_duplicate_context_after_me(MpegEncContext *dst,
201                                               MpegEncContext *src)
202 {
203 #define COPY(a) dst->a= src->a
204     COPY(pict_type);
205     COPY(current_picture);
206     COPY(f_code);
207     COPY(b_code);
208     COPY(qscale);
209     COPY(lambda);
210     COPY(lambda2);
211     COPY(picture_in_gop_number);
212     COPY(gop_picture_number);
213     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
214     COPY(progressive_frame);    // FIXME don't set in encode_header
215     COPY(partitioned_frame);    // FIXME don't set in encode_header
216 #undef COPY
217 }
218
219 /**
220  * Set the given MpegEncContext to defaults for encoding.
221  * the changed fields will not depend upon the prior state of the MpegEncContext.
222  */
223 static void mpv_encode_defaults(MpegEncContext *s)
224 {
225     int i;
226     ff_mpv_common_defaults(s);
227
228     for (i = -16; i < 16; i++) {
229         default_fcode_tab[i + MAX_MV] = 1;
230     }
231     s->me.mv_penalty = default_mv_penalty;
232     s->fcode_tab     = default_fcode_tab;
233
234     s->input_picture_number  = 0;
235     s->picture_in_gop_number = 0;
236 }
237
238 /* init video encoder */
239 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
240 {
241     MpegEncContext *s = avctx->priv_data;
242     int i, ret, format_supported;
243
244     mpv_encode_defaults(s);
245
246     switch (avctx->codec_id) {
247     case AV_CODEC_ID_MPEG2VIDEO:
248         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
249             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
250             av_log(avctx, AV_LOG_ERROR,
251                    "only YUV420 and YUV422 are supported\n");
252             return -1;
253         }
254         break;
255     case AV_CODEC_ID_MJPEG:
256         format_supported = 0;
257         /* JPEG color space */
258         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
259             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
260             (avctx->color_range == AVCOL_RANGE_JPEG &&
261              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
262               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
263             format_supported = 1;
264         /* MPEG color space */
265         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
266                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
267                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
268             format_supported = 1;
269
270         if (!format_supported) {
271             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
272             return -1;
273         }
274         break;
275     default:
276         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
277             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
278             return -1;
279         }
280     }
281
282     switch (avctx->pix_fmt) {
283     case AV_PIX_FMT_YUVJ422P:
284     case AV_PIX_FMT_YUV422P:
285         s->chroma_format = CHROMA_422;
286         break;
287     case AV_PIX_FMT_YUVJ420P:
288     case AV_PIX_FMT_YUV420P:
289     default:
290         s->chroma_format = CHROMA_420;
291         break;
292     }
293
294     s->bit_rate = avctx->bit_rate;
295     s->width    = avctx->width;
296     s->height   = avctx->height;
297     if (avctx->gop_size > 600 &&
298         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
299         av_log(avctx, AV_LOG_ERROR,
300                "Warning keyframe interval too large! reducing it ...\n");
301         avctx->gop_size = 600;
302     }
303     s->gop_size     = avctx->gop_size;
304     s->avctx        = avctx;
305     s->flags        = avctx->flags;
306     s->flags2       = avctx->flags2;
307     if (avctx->max_b_frames > MAX_B_FRAMES) {
308         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
309                "is %d.\n", MAX_B_FRAMES);
310     }
311     s->max_b_frames = avctx->max_b_frames;
312     s->codec_id     = avctx->codec->id;
313     s->strict_std_compliance = avctx->strict_std_compliance;
314     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
315     s->mpeg_quant         = avctx->mpeg_quant;
316     s->rtp_mode           = !!avctx->rtp_payload_size;
317     s->intra_dc_precision = avctx->intra_dc_precision;
318     s->user_specified_pts = AV_NOPTS_VALUE;
319
320     if (s->gop_size <= 1) {
321         s->intra_only = 1;
322         s->gop_size   = 12;
323     } else {
324         s->intra_only = 0;
325     }
326
327     s->me_method = avctx->me_method;
328
329     /* Fixed QSCALE */
330     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
331
332     s->adaptive_quant = (s->avctx->lumi_masking ||
333                          s->avctx->dark_masking ||
334                          s->avctx->temporal_cplx_masking ||
335                          s->avctx->spatial_cplx_masking  ||
336                          s->avctx->p_masking      ||
337                          s->avctx->border_masking ||
338                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
339                         !s->fixed_qscale;
340
341     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
342
343     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
344         av_log(avctx, AV_LOG_ERROR,
345                "a vbv buffer size is needed, "
346                "for encoding with a maximum bitrate\n");
347         return -1;
348     }
349
350     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
351         av_log(avctx, AV_LOG_INFO,
352                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
353     }
354
355     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
356         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
357         return -1;
358     }
359
360     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
361         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
362         return -1;
363     }
364
365     if (avctx->rc_max_rate &&
366         avctx->rc_max_rate == avctx->bit_rate &&
367         avctx->rc_max_rate != avctx->rc_min_rate) {
368         av_log(avctx, AV_LOG_INFO,
369                "impossible bitrate constraints, this will fail\n");
370     }
371
372     if (avctx->rc_buffer_size &&
373         avctx->bit_rate * (int64_t)avctx->time_base.num >
374             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
375         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
376         return -1;
377     }
378
379     if (!s->fixed_qscale &&
380         avctx->bit_rate * av_q2d(avctx->time_base) >
381             avctx->bit_rate_tolerance) {
382         av_log(avctx, AV_LOG_ERROR,
383                "bitrate tolerance too small for bitrate\n");
384         return -1;
385     }
386
387     if (s->avctx->rc_max_rate &&
388         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
389         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
390          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
391         90000LL * (avctx->rc_buffer_size - 1) >
392             s->avctx->rc_max_rate * 0xFFFFLL) {
393         av_log(avctx, AV_LOG_INFO,
394                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
395                "specified vbv buffer is too large for the given bitrate!\n");
396     }
397
398     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
399         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
400         s->codec_id != AV_CODEC_ID_FLV1) {
401         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
402         return -1;
403     }
404
405     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
406         av_log(avctx, AV_LOG_ERROR,
407                "OBMC is only supported with simple mb decision\n");
408         return -1;
409     }
410
411     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
412         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
413         return -1;
414     }
415
416     if (s->max_b_frames                    &&
417         s->codec_id != AV_CODEC_ID_MPEG4      &&
418         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
419         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
420         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
421         return -1;
422     }
423
424     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
425          s->codec_id == AV_CODEC_ID_H263  ||
426          s->codec_id == AV_CODEC_ID_H263P) &&
427         (avctx->sample_aspect_ratio.num > 255 ||
428          avctx->sample_aspect_ratio.den > 255)) {
429         av_log(avctx, AV_LOG_ERROR,
430                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
431                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
432         return -1;
433     }
434
435     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
436         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
437         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
438         return -1;
439     }
440
441     // FIXME mpeg2 uses that too
442     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
443         av_log(avctx, AV_LOG_ERROR,
444                "mpeg2 style quantization not supported by codec\n");
445         return -1;
446     }
447
448     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
449         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
450         return -1;
451     }
452
453     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
454         s->avctx->mb_decision != FF_MB_DECISION_RD) {
455         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
456         return -1;
457     }
458
459     if (s->avctx->scenechange_threshold < 1000000000 &&
460         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
461         av_log(avctx, AV_LOG_ERROR,
462                "closed gop with scene change detection are not supported yet, "
463                "set threshold to 1000000000\n");
464         return -1;
465     }
466
467     if (s->flags & CODEC_FLAG_LOW_DELAY) {
468         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
469             av_log(avctx, AV_LOG_ERROR,
470                   "low delay forcing is only available for mpeg2\n");
471             return -1;
472         }
473         if (s->max_b_frames != 0) {
474             av_log(avctx, AV_LOG_ERROR,
475                    "b frames cannot be used with low delay\n");
476             return -1;
477         }
478     }
479
480     if (s->q_scale_type == 1) {
481         if (avctx->qmax > 12) {
482             av_log(avctx, AV_LOG_ERROR,
483                    "non linear quant only supports qmax <= 12 currently\n");
484             return -1;
485         }
486     }
487
488     if (s->avctx->thread_count > 1         &&
489         s->codec_id != AV_CODEC_ID_MPEG4      &&
490         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
491         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
492         (s->codec_id != AV_CODEC_ID_H263P)) {
493         av_log(avctx, AV_LOG_ERROR,
494                "multi threaded encoding not supported by codec\n");
495         return -1;
496     }
497
498     if (s->avctx->thread_count < 1) {
499         av_log(avctx, AV_LOG_ERROR,
500                "automatic thread number detection not supported by codec,"
501                "patch welcome\n");
502         return -1;
503     }
504
505     if (s->avctx->thread_count > 1)
506         s->rtp_mode = 1;
507
508     if (!avctx->time_base.den || !avctx->time_base.num) {
509         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
510         return -1;
511     }
512
513     i = (INT_MAX / 2 + 128) >> 8;
514     if (avctx->mb_threshold >= i) {
515         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
516                i - 1);
517         return -1;
518     }
519
520     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
521         av_log(avctx, AV_LOG_INFO,
522                "notice: b_frame_strategy only affects the first pass\n");
523         avctx->b_frame_strategy = 0;
524     }
525
526     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
527     if (i > 1) {
528         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
529         avctx->time_base.den /= i;
530         avctx->time_base.num /= i;
531         //return -1;
532     }
533
534     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
535         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
536         // (a + x * 3 / 8) / x
537         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
538         s->inter_quant_bias = 0;
539     } else {
540         s->intra_quant_bias = 0;
541         // (a - x / 4) / x
542         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
543     }
544
545     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->intra_quant_bias = avctx->intra_quant_bias;
547     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
548         s->inter_quant_bias = avctx->inter_quant_bias;
549
550     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
551         s->avctx->time_base.den > (1 << 16) - 1) {
552         av_log(avctx, AV_LOG_ERROR,
553                "timebase %d/%d not supported by MPEG 4 standard, "
554                "the maximum admitted value for the timebase denominator "
555                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
556                (1 << 16) - 1);
557         return -1;
558     }
559     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
560
561     switch (avctx->codec->id) {
562     case AV_CODEC_ID_MPEG1VIDEO:
563         s->out_format = FMT_MPEG1;
564         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
565         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
566         break;
567     case AV_CODEC_ID_MPEG2VIDEO:
568         s->out_format = FMT_MPEG1;
569         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
570         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
571         s->rtp_mode   = 1;
572         break;
573     case AV_CODEC_ID_MJPEG:
574         s->out_format = FMT_MJPEG;
575         s->intra_only = 1; /* force intra only for jpeg */
576         if (!CONFIG_MJPEG_ENCODER ||
577             ff_mjpeg_encode_init(s) < 0)
578             return -1;
579         avctx->delay = 0;
580         s->low_delay = 1;
581         break;
582     case AV_CODEC_ID_H261:
583         if (!CONFIG_H261_ENCODER)
584             return -1;
585         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
586             av_log(avctx, AV_LOG_ERROR,
587                    "The specified picture size of %dx%d is not valid for the "
588                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
589                     s->width, s->height);
590             return -1;
591         }
592         s->out_format = FMT_H261;
593         avctx->delay  = 0;
594         s->low_delay  = 1;
595         break;
596     case AV_CODEC_ID_H263:
597         if (!CONFIG_H263_ENCODER)
598         return -1;
599         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
600                              s->width, s->height) == 8) {
601             av_log(avctx, AV_LOG_INFO,
602                    "The specified picture size of %dx%d is not valid for "
603                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
604                    "352x288, 704x576, and 1408x1152."
605                    "Try H.263+.\n", s->width, s->height);
606             return -1;
607         }
608         s->out_format = FMT_H263;
609         avctx->delay  = 0;
610         s->low_delay  = 1;
611         break;
612     case AV_CODEC_ID_H263P:
613         s->out_format = FMT_H263;
614         s->h263_plus  = 1;
615         /* Fx */
616         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
617         s->modified_quant  = s->h263_aic;
618         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
619         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
620
621         /* /Fx */
622         /* These are just to be sure */
623         avctx->delay = 0;
624         s->low_delay = 1;
625         break;
626     case AV_CODEC_ID_FLV1:
627         s->out_format      = FMT_H263;
628         s->h263_flv        = 2; /* format = 1; 11-bit codes */
629         s->unrestricted_mv = 1;
630         s->rtp_mode  = 0; /* don't allow GOB */
631         avctx->delay = 0;
632         s->low_delay = 1;
633         break;
634     case AV_CODEC_ID_RV10:
635         s->out_format = FMT_H263;
636         avctx->delay  = 0;
637         s->low_delay  = 1;
638         break;
639     case AV_CODEC_ID_RV20:
640         s->out_format      = FMT_H263;
641         avctx->delay       = 0;
642         s->low_delay       = 1;
643         s->modified_quant  = 1;
644         s->h263_aic        = 1;
645         s->h263_plus       = 1;
646         s->loop_filter     = 1;
647         s->unrestricted_mv = 0;
648         break;
649     case AV_CODEC_ID_MPEG4:
650         s->out_format      = FMT_H263;
651         s->h263_pred       = 1;
652         s->unrestricted_mv = 1;
653         s->low_delay       = s->max_b_frames ? 0 : 1;
654         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
655         break;
656     case AV_CODEC_ID_MSMPEG4V2:
657         s->out_format      = FMT_H263;
658         s->h263_pred       = 1;
659         s->unrestricted_mv = 1;
660         s->msmpeg4_version = 2;
661         avctx->delay       = 0;
662         s->low_delay       = 1;
663         break;
664     case AV_CODEC_ID_MSMPEG4V3:
665         s->out_format        = FMT_H263;
666         s->h263_pred         = 1;
667         s->unrestricted_mv   = 1;
668         s->msmpeg4_version   = 3;
669         s->flipflop_rounding = 1;
670         avctx->delay         = 0;
671         s->low_delay         = 1;
672         break;
673     case AV_CODEC_ID_WMV1:
674         s->out_format        = FMT_H263;
675         s->h263_pred         = 1;
676         s->unrestricted_mv   = 1;
677         s->msmpeg4_version   = 4;
678         s->flipflop_rounding = 1;
679         avctx->delay         = 0;
680         s->low_delay         = 1;
681         break;
682     case AV_CODEC_ID_WMV2:
683         s->out_format        = FMT_H263;
684         s->h263_pred         = 1;
685         s->unrestricted_mv   = 1;
686         s->msmpeg4_version   = 5;
687         s->flipflop_rounding = 1;
688         avctx->delay         = 0;
689         s->low_delay         = 1;
690         break;
691     default:
692         return -1;
693     }
694
695     avctx->has_b_frames = !s->low_delay;
696
697     s->encoding = 1;
698
699     s->progressive_frame    =
700     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
701                                                 CODEC_FLAG_INTERLACED_ME) ||
702                                 s->alternate_scan);
703
704     /* init */
705     ff_mpv_idct_init(s);
706     if (ff_mpv_common_init(s) < 0)
707         return -1;
708
709     if (ARCH_X86)
710         ff_mpv_encode_init_x86(s);
711
712     ff_fdctdsp_init(&s->fdsp, avctx);
713     ff_me_cmp_init(&s->mecc, avctx);
714     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
715     ff_pixblockdsp_init(&s->pdsp, avctx);
716     ff_qpeldsp_init(&s->qdsp);
717
718     s->avctx->coded_frame = s->current_picture.f;
719
720     if (s->msmpeg4_version) {
721         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
722                           2 * 2 * (MAX_LEVEL + 1) *
723                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
724     }
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
726
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
731     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
732                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
733     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
734                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
735
736     if (s->avctx->noise_reduction) {
737         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
738                           2 * 64 * sizeof(uint16_t), fail);
739     }
740
741     if (CONFIG_H263_ENCODER)
742         ff_h263dsp_init(&s->h263dsp);
743     if (!s->dct_quantize)
744         s->dct_quantize = ff_dct_quantize_c;
745     if (!s->denoise_dct)
746         s->denoise_dct  = denoise_dct_c;
747     s->fast_dct_quantize = s->dct_quantize;
748     if (avctx->trellis)
749         s->dct_quantize  = dct_quantize_trellis_c;
750
751     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
752         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
753
754     s->quant_precision = 5;
755
756     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
757     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
758
759     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
760         ff_h261_encode_init(s);
761     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
762         ff_h263_encode_init(s);
763     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
764         ff_msmpeg4_encode_init(s);
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->idsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811 #if FF_API_NORMALIZE_AQP
812     FF_DISABLE_DEPRECATION_WARNINGS
813     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
814         s->mpv_flags |= FF_MPV_FLAG_NAQ;
815     FF_ENABLE_DEPRECATION_WARNINGS;
816 #endif
817
818 #if FF_API_MV0
819     FF_DISABLE_DEPRECATION_WARNINGS
820     if (avctx->flags & CODEC_FLAG_MV0)
821         s->mpv_flags |= FF_MPV_FLAG_MV0;
822     FF_ENABLE_DEPRECATION_WARNINGS
823 #endif
824
825 #if FF_API_MPV_OPT
826     FF_DISABLE_DEPRECATION_WARNINGS
827     if (avctx->rc_qsquish != 0.0)
828         s->rc_qsquish = avctx->rc_qsquish;
829     if (avctx->rc_qmod_amp != 0.0)
830         s->rc_qmod_amp = avctx->rc_qmod_amp;
831     if (avctx->rc_qmod_freq)
832         s->rc_qmod_freq = avctx->rc_qmod_freq;
833
834     if (avctx->rc_eq) {
835         av_freep(&s->rc_eq);
836         s->rc_eq = av_strdup(avctx->rc_eq);
837         if (!s->rc_eq)
838             return AVERROR(ENOMEM);
839     }
840     FF_ENABLE_DEPRECATION_WARNINGS
841 #endif
842
843     if (avctx->b_frame_strategy == 2) {
844         for (i = 0; i < s->max_b_frames + 2; i++) {
845             s->tmp_frames[i] = av_frame_alloc();
846             if (!s->tmp_frames[i])
847                 return AVERROR(ENOMEM);
848
849             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
850             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
851             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
852
853             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
854             if (ret < 0)
855                 return ret;
856         }
857     }
858
859     return 0;
860 fail:
861     ff_mpv_encode_end(avctx);
862     return AVERROR_UNKNOWN;
863 }
864
865 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
866 {
867     MpegEncContext *s = avctx->priv_data;
868     int i;
869
870     ff_rate_control_uninit(s);
871
872     ff_mpv_common_end(s);
873     if (CONFIG_MJPEG_ENCODER &&
874         s->out_format == FMT_MJPEG)
875         ff_mjpeg_encode_close(s);
876
877     av_freep(&avctx->extradata);
878
879     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
880         av_frame_free(&s->tmp_frames[i]);
881
882     ff_free_picture_tables(&s->new_picture);
883     ff_mpeg_unref_picture(s, &s->new_picture);
884
885     av_freep(&s->avctx->stats_out);
886     av_freep(&s->ac_stats);
887
888     av_freep(&s->q_intra_matrix);
889     av_freep(&s->q_inter_matrix);
890     av_freep(&s->q_intra_matrix16);
891     av_freep(&s->q_inter_matrix16);
892     av_freep(&s->input_picture);
893     av_freep(&s->reordered_input_picture);
894     av_freep(&s->dct_offset);
895
896     return 0;
897 }
898
/**
 * Sum of absolute errors of a 16x16 pixel block against a constant value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared with
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++) {
            const int diff = src[col + row * stride] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
912
913 static int get_intra_count(MpegEncContext *s, uint8_t *src,
914                            uint8_t *ref, int stride)
915 {
916     int x, y, w, h;
917     int acc = 0;
918
919     w = s->width  & ~15;
920     h = s->height & ~15;
921
922     for (y = 0; y < h; y += 16) {
923         for (x = 0; x < w; x += 16) {
924             int offset = x + y * stride;
925             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
926                                       stride, 16);
927             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
928             int sae  = get_sae(src + offset, mean, stride);
929
930             acc += sae + 500 < sad;
931         }
932     }
933     return acc;
934 }
935
936
937 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
938 {
939     Picture *pic = NULL;
940     int64_t pts;
941     int i, display_picture_number = 0, ret;
942     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
943                                                  (s->low_delay ? 0 : 1);
944     int direct = 1;
945
946     if (pic_arg) {
947         pts = pic_arg->pts;
948         display_picture_number = s->input_picture_number++;
949
950         if (pts != AV_NOPTS_VALUE) {
951             if (s->user_specified_pts != AV_NOPTS_VALUE) {
952                 int64_t time = pts;
953                 int64_t last = s->user_specified_pts;
954
955                 if (time <= last) {
956                     av_log(s->avctx, AV_LOG_ERROR,
957                            "Error, Invalid timestamp=%"PRId64", "
958                            "last=%"PRId64"\n", pts, s->user_specified_pts);
959                     return -1;
960                 }
961
962                 if (!s->low_delay && display_picture_number == 1)
963                     s->dts_delta = time - last;
964             }
965             s->user_specified_pts = pts;
966         } else {
967             if (s->user_specified_pts != AV_NOPTS_VALUE) {
968                 s->user_specified_pts =
969                 pts = s->user_specified_pts + 1;
970                 av_log(s->avctx, AV_LOG_INFO,
971                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
972                        pts);
973             } else {
974                 pts = display_picture_number;
975             }
976         }
977     }
978
979     if (pic_arg) {
980         if (!pic_arg->buf[0]);
981             direct = 0;
982         if (pic_arg->linesize[0] != s->linesize)
983             direct = 0;
984         if (pic_arg->linesize[1] != s->uvlinesize)
985             direct = 0;
986         if (pic_arg->linesize[2] != s->uvlinesize)
987             direct = 0;
988
989         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
990                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
991
992         if (direct) {
993             i = ff_find_unused_picture(s, 1);
994             if (i < 0)
995                 return i;
996
997             pic = &s->picture[i];
998             pic->reference = 3;
999
1000             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1001                 return ret;
1002             if (ff_alloc_picture(s, pic, 1) < 0) {
1003                 return -1;
1004             }
1005         } else {
1006             i = ff_find_unused_picture(s, 0);
1007             if (i < 0)
1008                 return i;
1009
1010             pic = &s->picture[i];
1011             pic->reference = 3;
1012
1013             if (ff_alloc_picture(s, pic, 0) < 0) {
1014                 return -1;
1015             }
1016
1017             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1018                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1019                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1020                 // empty
1021             } else {
1022                 int h_chroma_shift, v_chroma_shift;
1023                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1024                                                  &h_chroma_shift,
1025                                                  &v_chroma_shift);
1026
1027                 for (i = 0; i < 3; i++) {
1028                     int src_stride = pic_arg->linesize[i];
1029                     int dst_stride = i ? s->uvlinesize : s->linesize;
1030                     int h_shift = i ? h_chroma_shift : 0;
1031                     int v_shift = i ? v_chroma_shift : 0;
1032                     int w = s->width  >> h_shift;
1033                     int h = s->height >> v_shift;
1034                     uint8_t *src = pic_arg->data[i];
1035                     uint8_t *dst = pic->f->data[i];
1036
1037                     if (!s->avctx->rc_buffer_size)
1038                         dst += INPLACE_OFFSET;
1039
1040                     if (src_stride == dst_stride)
1041                         memcpy(dst, src, src_stride * h);
1042                     else {
1043                         while (h--) {
1044                             memcpy(dst, src, w);
1045                             dst += dst_stride;
1046                             src += src_stride;
1047                         }
1048                     }
1049                 }
1050             }
1051         }
1052         ret = av_frame_copy_props(pic->f, pic_arg);
1053         if (ret < 0)
1054             return ret;
1055
1056         pic->f->display_picture_number = display_picture_number;
1057         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1058     }
1059
1060     /* shift buffer entries */
1061     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1062         s->input_picture[i - 1] = s->input_picture[i];
1063
1064     s->input_picture[encoding_delay] = (Picture*) pic;
1065
1066     return 0;
1067 }
1068
1069 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1070 {
1071     int x, y, plane;
1072     int score = 0;
1073     int64_t score64 = 0;
1074
1075     for (plane = 0; plane < 3; plane++) {
1076         const int stride = p->f->linesize[plane];
1077         const int bw = plane ? 1 : 2;
1078         for (y = 0; y < s->mb_height * bw; y++) {
1079             for (x = 0; x < s->mb_width * bw; x++) {
1080                 int off = p->shared ? 0 : 16;
1081                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1082                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1083                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1084
1085                 switch (s->avctx->frame_skip_exp) {
1086                 case 0: score    =  FFMAX(score, v);          break;
1087                 case 1: score   += FFABS(v);                  break;
1088                 case 2: score   += v * v;                     break;
1089                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1090                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1091                 }
1092             }
1093         }
1094     }
1095
1096     if (score)
1097         score64 = score;
1098
1099     if (score64 < s->avctx->frame_skip_threshold)
1100         return 1;
1101     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1102         return 1;
1103     return 0;
1104 }
1105
1106 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1107 {
1108     AVPacket pkt = { 0 };
1109     int ret, got_output;
1110
1111     av_init_packet(&pkt);
1112     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1113     if (ret < 0)
1114         return ret;
1115
1116     ret = pkt.size;
1117     av_free_packet(&pkt);
1118     return ret;
1119 }
1120
1121 static int estimate_best_b_count(MpegEncContext *s)
1122 {
1123     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1124     AVCodecContext *c = avcodec_alloc_context3(NULL);
1125     const int scale = s->avctx->brd_scale;
1126     int i, j, out_size, p_lambda, b_lambda, lambda2;
1127     int64_t best_rd  = INT64_MAX;
1128     int best_b_count = -1;
1129
1130     assert(scale >= 0 && scale <= 3);
1131
1132     //emms_c();
1133     //s->next_picture_ptr->quality;
1134     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1135     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1136     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1137     if (!b_lambda) // FIXME we should do this somewhere else
1138         b_lambda = p_lambda;
1139     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1140                FF_LAMBDA_SHIFT;
1141
1142     c->width        = s->width  >> scale;
1143     c->height       = s->height >> scale;
1144     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1145     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1146     c->mb_decision  = s->avctx->mb_decision;
1147     c->me_cmp       = s->avctx->me_cmp;
1148     c->mb_cmp       = s->avctx->mb_cmp;
1149     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1150     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1151     c->time_base    = s->avctx->time_base;
1152     c->max_b_frames = s->max_b_frames;
1153
1154     if (avcodec_open2(c, codec, NULL) < 0)
1155         return -1;
1156
1157     for (i = 0; i < s->max_b_frames + 2; i++) {
1158         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1159                                                 s->next_picture_ptr;
1160
1161         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1162             pre_input = *pre_input_ptr;
1163
1164             if (!pre_input.shared && i) {
1165                 pre_input.f->data[0] += INPLACE_OFFSET;
1166                 pre_input.f->data[1] += INPLACE_OFFSET;
1167                 pre_input.f->data[2] += INPLACE_OFFSET;
1168             }
1169
1170             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1171                                        s->tmp_frames[i]->linesize[0],
1172                                        pre_input.f->data[0],
1173                                        pre_input.f->linesize[0],
1174                                        c->width, c->height);
1175             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1176                                        s->tmp_frames[i]->linesize[1],
1177                                        pre_input.f->data[1],
1178                                        pre_input.f->linesize[1],
1179                                        c->width >> 1, c->height >> 1);
1180             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1181                                        s->tmp_frames[i]->linesize[2],
1182                                        pre_input.f->data[2],
1183                                        pre_input.f->linesize[2],
1184                                        c->width >> 1, c->height >> 1);
1185         }
1186     }
1187
1188     for (j = 0; j < s->max_b_frames + 1; j++) {
1189         int64_t rd = 0;
1190
1191         if (!s->input_picture[j])
1192             break;
1193
1194         c->error[0] = c->error[1] = c->error[2] = 0;
1195
1196         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1197         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1198
1199         out_size = encode_frame(c, s->tmp_frames[0]);
1200
1201         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1202
1203         for (i = 0; i < s->max_b_frames + 1; i++) {
1204             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1205
1206             s->tmp_frames[i + 1]->pict_type = is_p ?
1207                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1208             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1209
1210             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1211
1212             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1213         }
1214
1215         /* get the delayed frames */
1216         while (out_size) {
1217             out_size = encode_frame(c, NULL);
1218             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1219         }
1220
1221         rd += c->error[0] + c->error[1] + c->error[2];
1222
1223         if (rd < best_rd) {
1224             best_rd = rd;
1225             best_b_count = j;
1226         }
1227     }
1228
1229     avcodec_close(c);
1230     av_freep(&c);
1231
1232     return best_b_count;
1233 }
1234
1235 static int select_input_picture(MpegEncContext *s)
1236 {
1237     int i, ret;
1238
1239     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1240         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1241     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1242
1243     /* set next picture type & ordering */
1244     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1245         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1246             !s->next_picture_ptr || s->intra_only) {
1247             s->reordered_input_picture[0] = s->input_picture[0];
1248             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1249             s->reordered_input_picture[0]->f->coded_picture_number =
1250                 s->coded_picture_number++;
1251         } else {
1252             int b_frames;
1253
1254             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1255                 if (s->picture_in_gop_number < s->gop_size &&
1256                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1257                     // FIXME check that te gop check above is +-1 correct
1258                     av_frame_unref(s->input_picture[0]->f);
1259
1260                     emms_c();
1261                     ff_vbv_update(s, 0);
1262
1263                     goto no_output_pic;
1264                 }
1265             }
1266
1267             if (s->flags & CODEC_FLAG_PASS2) {
1268                 for (i = 0; i < s->max_b_frames + 1; i++) {
1269                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1270
1271                     if (pict_num >= s->rc_context.num_entries)
1272                         break;
1273                     if (!s->input_picture[i]) {
1274                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1275                         break;
1276                     }
1277
1278                     s->input_picture[i]->f->pict_type =
1279                         s->rc_context.entry[pict_num].new_pict_type;
1280                 }
1281             }
1282
1283             if (s->avctx->b_frame_strategy == 0) {
1284                 b_frames = s->max_b_frames;
1285                 while (b_frames && !s->input_picture[b_frames])
1286                     b_frames--;
1287             } else if (s->avctx->b_frame_strategy == 1) {
1288                 for (i = 1; i < s->max_b_frames + 1; i++) {
1289                     if (s->input_picture[i] &&
1290                         s->input_picture[i]->b_frame_score == 0) {
1291                         s->input_picture[i]->b_frame_score =
1292                             get_intra_count(s,
1293                                             s->input_picture[i    ]->f->data[0],
1294                                             s->input_picture[i - 1]->f->data[0],
1295                                             s->linesize) + 1;
1296                     }
1297                 }
1298                 for (i = 0; i < s->max_b_frames + 1; i++) {
1299                     if (!s->input_picture[i] ||
1300                         s->input_picture[i]->b_frame_score - 1 >
1301                             s->mb_num / s->avctx->b_sensitivity)
1302                         break;
1303                 }
1304
1305                 b_frames = FFMAX(0, i - 1);
1306
1307                 /* reset scores */
1308                 for (i = 0; i < b_frames + 1; i++) {
1309                     s->input_picture[i]->b_frame_score = 0;
1310                 }
1311             } else if (s->avctx->b_frame_strategy == 2) {
1312                 b_frames = estimate_best_b_count(s);
1313             } else {
1314                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1315                 b_frames = 0;
1316             }
1317
1318             emms_c();
1319
1320             for (i = b_frames - 1; i >= 0; i--) {
1321                 int type = s->input_picture[i]->f->pict_type;
1322                 if (type && type != AV_PICTURE_TYPE_B)
1323                     b_frames = i;
1324             }
1325             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1326                 b_frames == s->max_b_frames) {
1327                 av_log(s->avctx, AV_LOG_ERROR,
1328                        "warning, too many b frames in a row\n");
1329             }
1330
1331             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1332                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1333                     s->gop_size > s->picture_in_gop_number) {
1334                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1335                 } else {
1336                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1337                         b_frames = 0;
1338                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1339                 }
1340             }
1341
1342             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1343                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1344                 b_frames--;
1345
1346             s->reordered_input_picture[0] = s->input_picture[b_frames];
1347             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1348                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1349             s->reordered_input_picture[0]->f->coded_picture_number =
1350                 s->coded_picture_number++;
1351             for (i = 0; i < b_frames; i++) {
1352                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1353                 s->reordered_input_picture[i + 1]->f->pict_type =
1354                     AV_PICTURE_TYPE_B;
1355                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1356                     s->coded_picture_number++;
1357             }
1358         }
1359     }
1360 no_output_pic:
1361     if (s->reordered_input_picture[0]) {
1362         s->reordered_input_picture[0]->reference =
1363            s->reordered_input_picture[0]->f->pict_type !=
1364                AV_PICTURE_TYPE_B ? 3 : 0;
1365
1366         ff_mpeg_unref_picture(s, &s->new_picture);
1367         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1368             return ret;
1369
1370         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1371             // input is a shared pix, so we can't modifiy it -> alloc a new
1372             // one & ensure that the shared one is reuseable
1373
1374             Picture *pic;
1375             int i = ff_find_unused_picture(s, 0);
1376             if (i < 0)
1377                 return i;
1378             pic = &s->picture[i];
1379
1380             pic->reference = s->reordered_input_picture[0]->reference;
1381             if (ff_alloc_picture(s, pic, 0) < 0) {
1382                 return -1;
1383             }
1384
1385             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1386             if (ret < 0)
1387                 return ret;
1388
1389             /* mark us unused / free shared pic */
1390             av_frame_unref(s->reordered_input_picture[0]->f);
1391             s->reordered_input_picture[0]->shared = 0;
1392
1393             s->current_picture_ptr = pic;
1394         } else {
1395             // input is not a shared pix -> reuse buffer for current_pix
1396             s->current_picture_ptr = s->reordered_input_picture[0];
1397             for (i = 0; i < 4; i++) {
1398                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1399             }
1400         }
1401         ff_mpeg_unref_picture(s, &s->current_picture);
1402         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1403                                        s->current_picture_ptr)) < 0)
1404             return ret;
1405
1406         s->picture_number = s->new_picture.f->display_picture_number;
1407     } else {
1408         ff_mpeg_unref_picture(s, &s->new_picture);
1409     }
1410     return 0;
1411 }
1412
1413 static void frame_end(MpegEncContext *s)
1414 {
1415     int i;
1416
1417     if (s->unrestricted_mv &&
1418         s->current_picture.reference &&
1419         !s->intra_only) {
1420         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1421         int hshift = desc->log2_chroma_w;
1422         int vshift = desc->log2_chroma_h;
1423         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1424                                 s->h_edge_pos, s->v_edge_pos,
1425                                 EDGE_WIDTH, EDGE_WIDTH,
1426                                 EDGE_TOP | EDGE_BOTTOM);
1427         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1428                                 s->h_edge_pos >> hshift,
1429                                 s->v_edge_pos >> vshift,
1430                                 EDGE_WIDTH >> hshift,
1431                                 EDGE_WIDTH >> vshift,
1432                                 EDGE_TOP | EDGE_BOTTOM);
1433         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1434                                 s->h_edge_pos >> hshift,
1435                                 s->v_edge_pos >> vshift,
1436                                 EDGE_WIDTH >> hshift,
1437                                 EDGE_WIDTH >> vshift,
1438                                 EDGE_TOP | EDGE_BOTTOM);
1439     }
1440
1441     emms_c();
1442
1443     s->last_pict_type                 = s->pict_type;
1444     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1445     if (s->pict_type!= AV_PICTURE_TYPE_B)
1446         s->last_non_b_pict_type = s->pict_type;
1447
1448     if (s->encoding) {
1449         /* release non-reference frames */
1450         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1451             if (!s->picture[i].reference)
1452                 ff_mpeg_unref_picture(s, &s->picture[i]);
1453         }
1454     }
1455
1456     s->avctx->coded_frame = s->current_picture_ptr->f;
1457
1458 }
1459
1460 static void update_noise_reduction(MpegEncContext *s)
1461 {
1462     int intra, i;
1463
1464     for (intra = 0; intra < 2; intra++) {
1465         if (s->dct_count[intra] > (1 << 16)) {
1466             for (i = 0; i < 64; i++) {
1467                 s->dct_error_sum[intra][i] >>= 1;
1468             }
1469             s->dct_count[intra] >>= 1;
1470         }
1471
1472         for (i = 0; i < 64; i++) {
1473             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1474                                        s->dct_count[intra] +
1475                                        s->dct_error_sum[intra][i] / 2) /
1476                                       (s->dct_error_sum[intra][i] + 1);
1477         }
1478     }
1479 }
1480
1481 static int frame_start(MpegEncContext *s)
1482 {
1483     int ret;
1484
1485     /* mark & release old frames */
1486     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1487         s->last_picture_ptr != s->next_picture_ptr &&
1488         s->last_picture_ptr->f->buf[0]) {
1489         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1490     }
1491
1492     s->current_picture_ptr->f->pict_type = s->pict_type;
1493     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1494
1495     ff_mpeg_unref_picture(s, &s->current_picture);
1496     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1497                                    s->current_picture_ptr)) < 0)
1498         return ret;
1499
1500     if (s->pict_type != AV_PICTURE_TYPE_B) {
1501         s->last_picture_ptr = s->next_picture_ptr;
1502         if (!s->droppable)
1503             s->next_picture_ptr = s->current_picture_ptr;
1504     }
1505
1506     if (s->last_picture_ptr) {
1507         ff_mpeg_unref_picture(s, &s->last_picture);
1508         if (s->last_picture_ptr->f->buf[0] &&
1509             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1510                                        s->last_picture_ptr)) < 0)
1511             return ret;
1512     }
1513     if (s->next_picture_ptr) {
1514         ff_mpeg_unref_picture(s, &s->next_picture);
1515         if (s->next_picture_ptr->f->buf[0] &&
1516             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1517                                        s->next_picture_ptr)) < 0)
1518             return ret;
1519     }
1520
1521     if (s->picture_structure!= PICT_FRAME) {
1522         int i;
1523         for (i = 0; i < 4; i++) {
1524             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1525                 s->current_picture.f->data[i] +=
1526                     s->current_picture.f->linesize[i];
1527             }
1528             s->current_picture.f->linesize[i] *= 2;
1529             s->last_picture.f->linesize[i]    *= 2;
1530             s->next_picture.f->linesize[i]    *= 2;
1531         }
1532     }
1533
1534     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1535         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1536         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1537     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1538         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1539         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1540     } else {
1541         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1542         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1543     }
1544
1545     if (s->dct_error_sum) {
1546         assert(s->avctx->noise_reduction && s->encoding);
1547         update_noise_reduction(s);
1548     }
1549
1550     return 0;
1551 }
1552
1553 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1554                           const AVFrame *pic_arg, int *got_packet)
1555 {
1556     MpegEncContext *s = avctx->priv_data;
1557     int i, stuffing_count, ret;
1558     int context_count = s->slice_context_count;
1559
1560     s->picture_in_gop_number++;
1561
1562     if (load_input_picture(s, pic_arg) < 0)
1563         return -1;
1564
1565     if (select_input_picture(s) < 0) {
1566         return -1;
1567     }
1568
1569     /* output? */
1570     if (s->new_picture.f->data[0]) {
1571         if (!pkt->data &&
1572             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1573             return ret;
1574         if (s->mb_info) {
1575             s->mb_info_ptr = av_packet_new_side_data(pkt,
1576                                  AV_PKT_DATA_H263_MB_INFO,
1577                                  s->mb_width*s->mb_height*12);
1578             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1579         }
1580
1581         for (i = 0; i < context_count; i++) {
1582             int start_y = s->thread_context[i]->start_mb_y;
1583             int   end_y = s->thread_context[i]->  end_mb_y;
1584             int h       = s->mb_height;
1585             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1586             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1587
1588             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1589         }
1590
1591         s->pict_type = s->new_picture.f->pict_type;
1592         //emms_c();
1593         ret = frame_start(s);
1594         if (ret < 0)
1595             return ret;
1596 vbv_retry:
1597         if (encode_picture(s, s->picture_number) < 0)
1598             return -1;
1599
1600         avctx->header_bits = s->header_bits;
1601         avctx->mv_bits     = s->mv_bits;
1602         avctx->misc_bits   = s->misc_bits;
1603         avctx->i_tex_bits  = s->i_tex_bits;
1604         avctx->p_tex_bits  = s->p_tex_bits;
1605         avctx->i_count     = s->i_count;
1606         // FIXME f/b_count in avctx
1607         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1608         avctx->skip_count  = s->skip_count;
1609
1610         frame_end(s);
1611
1612         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1613             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1614
1615         if (avctx->rc_buffer_size) {
1616             RateControlContext *rcc = &s->rc_context;
1617             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1618
1619             if (put_bits_count(&s->pb) > max_size &&
1620                 s->lambda < s->avctx->lmax) {
1621                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1622                                        (s->qscale + 1) / s->qscale);
1623                 if (s->adaptive_quant) {
1624                     int i;
1625                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1626                         s->lambda_table[i] =
1627                             FFMAX(s->lambda_table[i] + 1,
1628                                   s->lambda_table[i] * (s->qscale + 1) /
1629                                   s->qscale);
1630                 }
1631                 s->mb_skipped = 0;        // done in frame_start()
1632                 // done in encode_picture() so we must undo it
1633                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1634                     if (s->flipflop_rounding          ||
1635                         s->codec_id == AV_CODEC_ID_H263P ||
1636                         s->codec_id == AV_CODEC_ID_MPEG4)
1637                         s->no_rounding ^= 1;
1638                 }
1639                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1640                     s->time_base       = s->last_time_base;
1641                     s->last_non_b_time = s->time - s->pp_time;
1642                 }
1643                 for (i = 0; i < context_count; i++) {
1644                     PutBitContext *pb = &s->thread_context[i]->pb;
1645                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1646                 }
1647                 goto vbv_retry;
1648             }
1649
1650             assert(s->avctx->rc_max_rate);
1651         }
1652
1653         if (s->flags & CODEC_FLAG_PASS1)
1654             ff_write_pass1_stats(s);
1655
1656         for (i = 0; i < 4; i++) {
1657             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1658             avctx->error[i] += s->current_picture_ptr->f->error[i];
1659         }
1660
1661         if (s->flags & CODEC_FLAG_PASS1)
1662             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1663                    avctx->i_tex_bits + avctx->p_tex_bits ==
1664                        put_bits_count(&s->pb));
1665         flush_put_bits(&s->pb);
1666         s->frame_bits  = put_bits_count(&s->pb);
1667
1668         stuffing_count = ff_vbv_update(s, s->frame_bits);
1669         if (stuffing_count) {
1670             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1671                     stuffing_count + 50) {
1672                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1673                 return -1;
1674             }
1675
1676             switch (s->codec_id) {
1677             case AV_CODEC_ID_MPEG1VIDEO:
1678             case AV_CODEC_ID_MPEG2VIDEO:
1679                 while (stuffing_count--) {
1680                     put_bits(&s->pb, 8, 0);
1681                 }
1682             break;
1683             case AV_CODEC_ID_MPEG4:
1684                 put_bits(&s->pb, 16, 0);
1685                 put_bits(&s->pb, 16, 0x1C3);
1686                 stuffing_count -= 4;
1687                 while (stuffing_count--) {
1688                     put_bits(&s->pb, 8, 0xFF);
1689                 }
1690             break;
1691             default:
1692                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1693             }
1694             flush_put_bits(&s->pb);
1695             s->frame_bits  = put_bits_count(&s->pb);
1696         }
1697
1698         /* update mpeg1/2 vbv_delay for CBR */
1699         if (s->avctx->rc_max_rate                          &&
1700             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1701             s->out_format == FMT_MPEG1                     &&
1702             90000LL * (avctx->rc_buffer_size - 1) <=
1703                 s->avctx->rc_max_rate * 0xFFFFLL) {
1704             int vbv_delay, min_delay;
1705             double inbits  = s->avctx->rc_max_rate *
1706                              av_q2d(s->avctx->time_base);
1707             int    minbits = s->frame_bits - 8 *
1708                              (s->vbv_delay_ptr - s->pb.buf - 1);
1709             double bits    = s->rc_context.buffer_index + minbits - inbits;
1710
1711             if (bits < 0)
1712                 av_log(s->avctx, AV_LOG_ERROR,
1713                        "Internal error, negative bits\n");
1714
1715             assert(s->repeat_first_field == 0);
1716
1717             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1718             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1719                         s->avctx->rc_max_rate;
1720
1721             vbv_delay = FFMAX(vbv_delay, min_delay);
1722
1723             assert(vbv_delay < 0xFFFF);
1724
1725             s->vbv_delay_ptr[0] &= 0xF8;
1726             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1727             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1728             s->vbv_delay_ptr[2] &= 0x07;
1729             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1730             avctx->vbv_delay     = vbv_delay * 300;
1731         }
1732         s->total_bits     += s->frame_bits;
1733         avctx->frame_bits  = s->frame_bits;
1734
1735         pkt->pts = s->current_picture.f->pts;
1736         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1737             if (!s->current_picture.f->coded_picture_number)
1738                 pkt->dts = pkt->pts - s->dts_delta;
1739             else
1740                 pkt->dts = s->reordered_pts;
1741             s->reordered_pts = pkt->pts;
1742         } else
1743             pkt->dts = pkt->pts;
1744         if (s->current_picture.f->key_frame)
1745             pkt->flags |= AV_PKT_FLAG_KEY;
1746         if (s->mb_info)
1747             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1748     } else {
1749         s->frame_bits = 0;
1750     }
1751     assert((s->frame_bits & 7) == 0);
1752
1753     pkt->size = s->frame_bits / 8;
1754     *got_packet = !!pkt->size;
1755     return 0;
1756 }
1757
1758 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1759                                                 int n, int threshold)
1760 {
1761     static const char tab[64] = {
1762         3, 2, 2, 1, 1, 1, 1, 1,
1763         1, 1, 1, 1, 1, 1, 1, 1,
1764         1, 1, 1, 1, 1, 1, 1, 1,
1765         0, 0, 0, 0, 0, 0, 0, 0,
1766         0, 0, 0, 0, 0, 0, 0, 0,
1767         0, 0, 0, 0, 0, 0, 0, 0,
1768         0, 0, 0, 0, 0, 0, 0, 0,
1769         0, 0, 0, 0, 0, 0, 0, 0
1770     };
1771     int score = 0;
1772     int run = 0;
1773     int i;
1774     int16_t *block = s->block[n];
1775     const int last_index = s->block_last_index[n];
1776     int skip_dc;
1777
1778     if (threshold < 0) {
1779         skip_dc = 0;
1780         threshold = -threshold;
1781     } else
1782         skip_dc = 1;
1783
1784     /* Are all we could set to zero already zero? */
1785     if (last_index <= skip_dc - 1)
1786         return;
1787
1788     for (i = 0; i <= last_index; i++) {
1789         const int j = s->intra_scantable.permutated[i];
1790         const int level = FFABS(block[j]);
1791         if (level == 1) {
1792             if (skip_dc && i == 0)
1793                 continue;
1794             score += tab[run];
1795             run = 0;
1796         } else if (level > 1) {
1797             return;
1798         } else {
1799             run++;
1800         }
1801     }
1802     if (score >= threshold)
1803         return;
1804     for (i = skip_dc; i <= last_index; i++) {
1805         const int j = s->intra_scantable.permutated[i];
1806         block[j] = 0;
1807     }
1808     if (block[0])
1809         s->block_last_index[n] = 0;
1810     else
1811         s->block_last_index[n] = -1;
1812 }
1813
1814 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1815                                int last_index)
1816 {
1817     int i;
1818     const int maxlevel = s->max_qcoeff;
1819     const int minlevel = s->min_qcoeff;
1820     int overflow = 0;
1821
1822     if (s->mb_intra) {
1823         i = 1; // skip clipping of intra dc
1824     } else
1825         i = 0;
1826
1827     for (; i <= last_index; i++) {
1828         const int j = s->intra_scantable.permutated[i];
1829         int level = block[j];
1830
1831         if (level > maxlevel) {
1832             level = maxlevel;
1833             overflow++;
1834         } else if (level < minlevel) {
1835             level = minlevel;
1836             overflow++;
1837         }
1838
1839         block[j] = level;
1840     }
1841
1842     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1843         av_log(s->avctx, AV_LOG_INFO,
1844                "warning, clipping %d dct coefficients to %d..%d\n",
1845                overflow, minlevel, maxlevel);
1846 }
1847
/**
 * Fill an 8x8 weight table from local pixel activity.
 *
 * For each of the 64 positions the pixels of the surrounding 3x3 window
 * (cut off at the borders of the 8x8 block) are summed; the weight is
 * 36 * sqrt(count * sum_of_squares - sum^2) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total   = 0;
            int squares = 0;
            int samples = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pixel = ptr[nx + ny * stride];

                    total   += pixel;
                    squares += pixel * pixel;
                    samples++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1871
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: pick the quantiser for this macroblock, locate the source
 * pixels (through an edge-emulated copy when the macroblock crosses the
 * right/bottom picture border), load either the source pixels (intra) or the
 * difference against the motion-compensated prediction (inter) into
 * s->block[], DCT/quantize every block that is not skipped, and finally call
 * the codec-specific macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the codec-specific writer
 *                        (not passed to the MJPEG one)
 * @param motion_y        see motion_x
 * @param mb_block_height chroma row count used for chroma plane addressing
 *                        and edge emulation; encode_mb() passes 8 or 16
 * @param mb_block_count  number of blocks handled per macroblock;
 *                        encode_mb() passes 6 or 8
 */
1872 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1873                                                 int motion_x, int motion_y,
1874                                                 int mb_block_height,
1875                                                 int mb_block_count)
1876 {
1877     int16_t weight[8][64];
1878     int16_t orig[8][64];
1879     const int mb_x = s->mb_x;
1880     const int mb_y = s->mb_y;
1881     int i;
1882     int skip_dct[8];
1883     int dct_offset = s->linesize * 8; // default for progressive frames
1884     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1885     ptrdiff_t wrap_y, wrap_c;
1886
         /* every block starts out with the context-wide skipdct setting */
1887     for (i = 0; i < mb_block_count; i++)
1888         skip_dct[i] = s->skipdct;
1889
         /* adaptive quantisation: take this macroblock's lambda from
          * lambda_table and derive qscale/dquant from it */
1890     if (s->adaptive_quant) {
1891         const int last_qp = s->qscale;
1892         const int mb_xy = mb_x + mb_y * s->mb_stride;
1893
1894         s->lambda = s->lambda_table[mb_xy];
1895         update_qscale(s);
1896
1897         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1898             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1899             s->dquant = s->qscale - last_qp;
1900
1901             if (s->out_format == FMT_H263) {
                     /* dquant is limited to [-2, 2] for FMT_H263 output */
1902                 s->dquant = av_clip(s->dquant, -2, 2);
1903
1904                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1905                     if (!s->mb_intra) {
1906                         if (s->pict_type == AV_PICTURE_TYPE_B) {
                                 /* B-frames: odd dquant or direct mode -> no change */
1907                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1908                                 s->dquant = 0;
1909                         }
1910                         if (s->mv_type == MV_TYPE_8X8)
1911                             s->dquant = 0;
1912                     }
1913                 }
1914             }
1915         }
1916         ff_set_qscale(s, last_qp + s->dquant);
1917     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1918         ff_set_qscale(s, s->qscale + s->dquant);
1919
         /* source pixel pointers for this macroblock in the picture being coded */
1920     wrap_y = s->linesize;
1921     wrap_c = s->uvlinesize;
1922     ptr_y  = s->new_picture.f->data[0] +
1923              (mb_y * 16 * wrap_y)              + mb_x * 16;
1924     ptr_cb = s->new_picture.f->data[1] +
1925              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1926     ptr_cr = s->new_picture.f->data[2] +
1927              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1928
         /* macroblock reaches past the right or bottom picture edge: build
          * padded copies in edge_emu_buffer and read from those instead.
          * NOTE(review): the chroma calls use mb_y * 8 and height >> 1 even
          * when mb_block_height is 16 -- looks 4:2:0-specific, confirm for
          * 4:2:2 input. */
1929     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1930         uint8_t *ebuf = s->edge_emu_buffer + 32;
1931         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1932                                  wrap_y, wrap_y,
1933                                  16, 16, mb_x * 16, mb_y * 16,
1934                                  s->width, s->height);
1935         ptr_y = ebuf;
1936         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1937                                  wrap_c, wrap_c,
1938                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1939                                  s->width >> 1, s->height >> 1);
1940         ptr_cb = ebuf + 18 * wrap_y;
1941         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1942                                  wrap_c, wrap_c,
1943                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1944                                  s->width >> 1, s->height >> 1);
1945         ptr_cr = ebuf + 18 * wrap_y + 8;
1946     }
1947
1948     if (s->mb_intra) {
             /* intra: blocks are filled straight from the source pixels */
1949         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1950             int progressive_score, interlaced_score;
1951
1952             s->interlaced_dct = 0;
1953             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1954                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1955                                                      NULL, wrap_y, 8) - 400;
1956
1957             if (progressive_score > 0) {
1958                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1959                                                         NULL, wrap_y * 2, 8) +
1960                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1961                                                         NULL, wrap_y * 2, 8);
1962                 if (progressive_score > interlaced_score) {
                         /* interlaced DCT: the lower blocks start one line
                          * down and all blocks use every second line */
1963                     s->interlaced_dct = 1;
1964
1965                     dct_offset = wrap_y;
1966                     wrap_y <<= 1;
1967                     if (s->chroma_format == CHROMA_422)
1968                         wrap_c <<= 1;
1969                 }
1970             }
1971         }
1972
1973         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1974         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1975         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1976         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1977
1978         if (s->flags & CODEC_FLAG_GRAY) {
1979             skip_dct[4] = 1;
1980             skip_dct[5] = 1;
1981         } else {
1982             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1983             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1984             if (!s->chroma_y_shift) { /* 422 */
1985                 s->pdsp.get_pixels(s->block[6],
1986                                    ptr_cb + (dct_offset >> 1), wrap_c);
1987                 s->pdsp.get_pixels(s->block[7],
1988                                    ptr_cr + (dct_offset >> 1), wrap_c);
1989             }
1990         }
1991     } else {
             /* inter: build the prediction in s->dest[] and code the
              * difference between source and prediction */
1992         op_pixels_func (*op_pix)[4];
1993         qpel_mc_func (*op_qpix)[16];
1994         uint8_t *dest_y, *dest_cb, *dest_cr;
1995
1996         dest_y  = s->dest[0];
1997         dest_cb = s->dest[1];
1998         dest_cr = s->dest[2];
1999
2000         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2001             op_pix  = s->hdsp.put_pixels_tab;
2002             op_qpix = s->qdsp.put_qpel_pixels_tab;
2003         } else {
2004             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2005             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2006         }
2007
2008         if (s->mv_dir & MV_DIR_FORWARD) {
2009             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2010                           s->last_picture.f->data,
2011                           op_pix, op_qpix);
                 /* a following backward prediction is averaged on top */
2012             op_pix  = s->hdsp.avg_pixels_tab;
2013             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2014         }
2015         if (s->mv_dir & MV_DIR_BACKWARD) {
2016             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2017                           s->next_picture.f->data,
2018                           op_pix, op_qpix);
2019         }
2020
2021         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2022             int progressive_score, interlaced_score;
2023
2024             s->interlaced_dct = 0;
2025             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2026                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2027                                                      ptr_y + wrap_y * 8,
2028                                                      wrap_y, 8) - 400;
2029
2030             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2031                 progressive_score -= 400;
2032
2033             if (progressive_score > 0) {
2034                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2035                                                         wrap_y * 2, 8) +
2036                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2037                                                         ptr_y + wrap_y,
2038                                                         wrap_y * 2, 8);
2039
2040                 if (progressive_score > interlaced_score) {
2041                     s->interlaced_dct = 1;
2042
2043                     dct_offset = wrap_y;
2044                     wrap_y <<= 1;
2045                     if (s->chroma_format == CHROMA_422)
2046                         wrap_c <<= 1;
2047                 }
2048             }
2049         }
2050
2051         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2052         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2053         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2054                             dest_y + dct_offset, wrap_y);
2055         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2056                             dest_y + dct_offset + 8, wrap_y);
2057
2058         if (s->flags & CODEC_FLAG_GRAY) {
2059             skip_dct[4] = 1;
2060             skip_dct[5] = 1;
2061         } else {
2062             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2063             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2064             if (!s->chroma_y_shift) { /* 422 */
2065                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2066                                     dest_cb + (dct_offset >> 1), wrap_c);
2067                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2068                                     dest_cr + (dct_offset >> 1), wrap_c);
2069             }
2070         }
2071         /* pre quantization */
             /* only when this macroblock's mc_mb_var is below 2 * qscale^2:
              * a block whose SAD against the prediction is below 20 * qscale
              * is not transformed at all */
2072         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2073                 2 * s->qscale * s->qscale) {
2074             // FIXME optimize
2075             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2076                 skip_dct[0] = 1;
2077             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2078                 skip_dct[1] = 1;
2079             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2080                                wrap_y, 8) < 20 * s->qscale)
2081                 skip_dct[2] = 1;
2082             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2083                                wrap_y, 8) < 20 * s->qscale)
2084                 skip_dct[3] = 1;
2085             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2086                 skip_dct[4] = 1;
2087             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2088                 skip_dct[5] = 1;
2089             if (!s->chroma_y_shift) { /* 422 */
2090                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2091                                    dest_cb + (dct_offset >> 1),
2092                                    wrap_c, 8) < 20 * s->qscale)
2093                     skip_dct[6] = 1;
2094                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2095                                    dest_cr + (dct_offset >> 1),
2096                                    wrap_c, 8) < 20 * s->qscale)
2097                     skip_dct[7] = 1;
2098             }
2099         }
2100     }
2101
         /* noise shaping: compute per-block weights from the source pixels
          * and keep a copy of the untransformed blocks; both are handed to
          * dct_quantize_refine() below */
2102     if (s->quantizer_noise_shaping) {
2103         if (!skip_dct[0])
2104             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2105         if (!skip_dct[1])
2106             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2107         if (!skip_dct[2])
2108             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2109         if (!skip_dct[3])
2110             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2111         if (!skip_dct[4])
2112             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2113         if (!skip_dct[5])
2114             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2115         if (!s->chroma_y_shift) { /* 422 */
2116             if (!skip_dct[6])
2117                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2118                                   wrap_c);
2119             if (!skip_dct[7])
2120                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2121                                   wrap_c);
2122         }
2123         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2124     }
2125
2126     /* DCT & quantize */
2127     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2128     {
2129         for (i = 0; i < mb_block_count; i++) {
2130             if (!skip_dct[i]) {
2131                 int overflow;
2132                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2133                 // FIXME we could decide to change to quantizer instead of
2134                 // clipping
2135                 // JS: I don't think that would be a good idea it could lower
2136                 //     quality instead of improve it. Just INTRADC clipping
2137                 //     deserves changes in quantizer
2138                 if (overflow)
2139                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2140             } else
                     /* skipped block: marked as having no coefficients */
2141                 s->block_last_index[i] = -1;
2142         }
2143         if (s->quantizer_noise_shaping) {
2144             for (i = 0; i < mb_block_count; i++) {
2145                 if (!skip_dct[i]) {
2146                     s->block_last_index[i] =
2147                         dct_quantize_refine(s, s->block[i], weight[i],
2148                                             orig[i], i, s->qscale);
2149                 }
2150             }
2151         }
2152
             /* single coefficient elimination, inter macroblocks only */
2153         if (s->luma_elim_threshold && !s->mb_intra)
2154             for (i = 0; i < 4; i++)
2155                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2156         if (s->chroma_elim_threshold && !s->mb_intra)
2157             for (i = 4; i < mb_block_count; i++)
2158                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2159
2160         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2161             for (i = 0; i < mb_block_count; i++) {
2162                 if (s->block_last_index[i] == -1)
2163                     s->coded_score[i] = INT_MAX / 256;
2164             }
2165         }
2166     }
2167
         /* gray intra macroblock: chroma blocks 4 and 5 become a lone DC
          * coefficient of (1024 + c_dc_scale / 2) / c_dc_scale.
          * NOTE(review): blocks 6 and 7 are not touched here although
          * mb_block_count can be 8 -- confirm gray + 4:2:2 is handled. */
2168     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2169         s->block_last_index[4] =
2170         s->block_last_index[5] = 0;
2171         s->block[4][0] =
2172         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2173     }
2174
2175     // non c quantize code returns incorrect block_last_index FIXME
         /* with alternate scan and a non-C quantizer, recompute the last
          * index by searching backwards for the last nonzero coefficient */
2176     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2177         for (i = 0; i < mb_block_count; i++) {
2178             int j;
2179             if (s->block_last_index[i] > 0) {
2180                 for (j = 63; j > 0; j--) {
2181                     if (s->block[i][s->intra_scantable.permutated[j]])
2182                         break;
2183                 }
2184                 s->block_last_index[i] = j;
2185             }
2186         }
2187     }
2188
2189     /* huffman encode */
         /* each call is guarded by its CONFIG_*_ENCODER constant */
2190     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2191     case AV_CODEC_ID_MPEG1VIDEO:
2192     case AV_CODEC_ID_MPEG2VIDEO:
2193         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2194             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2195         break;
2196     case AV_CODEC_ID_MPEG4:
2197         if (CONFIG_MPEG4_ENCODER)
2198             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2199         break;
2200     case AV_CODEC_ID_MSMPEG4V2:
2201     case AV_CODEC_ID_MSMPEG4V3:
2202     case AV_CODEC_ID_WMV1:
2203         if (CONFIG_MSMPEG4_ENCODER)
2204             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2205         break;
2206     case AV_CODEC_ID_WMV2:
2207         if (CONFIG_WMV2_ENCODER)
2208             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2209         break;
2210     case AV_CODEC_ID_H261:
2211         if (CONFIG_H261_ENCODER)
2212             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2213         break;
2214     case AV_CODEC_ID_H263:
2215     case AV_CODEC_ID_H263P:
2216     case AV_CODEC_ID_FLV1:
2217     case AV_CODEC_ID_RV10:
2218     case AV_CODEC_ID_RV20:
2219         if (CONFIG_H263_ENCODER)
2220             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2221         break;
2222     case AV_CODEC_ID_MJPEG:
2223         if (CONFIG_MJPEG_ENCODER)
2224             ff_mjpeg_encode_mb(s, s->block);
2225         break;
2226     default:
2227         assert(0);
2228     }
2229 }
2230
2231 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2232 {
2233     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2234     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2235 }
2236
2237 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2238     int i;
2239
2240     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2241
2242     /* mpeg1 */
2243     d->mb_skip_run= s->mb_skip_run;
2244     for(i=0; i<3; i++)
2245         d->last_dc[i] = s->last_dc[i];
2246
2247     /* statistics */
2248     d->mv_bits= s->mv_bits;
2249     d->i_tex_bits= s->i_tex_bits;
2250     d->p_tex_bits= s->p_tex_bits;
2251     d->i_count= s->i_count;
2252     d->f_count= s->f_count;
2253     d->b_count= s->b_count;
2254     d->skip_count= s->skip_count;
2255     d->misc_bits= s->misc_bits;
2256     d->last_bits= 0;
2257
2258     d->mb_skipped= 0;
2259     d->qscale= s->qscale;
2260     d->dquant= s->dquant;
2261
2262     d->esc3_level_length= s->esc3_level_length;
2263 }
2264
2265 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2266     int i;
2267
2268     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2269     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2270
2271     /* mpeg1 */
2272     d->mb_skip_run= s->mb_skip_run;
2273     for(i=0; i<3; i++)
2274         d->last_dc[i] = s->last_dc[i];
2275
2276     /* statistics */
2277     d->mv_bits= s->mv_bits;
2278     d->i_tex_bits= s->i_tex_bits;
2279     d->p_tex_bits= s->p_tex_bits;
2280     d->i_count= s->i_count;
2281     d->f_count= s->f_count;
2282     d->b_count= s->b_count;
2283     d->skip_count= s->skip_count;
2284     d->misc_bits= s->misc_bits;
2285
2286     d->mb_intra= s->mb_intra;
2287     d->mb_skipped= s->mb_skipped;
2288     d->mv_type= s->mv_type;
2289     d->mv_dir= s->mv_dir;
2290     d->pb= s->pb;
2291     if(s->data_partitioning){
2292         d->pb2= s->pb2;
2293         d->tex_pb= s->tex_pb;
2294     }
2295     d->block= s->block;
2296     for(i=0; i<8; i++)
2297         d->block_last_index[i]= s->block_last_index[i];
2298     d->interlaced_dct= s->interlaced_dct;
2299     d->qscale= s->qscale;
2300
2301     d->esc3_level_length= s->esc3_level_length;
2302 }
2303
2304 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2305                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2306                            int *dmin, int *next_block, int motion_x, int motion_y)
2307 {
2308     int score;
2309     uint8_t *dest_backup[3];
2310
2311     copy_context_before_encode(s, backup, type);
2312
2313     s->block= s->blocks[*next_block];
2314     s->pb= pb[*next_block];
2315     if(s->data_partitioning){
2316         s->pb2   = pb2   [*next_block];
2317         s->tex_pb= tex_pb[*next_block];
2318     }
2319
2320     if(*next_block){
2321         memcpy(dest_backup, s->dest, sizeof(s->dest));
2322         s->dest[0] = s->rd_scratchpad;
2323         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2324         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2325         assert(s->linesize >= 32); //FIXME
2326     }
2327
2328     encode_mb(s, motion_x, motion_y);
2329
2330     score= put_bits_count(&s->pb);
2331     if(s->data_partitioning){
2332         score+= put_bits_count(&s->pb2);
2333         score+= put_bits_count(&s->tex_pb);
2334     }
2335
2336     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2337         ff_mpv_decode_mb(s, s->block);
2338
2339         score *= s->lambda2;
2340         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2341     }
2342
2343     if(*next_block){
2344         memcpy(s->dest, dest_backup, sizeof(s->dest));
2345     }
2346
2347     if(score<*dmin){
2348         *dmin= score;
2349         *next_block^=1;
2350
2351         copy_context_after_encode(best, s, type);
2352     }
2353 }
2354
2355 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2356     uint32_t *sq = ff_square_tab + 256;
2357     int acc=0;
2358     int x,y;
2359
2360     if(w==16 && h==16)
2361         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2362     else if(w==8 && h==8)
2363         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2364
2365     for(y=0; y<h; y++){
2366         for(x=0; x<w; x++){
2367             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2368         }
2369     }
2370
2371     assert(acc>=0);
2372
2373     return acc;
2374 }
2375
2376 static int sse_mb(MpegEncContext *s){
2377     int w= 16;
2378     int h= 16;
2379
2380     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2381     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2382
2383     if(w==16 && h==16)
2384       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2385         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2386                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2387                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2388       }else{
2389         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2390                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2391                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2392       }
2393     else
2394         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2395                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2396                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2397 }
2398
2399 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2400     MpegEncContext *s= *(void**)arg;
2401
2402
2403     s->me.pre_pass=1;
2404     s->me.dia_size= s->avctx->pre_dia_size;
2405     s->first_slice_line=1;
2406     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2407         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2408             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2409         }
2410         s->first_slice_line=0;
2411     }
2412
2413     s->me.pre_pass=0;
2414
2415     return 0;
2416 }
2417
2418 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2419     MpegEncContext *s= *(void**)arg;
2420
2421     s->me.dia_size= s->avctx->dia_size;
2422     s->first_slice_line=1;
2423     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2424         s->mb_x=0; //for block init below
2425         ff_init_block_index(s);
2426         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2427             s->block_index[0]+=2;
2428             s->block_index[1]+=2;
2429             s->block_index[2]+=2;
2430             s->block_index[3]+=2;
2431
2432             /* compute motion vector & mb_type and store in context */
2433             if(s->pict_type==AV_PICTURE_TYPE_B)
2434                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2435             else
2436                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2437         }
2438         s->first_slice_line=0;
2439     }
2440     return 0;
2441 }
2442
2443 static int mb_var_thread(AVCodecContext *c, void *arg){
2444     MpegEncContext *s= *(void**)arg;
2445     int mb_x, mb_y;
2446
2447     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2448         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2449             int xx = mb_x * 16;
2450             int yy = mb_y * 16;
2451             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2452             int varc;
2453             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2454
2455             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2456                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2457
2458             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2459             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2460             s->me.mb_var_sum_temp    += varc;
2461         }
2462     }
2463     return 0;
2464 }
2465
2466 static void write_slice_end(MpegEncContext *s){
2467     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2468         if(s->partitioned_frame){
2469             ff_mpeg4_merge_partitions(s);
2470         }
2471
2472         ff_mpeg4_stuffing(&s->pb);
2473     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2474         ff_mjpeg_encode_stuffing(&s->pb);
2475     }
2476
2477     avpriv_align_put_bits(&s->pb);
2478     flush_put_bits(&s->pb);
2479
2480     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2481         s->misc_bits+= get_bits_diff(s);
2482 }
2483
2484 static void write_mb_info(MpegEncContext *s)
2485 {
2486     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2487     int offset = put_bits_count(&s->pb);
2488     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2489     int gobn = s->mb_y / s->gob_index;
2490     int pred_x, pred_y;
2491     if (CONFIG_H263_ENCODER)
2492         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2493     bytestream_put_le32(&ptr, offset);
2494     bytestream_put_byte(&ptr, s->qscale);
2495     bytestream_put_byte(&ptr, gobn);
2496     bytestream_put_le16(&ptr, mba);
2497     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2498     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2499     /* 4MV not implemented */
2500     bytestream_put_byte(&ptr, 0); /* hmv2 */
2501     bytestream_put_byte(&ptr, 0); /* vmv2 */
2502 }
2503
2504 static void update_mb_info(MpegEncContext *s, int startcode)
2505 {
2506     if (!s->mb_info)
2507         return;
2508     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2509         s->mb_info_size += 12;
2510         s->prev_mb_info = s->last_mb_info;
2511     }
2512     if (startcode) {
2513         s->prev_mb_info = put_bits_count(&s->pb)/8;
2514         /* This might have incremented mb_info_size above, and we return without
2515          * actually writing any info into that slot yet. But in that case,
2516          * this will be called again at the start of the after writing the
2517          * start code, actually writing the mb info. */
2518         return;
2519     }
2520
2521     s->last_mb_info = put_bits_count(&s->pb)/8;
2522     if (!s->mb_info_size)
2523         s->mb_info_size += 12;
2524     write_mb_info(s);
2525 }
2526
2527 static int encode_thread(AVCodecContext *c, void *arg){
2528     MpegEncContext *s= *(void**)arg;
2529     int mb_x, mb_y, pdif = 0;
2530     int chr_h= 16>>s->chroma_y_shift;
2531     int i, j;
2532     MpegEncContext best_s, backup_s;
2533     uint8_t bit_buf[2][MAX_MB_BYTES];
2534     uint8_t bit_buf2[2][MAX_MB_BYTES];
2535     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2536     PutBitContext pb[2], pb2[2], tex_pb[2];
2537
2538     for(i=0; i<2; i++){
2539         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2540         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2541         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2542     }
2543
2544     s->last_bits= put_bits_count(&s->pb);
2545     s->mv_bits=0;
2546     s->misc_bits=0;
2547     s->i_tex_bits=0;
2548     s->p_tex_bits=0;
2549     s->i_count=0;
2550     s->f_count=0;
2551     s->b_count=0;
2552     s->skip_count=0;
2553
2554     for(i=0; i<3; i++){
2555         /* init last dc values */
2556         /* note: quant matrix value (8) is implied here */
2557         s->last_dc[i] = 128 << s->intra_dc_precision;
2558
2559         s->current_picture.f->error[i] = 0;
2560     }
2561     s->mb_skip_run = 0;
2562     memset(s->last_mv, 0, sizeof(s->last_mv));
2563
2564     s->last_mv_dir = 0;
2565
2566     switch(s->codec_id){
2567     case AV_CODEC_ID_H263:
2568     case AV_CODEC_ID_H263P:
2569     case AV_CODEC_ID_FLV1:
2570         if (CONFIG_H263_ENCODER)
2571             s->gob_index = ff_h263_get_gob_height(s);
2572         break;
2573     case AV_CODEC_ID_MPEG4:
2574         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2575             ff_mpeg4_init_partitions(s);
2576         break;
2577     }
2578
2579     s->resync_mb_x=0;
2580     s->resync_mb_y=0;
2581     s->first_slice_line = 1;
2582     s->ptr_lastgob = s->pb.buf;
2583     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2584         s->mb_x=0;
2585         s->mb_y= mb_y;
2586
2587         ff_set_qscale(s, s->qscale);
2588         ff_init_block_index(s);
2589
2590         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2591             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2592             int mb_type= s->mb_type[xy];
2593 //            int d;
2594             int dmin= INT_MAX;
2595             int dir;
2596
2597             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2598                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2599                 return -1;
2600             }
2601             if(s->data_partitioning){
2602                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2603                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2604                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2605                     return -1;
2606                 }
2607             }
2608
2609             s->mb_x = mb_x;
2610             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2611             ff_update_block_index(s);
2612
2613             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2614                 ff_h261_reorder_mb_index(s);
2615                 xy= s->mb_y*s->mb_stride + s->mb_x;
2616                 mb_type= s->mb_type[xy];
2617             }
2618
2619             /* write gob / video packet header  */
2620             if(s->rtp_mode){
2621                 int current_packet_size, is_gob_start;
2622
2623                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2624
2625                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2626
2627                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2628
2629                 switch(s->codec_id){
2630                 case AV_CODEC_ID_H263:
2631                 case AV_CODEC_ID_H263P:
2632                     if(!s->h263_slice_structured)
2633                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2634                     break;
2635                 case AV_CODEC_ID_MPEG2VIDEO:
2636                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2637                 case AV_CODEC_ID_MPEG1VIDEO:
2638                     if(s->mb_skip_run) is_gob_start=0;
2639                     break;
2640                 }
2641
2642                 if(is_gob_start){
2643                     if(s->start_mb_y != mb_y || mb_x!=0){
2644                         write_slice_end(s);
2645
2646                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2647                             ff_mpeg4_init_partitions(s);
2648                         }
2649                     }
2650
2651                     assert((put_bits_count(&s->pb)&7) == 0);
2652                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2653
2654                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2655                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2656                         int d = 100 / s->error_rate;
2657                         if(r % d == 0){
2658                             current_packet_size=0;
2659                             s->pb.buf_ptr= s->ptr_lastgob;
2660                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2661                         }
2662                     }
2663
2664                     if (s->avctx->rtp_callback){
2665                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2666                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2667                     }
2668                     update_mb_info(s, 1);
2669
2670                     switch(s->codec_id){
2671                     case AV_CODEC_ID_MPEG4:
2672                         if (CONFIG_MPEG4_ENCODER) {
2673                             ff_mpeg4_encode_video_packet_header(s);
2674                             ff_mpeg4_clean_buffers(s);
2675                         }
2676                     break;
2677                     case AV_CODEC_ID_MPEG1VIDEO:
2678                     case AV_CODEC_ID_MPEG2VIDEO:
2679                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2680                             ff_mpeg1_encode_slice_header(s);
2681                             ff_mpeg1_clean_buffers(s);
2682                         }
2683                     break;
2684                     case AV_CODEC_ID_H263:
2685                     case AV_CODEC_ID_H263P:
2686                         if (CONFIG_H263_ENCODER)
2687                             ff_h263_encode_gob_header(s, mb_y);
2688                     break;
2689                     }
2690
2691                     if(s->flags&CODEC_FLAG_PASS1){
2692                         int bits= put_bits_count(&s->pb);
2693                         s->misc_bits+= bits - s->last_bits;
2694                         s->last_bits= bits;
2695                     }
2696
2697                     s->ptr_lastgob += current_packet_size;
2698                     s->first_slice_line=1;
2699                     s->resync_mb_x=mb_x;
2700                     s->resync_mb_y=mb_y;
2701                 }
2702             }
2703
2704             if(  (s->resync_mb_x   == s->mb_x)
2705                && s->resync_mb_y+1 == s->mb_y){
2706                 s->first_slice_line=0;
2707             }
2708
2709             s->mb_skipped=0;
2710             s->dquant=0; //only for QP_RD
2711
2712             update_mb_info(s, 0);
2713
2714             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2715                 int next_block=0;
2716                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2717
2718                 copy_context_before_encode(&backup_s, s, -1);
2719                 backup_s.pb= s->pb;
2720                 best_s.data_partitioning= s->data_partitioning;
2721                 best_s.partitioned_frame= s->partitioned_frame;
2722                 if(s->data_partitioning){
2723                     backup_s.pb2= s->pb2;
2724                     backup_s.tex_pb= s->tex_pb;
2725                 }
2726
2727                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2728                     s->mv_dir = MV_DIR_FORWARD;
2729                     s->mv_type = MV_TYPE_16X16;
2730                     s->mb_intra= 0;
2731                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2732                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2733                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2734                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2735                 }
2736                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2737                     s->mv_dir = MV_DIR_FORWARD;
2738                     s->mv_type = MV_TYPE_FIELD;
2739                     s->mb_intra= 0;
2740                     for(i=0; i<2; i++){
2741                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2742                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2743                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2744                     }
2745                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2746                                  &dmin, &next_block, 0, 0);
2747                 }
2748                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2749                     s->mv_dir = MV_DIR_FORWARD;
2750                     s->mv_type = MV_TYPE_16X16;
2751                     s->mb_intra= 0;
2752                     s->mv[0][0][0] = 0;
2753                     s->mv[0][0][1] = 0;
2754                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2755                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2756                 }
2757                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2758                     s->mv_dir = MV_DIR_FORWARD;
2759                     s->mv_type = MV_TYPE_8X8;
2760                     s->mb_intra= 0;
2761                     for(i=0; i<4; i++){
2762                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2763                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2764                     }
2765                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2766                                  &dmin, &next_block, 0, 0);
2767                 }
2768                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2769                     s->mv_dir = MV_DIR_FORWARD;
2770                     s->mv_type = MV_TYPE_16X16;
2771                     s->mb_intra= 0;
2772                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2773                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2774                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2775                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2776                 }
2777                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2778                     s->mv_dir = MV_DIR_BACKWARD;
2779                     s->mv_type = MV_TYPE_16X16;
2780                     s->mb_intra= 0;
2781                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2782                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2783                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2784                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2785                 }
2786                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2787                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2788                     s->mv_type = MV_TYPE_16X16;
2789                     s->mb_intra= 0;
2790                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2791                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2792                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2793                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2794                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2795                                  &dmin, &next_block, 0, 0);
2796                 }
2797                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2798                     s->mv_dir = MV_DIR_FORWARD;
2799                     s->mv_type = MV_TYPE_FIELD;
2800                     s->mb_intra= 0;
2801                     for(i=0; i<2; i++){
2802                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2803                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2804                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2805                     }
2806                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2807                                  &dmin, &next_block, 0, 0);
2808                 }
2809                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2810                     s->mv_dir = MV_DIR_BACKWARD;
2811                     s->mv_type = MV_TYPE_FIELD;
2812                     s->mb_intra= 0;
2813                     for(i=0; i<2; i++){
2814                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2815                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2816                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2817                     }
2818                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2819                                  &dmin, &next_block, 0, 0);
2820                 }
2821                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2822                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2823                     s->mv_type = MV_TYPE_FIELD;
2824                     s->mb_intra= 0;
2825                     for(dir=0; dir<2; dir++){
2826                         for(i=0; i<2; i++){
2827                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2828                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2829                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2830                         }
2831                     }
2832                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2833                                  &dmin, &next_block, 0, 0);
2834                 }
2835                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2836                     s->mv_dir = 0;
2837                     s->mv_type = MV_TYPE_16X16;
2838                     s->mb_intra= 1;
2839                     s->mv[0][0][0] = 0;
2840                     s->mv[0][0][1] = 0;
2841                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2842                                  &dmin, &next_block, 0, 0);
2843                     if(s->h263_pred || s->h263_aic){
2844                         if(best_s.mb_intra)
2845                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2846                         else
2847                             ff_clean_intra_table_entries(s); //old mode?
2848                     }
2849                 }
2850
2851                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2852                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2853                         const int last_qp= backup_s.qscale;
2854                         int qpi, qp, dc[6];
2855                         int16_t ac[6][16];
2856                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2857                         static const int dquant_tab[4]={-1,1,-2,2};
2858
2859                         assert(backup_s.dquant == 0);
2860
2861                         //FIXME intra
2862                         s->mv_dir= best_s.mv_dir;
2863                         s->mv_type = MV_TYPE_16X16;
2864                         s->mb_intra= best_s.mb_intra;
2865                         s->mv[0][0][0] = best_s.mv[0][0][0];
2866                         s->mv[0][0][1] = best_s.mv[0][0][1];
2867                         s->mv[1][0][0] = best_s.mv[1][0][0];
2868                         s->mv[1][0][1] = best_s.mv[1][0][1];
2869
2870                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2871                         for(; qpi<4; qpi++){
2872                             int dquant= dquant_tab[qpi];
2873                             qp= last_qp + dquant;
2874                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2875                                 continue;
2876                             backup_s.dquant= dquant;
2877                             if(s->mb_intra && s->dc_val[0]){
2878                                 for(i=0; i<6; i++){
2879                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2880                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2881                                 }
2882                             }
2883
2884                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2885                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2886                             if(best_s.qscale != qp){
2887                                 if(s->mb_intra && s->dc_val[0]){
2888                                     for(i=0; i<6; i++){
2889                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2890                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2891                                     }
2892                                 }
2893                             }
2894                         }
2895                     }
2896                 }
2897                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2898                     int mx= s->b_direct_mv_table[xy][0];
2899                     int my= s->b_direct_mv_table[xy][1];
2900
2901                     backup_s.dquant = 0;
2902                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2903                     s->mb_intra= 0;
2904                     ff_mpeg4_set_direct_mv(s, mx, my);
2905                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2906                                  &dmin, &next_block, mx, my);
2907                 }
2908                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2909                     backup_s.dquant = 0;
2910                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2911                     s->mb_intra= 0;
2912                     ff_mpeg4_set_direct_mv(s, 0, 0);
2913                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2914                                  &dmin, &next_block, 0, 0);
2915                 }
2916                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2917                     int coded=0;
2918                     for(i=0; i<6; i++)
2919                         coded |= s->block_last_index[i];
2920                     if(coded){
2921                         int mx,my;
2922                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2923                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2924                             mx=my=0; //FIXME find the one we actually used
2925                             ff_mpeg4_set_direct_mv(s, mx, my);
2926                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2927                             mx= s->mv[1][0][0];
2928                             my= s->mv[1][0][1];
2929                         }else{
2930                             mx= s->mv[0][0][0];
2931                             my= s->mv[0][0][1];
2932                         }
2933
2934                         s->mv_dir= best_s.mv_dir;
2935                         s->mv_type = best_s.mv_type;
2936                         s->mb_intra= 0;
2937 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2938                         s->mv[0][0][1] = best_s.mv[0][0][1];
2939                         s->mv[1][0][0] = best_s.mv[1][0][0];
2940                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2941                         backup_s.dquant= 0;
2942                         s->skipdct=1;
2943                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2944                                         &dmin, &next_block, mx, my);
2945                         s->skipdct=0;
2946                     }
2947                 }
2948
2949                 s->current_picture.qscale_table[xy] = best_s.qscale;
2950
2951                 copy_context_after_encode(s, &best_s, -1);
2952
2953                 pb_bits_count= put_bits_count(&s->pb);
2954                 flush_put_bits(&s->pb);
2955                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2956                 s->pb= backup_s.pb;
2957
2958                 if(s->data_partitioning){
2959                     pb2_bits_count= put_bits_count(&s->pb2);
2960                     flush_put_bits(&s->pb2);
2961                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2962                     s->pb2= backup_s.pb2;
2963
2964                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2965                     flush_put_bits(&s->tex_pb);
2966                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2967                     s->tex_pb= backup_s.tex_pb;
2968                 }
2969                 s->last_bits= put_bits_count(&s->pb);
2970
2971                 if (CONFIG_H263_ENCODER &&
2972                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2973                     ff_h263_update_motion_val(s);
2974
2975                 if(next_block==0){ //FIXME 16 vs linesize16
2976                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2977                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2978                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2979                 }
2980
2981                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2982                     ff_mpv_decode_mb(s, s->block);
2983             } else {
2984                 int motion_x = 0, motion_y = 0;
2985                 s->mv_type=MV_TYPE_16X16;
2986                 // only one MB-Type possible
2987
2988                 switch(mb_type){
2989                 case CANDIDATE_MB_TYPE_INTRA:
2990                     s->mv_dir = 0;
2991                     s->mb_intra= 1;
2992                     motion_x= s->mv[0][0][0] = 0;
2993                     motion_y= s->mv[0][0][1] = 0;
2994                     break;
2995                 case CANDIDATE_MB_TYPE_INTER:
2996                     s->mv_dir = MV_DIR_FORWARD;
2997                     s->mb_intra= 0;
2998                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2999                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3000                     break;
3001                 case CANDIDATE_MB_TYPE_INTER_I:
3002                     s->mv_dir = MV_DIR_FORWARD;
3003                     s->mv_type = MV_TYPE_FIELD;
3004                     s->mb_intra= 0;
3005                     for(i=0; i<2; i++){
3006                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3007                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3008                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3009                     }
3010                     break;
3011                 case CANDIDATE_MB_TYPE_INTER4V:
3012                     s->mv_dir = MV_DIR_FORWARD;
3013                     s->mv_type = MV_TYPE_8X8;
3014                     s->mb_intra= 0;
3015                     for(i=0; i<4; i++){
3016                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3017                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3018                     }
3019                     break;
3020                 case CANDIDATE_MB_TYPE_DIRECT:
3021                     if (CONFIG_MPEG4_ENCODER) {
3022                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3023                         s->mb_intra= 0;
3024                         motion_x=s->b_direct_mv_table[xy][0];
3025                         motion_y=s->b_direct_mv_table[xy][1];
3026                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3027                     }
3028                     break;
3029                 case CANDIDATE_MB_TYPE_DIRECT0:
3030                     if (CONFIG_MPEG4_ENCODER) {
3031                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3032                         s->mb_intra= 0;
3033                         ff_mpeg4_set_direct_mv(s, 0, 0);
3034                     }
3035                     break;
3036                 case CANDIDATE_MB_TYPE_BIDIR:
3037                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3038                     s->mb_intra= 0;
3039                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3040                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3041                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3042                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3043                     break;
3044                 case CANDIDATE_MB_TYPE_BACKWARD:
3045                     s->mv_dir = MV_DIR_BACKWARD;
3046                     s->mb_intra= 0;
3047                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3048                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3049                     break;
3050                 case CANDIDATE_MB_TYPE_FORWARD:
3051                     s->mv_dir = MV_DIR_FORWARD;
3052                     s->mb_intra= 0;
3053                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3054                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3055                     break;
3056                 case CANDIDATE_MB_TYPE_FORWARD_I:
3057                     s->mv_dir = MV_DIR_FORWARD;
3058                     s->mv_type = MV_TYPE_FIELD;
3059                     s->mb_intra= 0;
3060                     for(i=0; i<2; i++){
3061                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3062                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3063                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3064                     }
3065                     break;
3066                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3067                     s->mv_dir = MV_DIR_BACKWARD;
3068                     s->mv_type = MV_TYPE_FIELD;
3069                     s->mb_intra= 0;
3070                     for(i=0; i<2; i++){
3071                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3072                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3073                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3074                     }
3075                     break;
3076                 case CANDIDATE_MB_TYPE_BIDIR_I:
3077                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3078                     s->mv_type = MV_TYPE_FIELD;
3079                     s->mb_intra= 0;
3080                     for(dir=0; dir<2; dir++){
3081                         for(i=0; i<2; i++){
3082                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3083                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3084                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3085                         }
3086                     }
3087                     break;
3088                 default:
3089                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3090                 }
3091
3092                 encode_mb(s, motion_x, motion_y);
3093
3094                 // RAL: Update last macroblock type
3095                 s->last_mv_dir = s->mv_dir;
3096
3097                 if (CONFIG_H263_ENCODER &&
3098                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3099                     ff_h263_update_motion_val(s);
3100
3101                 ff_mpv_decode_mb(s, s->block);
3102             }
3103
3104             /* clean the MV table in IPS frames for direct mode in B frames */
3105             if(s->mb_intra /* && I,P,S_TYPE */){
3106                 s->p_mv_table[xy][0]=0;
3107                 s->p_mv_table[xy][1]=0;
3108             }
3109
3110             if(s->flags&CODEC_FLAG_PSNR){
3111                 int w= 16;
3112                 int h= 16;
3113
3114                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3115                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3116
3117                 s->current_picture.f->error[0] += sse(
3118                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3119                     s->dest[0], w, h, s->linesize);
3120                 s->current_picture.f->error[1] += sse(
3121                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3122                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3123                 s->current_picture.f->error[2] += sse(
3124                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3125                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3126             }
3127             if(s->loop_filter){
3128                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3129                     ff_h263_loop_filter(s);
3130             }
3131             av_dlog(s->avctx, "MB %d %d bits\n",
3132                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3133         }
3134     }
3135
3136     //not beautiful here but we must write it before flushing so it has to be here
3137     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3138         ff_msmpeg4_encode_ext_header(s);
3139
3140     write_slice_end(s);
3141
3142     /* Send the last GOB if RTP */
3143     if (s->avctx->rtp_callback) {
3144         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3145         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3146         /* Call the RTP callback to send the last GOB */
3147         emms_c();
3148         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3149     }
3150
3151     return 0;
3152 }
3153
/*
 * Accumulate one field of a slice context into the main context and reset it
 * in the source: adds src->field to dst->field, then sets src->field to 0.
 * Expects pointers named `dst` and `src` in the calling scope.
 *
 * Wrapped in do { } while (0) so that it expands to exactly one statement;
 * the previous two-statement form would execute the reset unconditionally
 * when used as the body of an unbraced if/else or loop.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3155 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3156     MERGE(me.scene_change_score);
3157     MERGE(me.mc_mb_var_sum_temp);
3158     MERGE(me.mb_var_sum_temp);
3159 }
3160
3161 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3162     int i;
3163
3164     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3165     MERGE(dct_count[1]);
3166     MERGE(mv_bits);
3167     MERGE(i_tex_bits);
3168     MERGE(p_tex_bits);
3169     MERGE(i_count);
3170     MERGE(f_count);
3171     MERGE(b_count);
3172     MERGE(skip_count);
3173     MERGE(misc_bits);
3174     MERGE(er.error_count);
3175     MERGE(padding_bug_score);
3176     MERGE(current_picture.f->error[0]);
3177     MERGE(current_picture.f->error[1]);
3178     MERGE(current_picture.f->error[2]);
3179
3180     if(dst->avctx->noise_reduction){
3181         for(i=0; i<64; i++){
3182             MERGE(dct_error_sum[0][i]);
3183             MERGE(dct_error_sum[1][i]);
3184         }
3185     }
3186
3187     assert(put_bits_count(&src->pb) % 8 ==0);
3188     assert(put_bits_count(&dst->pb) % 8 ==0);
3189     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3190     flush_put_bits(&dst->pb);
3191 }
3192
3193 static int estimate_qp(MpegEncContext *s, int dry_run){
3194     if (s->next_lambda){
3195         s->current_picture_ptr->f->quality =
3196         s->current_picture.f->quality = s->next_lambda;
3197         if(!dry_run) s->next_lambda= 0;
3198     } else if (!s->fixed_qscale) {
3199         s->current_picture_ptr->f->quality =
3200         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3201         if (s->current_picture.f->quality < 0)
3202             return -1;
3203     }
3204
3205     if(s->adaptive_quant){
3206         switch(s->codec_id){
3207         case AV_CODEC_ID_MPEG4:
3208             if (CONFIG_MPEG4_ENCODER)
3209                 ff_clean_mpeg4_qscales(s);
3210             break;
3211         case AV_CODEC_ID_H263:
3212         case AV_CODEC_ID_H263P:
3213         case AV_CODEC_ID_FLV1:
3214             if (CONFIG_H263_ENCODER)
3215                 ff_clean_h263_qscales(s);
3216             break;
3217         default:
3218             ff_init_qscale_tab(s);
3219         }
3220
3221         s->lambda= s->lambda_table[0];
3222         //FIXME broken
3223     }else
3224         s->lambda = s->current_picture.f->quality;
3225     update_qscale(s);
3226     return 0;
3227 }
3228
3229 /* must be called before writing the header */
3230 static void set_frame_distances(MpegEncContext * s){
3231     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3232     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3233
3234     if(s->pict_type==AV_PICTURE_TYPE_B){
3235         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3236         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3237     }else{
3238         s->pp_time= s->time - s->last_non_b_time;
3239         s->last_non_b_time= s->time;
3240         assert(s->picture_number==0 || s->pp_time > 0);
3241     }
3242 }
3243
/**
 * Encode the current picture into s->pb.
 *
 * In order: set up timing and the rounding mode, pick a preliminary lambda,
 * run motion estimation (or, for I-pictures without fixed qscale, the
 * mb_var_thread pass) on all slice contexts, possibly re-type a P-picture as
 * an I-picture on a scene change, choose f_code/b_code and run the
 * ff_fix_long_*mvs() helpers, call estimate_qp(), write the picture header
 * matching s->out_format and finally run encode_thread() on all slice
 * contexts and merge their results into the main context.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, a negative value on failure
 */
3244 static int encode_picture(MpegEncContext *s, int picture_number)
3245 {
3246     int i, ret;
3247     int bits;
3248     int context_count = s->slice_context_count;
3249
3250     s->picture_number = picture_number;
3251
3252     /* Reset the average MB variance */
3253     s->me.mb_var_sum_temp    =
3254     s->me.mc_mb_var_sum_temp = 0;
3255
3256     /* we need to initialize some time vars before we can encode b-frames */
3257     // RAL: Condition added for MPEG1VIDEO
3258     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3259         set_frame_distances(s);
3260     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3261         ff_set_mpeg4_time(s);
3262
3263     s->me.scene_change_score=0;
3264
3265 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3266
         /* Rounding mode: on I-pictures it is reset (1 for msmpeg4_version >= 3,
          * 0 otherwise); on other non-B pictures it is toggled when
          * flipflop_rounding is set or the codec is H263P or MPEG4. */
3267     if(s->pict_type==AV_PICTURE_TYPE_I){
3268         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3269         else                        s->no_rounding=0;
3270     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3271         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3272             s->no_rounding ^= 1;
3273     }
3274
         /* Preliminary lambda for motion estimation: with CODEC_FLAG_PASS2 do a
          * dry-run estimate_qp() followed by ff_get_2pass_fcode(); otherwise,
          * unless CODEC_FLAG_QSCALE is set, reuse the value stored in
          * last_lambda_for[] for the relevant picture type. */
3275     if(s->flags & CODEC_FLAG_PASS2){
3276         if (estimate_qp(s,1) < 0)
3277             return -1;
3278         ff_get_2pass_fcode(s);
3279     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3280         if(s->pict_type==AV_PICTURE_TYPE_B)
3281             s->lambda= s->last_lambda_for[s->pict_type];
3282         else
3283             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3284         update_qscale(s);
3285     }
3286
3287     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* update the additional slice contexts (index 1 and up) from the main one */
3288     for(i=1; i<context_count; i++){
3289         ret = ff_update_duplicate_context(s->thread_context[i], s);
3290         if (ret < 0)
3291             return ret;
3292     }
3293
3294     if(ff_init_me(s)<0)
3295         return -1;
3296
3297     /* Estimate motion for every MB */
3298     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation / 256,
              * rounding to nearest */
3299         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3300         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3301         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass: only when pre_me is set and the previous non-B
                  * picture was an I-picture, or always when pre_me == 2 */
3302             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3303                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3304             }
3305         }
3306
3307         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3308     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3309         /* I-Frame */
3310         for(i=0; i<s->mb_stride*s->mb_height; i++)
3311             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3312
3313         if(!s->fixed_qscale){
3314             /* finding spatial complexity for I-frame rate control */
3315             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3316         }
3317     }
         /* collect the statistics gathered by the additional slice contexts */
3318     for(i=1; i<context_count; i++){
3319         merge_context_after_me(s, s->thread_context[i]);
3320     }
3321     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3322     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3323     emms_c();
3324
         /* a P-picture whose scene change score exceeds the threshold is
          * re-typed as an I-picture and all its MBs are marked intra */
3325     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3326         s->pict_type= AV_PICTURE_TYPE_I;
3327         for(i=0; i<s->mb_stride*s->mb_height; i++)
3328             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3329         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3330                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3331     }
3332
         /* Unless umvplus is set: derive f_code (and b_code for B-pictures)
          * from the motion vector tables, then pass the tables together with
          * the chosen code to the ff_fix_long_*mvs() helpers. */
3333     if(!s->umvplus){
3334         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3335             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3336
3337             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3338                 int a,b;
3339                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3340                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3341                 s->f_code= FFMAX3(s->f_code, a, b);
3342             }
3343
3344             ff_fix_long_p_mvs(s);
3345             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3346             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3347                 int j;
3348                 for(i=0; i<2; i++){
3349                     for(j=0; j<2; j++)
3350                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3351                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3352                 }
3353             }
3354         }
3355
3356         if(s->pict_type==AV_PICTURE_TYPE_B){
3357             int a, b;
3358
                 /* f_code covers the forward tables, b_code the backward ones */
3359             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3360             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3361             s->f_code = FFMAX(a, b);
3362
3363             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3364             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3365             s->b_code = FFMAX(a, b);
3366
3367             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3368             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3369             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3370             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3371             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3372                 int dir, j;
3373                 for(dir=0; dir<2; dir++){
3374                     for(i=0; i<2; i++){
3375                         for(j=0; j<2; j++){
3376                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3377                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3378                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3379                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3380                         }
3381                     }
3382                 }
3383             }
3384         }
3385     }
3386
         /* final (non dry-run) quality / qscale decision for this picture */
3387     if (estimate_qp(s, 0) < 0)
3388         return -1;
3389
3390     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3391         s->qscale= 3; //reduce clipping problems
3392
3393     if (s->out_format == FMT_MJPEG) {
3394         /* for mjpeg, we do include qscale in the matrix */
3395         for(i=1;i<64;i++){
3396             int j = s->idsp.idct_permutation[i];
3397
3398             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3399         }
3400         s->y_dc_scale_table=
3401         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3402         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3403         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3404                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix above, so it is
              * pinned to 8 from here on */
3405         s->qscale= 8;
3406     }
3407
3408     //FIXME var duplication
3409     s->current_picture_ptr->f->key_frame =
3410     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3411     s->current_picture_ptr->f->pict_type =
3412     s->current_picture.f->pict_type = s->pict_type;
3413
3414     if (s->current_picture.f->key_frame)
3415         s->picture_in_gop_number=0;
3416
         /* write the picture header for the output format; header_bits is the
          * number of bits this added to s->pb */
3417     s->last_bits= put_bits_count(&s->pb);
3418     switch(s->out_format) {
3419     case FMT_MJPEG:
3420         if (CONFIG_MJPEG_ENCODER)
3421             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3422                                            s->intra_matrix);
3423         break;
3424     case FMT_H261:
3425         if (CONFIG_H261_ENCODER)
3426             ff_h261_encode_picture_header(s, picture_number);
3427         break;
3428     case FMT_H263:
3429         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3430             ff_wmv2_encode_picture_header(s, picture_number);
3431         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3432             ff_msmpeg4_encode_picture_header(s, picture_number);
3433         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3434             ff_mpeg4_encode_picture_header(s, picture_number);
3435         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3436             ff_rv10_encode_picture_header(s, picture_number);
3437         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3438             ff_rv20_encode_picture_header(s, picture_number);
3439         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3440             ff_flv_encode_picture_header(s, picture_number);
3441         else if (CONFIG_H263_ENCODER)
3442             ff_h263_encode_picture_header(s, picture_number);
3443         break;
3444     case FMT_MPEG1:
3445         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3446             ff_mpeg1_encode_picture_header(s, picture_number);
3447         break;
3448     default:
3449         assert(0);
3450     }
3451     bits= put_bits_count(&s->pb);
3452     s->header_bits= bits - s->last_bits;
3453
         /* refresh the additional slice contexts, encode all slices, then merge
          * the slice results (counters and bitstreams) into the main context */
3454     for(i=1; i<context_count; i++){
3455         update_duplicate_context_after_me(s->thread_context[i], s);
3456     }
3457     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3458     for(i=1; i<context_count; i++){
3459         merge_context_after_encode(s, s->thread_context[i]);
3460     }
3461     emms_c();
3462     return 0;
3463 }
3464
3465 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3466     const int intra= s->mb_intra;
3467     int i;
3468
3469     s->dct_count[intra]++;
3470
3471     for(i=0; i<64; i++){
3472         int level= block[i];
3473
3474         if(level){
3475             if(level>0){
3476                 s->dct_error_sum[intra][i] += level;
3477                 level -= s->dct_offset[intra][i];
3478                 if(level<0) level=0;
3479             }else{
3480                 s->dct_error_sum[intra][i] -= level;
3481                 level += s->dct_offset[intra][i];
3482                 if(level>0) level=0;
3483             }
3484             block[i]= level;
3485         }
3486     }
3487 }
3488
/**
 * Forward DCT followed by rate-distortion optimized ("trellis") quantization
 * of one block of 64 coefficients.
 *
 * After s->fdsp.fdct() (and s->denoise_dct() when s->dct_error_sum is set),
 * one or two candidate levels are collected for every scan position up to
 * the last coefficient that survives plain quantization.  A dynamic
 * programming pass over the scan positions then selects the run/level
 * sequence with the smallest distortion + lambda * bits, using the AC VLC
 * length tables of the context, and the chosen levels are written back into
 * @p block at the permuted scan positions.
 *
 * @param s        encoder context
 * @param block    64 values, transformed and quantized in place
 * @param n        block index; n < 4 uses s->y_dc_scale, otherwise
 *                 s->c_dc_scale, and s->coded_score[n] receives the score
 * @param qscale   quantizer; selects the row of the quantization matrices
 * @param overflow set to non-zero when the OR of the quantized magnitudes
 *                 exceeds s->max_qcoeff
 * @return the scan index of the last non-zero coefficient; values below the
 *         first coded index (0 for intra, -1 for inter blocks) mean that
 *         nothing beyond the intra DC is coded
 */
3489 static int dct_quantize_trellis_c(MpegEncContext *s,
3490                                   int16_t *block, int n,
3491                                   int qscale, int *overflow){
3492     const int *qmat;
3493     const uint8_t *scantable= s->intra_scantable.scantable;
3494     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3495     int max=0;
3496     unsigned int threshold1, threshold2;
3497     int bias=0;
3498     int run_tab[65];
3499     int level_tab[65];
3500     int score_tab[65];
3501     int survivor[65];
3502     int survivor_count;
3503     int last_run=0;
3504     int last_level=0;
3505     int last_score= 0;
3506     int last_i;
3507     int coeff[2][64];
3508     int coeff_count[64];
3509     int qmul, qadd, start_i, last_non_zero, i, dc;
3510     const int esc_length= s->ac_esc_length;
3511     uint8_t * length;
3512     uint8_t * last_length;
3513     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3514
3515     s->fdsp.fdct(block);
3516
3517     if(s->dct_error_sum)
3518         s->denoise_dct(s, block);
3519     qmul= qscale*16;
3520     qadd= ((qscale-1)|1)*8;
3521
3522     if (s->mb_intra) {
3523         int q;
3524         if (!s->h263_aic) {
3525             if (n < 4)
3526                 q = s->y_dc_scale;
3527             else
3528                 q = s->c_dc_scale;
3529             q = q << 3;
3530         } else{
3531             /* For AIC we skip quant/dequant of INTRADC */
3532             q = 1 << 3;
3533             qadd=0;
3534         }
3535
3536         /* note: block[0] is assumed to be positive */
3537         block[0] = (block[0] + (q >> 1)) / q;
3538         start_i = 1;
3539         last_non_zero = 0;
3540         qmat = s->q_intra_matrix[qscale];
3541         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3542             bias= 1<<(QMAT_SHIFT-1);
3543         length     = s->intra_ac_vlc_length;
3544         last_length= s->intra_ac_vlc_last_length;
3545     } else {
3546         start_i = 0;
3547         last_non_zero = -1;
3548         qmat = s->q_inter_matrix[qscale];
3549         length     = s->inter_ac_vlc_length;
3550         last_length= s->inter_ac_vlc_last_length;
3551     }
3552     last_i= start_i;
3553
         /* (unsigned)(level + threshold1) > threshold2 is a single-compare test
          * for |level| > threshold1 */
3554     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3555     threshold2= (threshold1<<1);
3556
         /* find the last scan position whose coefficient passes the threshold */
3557     for(i=63; i>=start_i; i--) {
3558         const int j = scantable[i];
3559         int level = block[j] * qmat[j];
3560
3561         if(((unsigned)(level+threshold1))>threshold2){
3562             last_non_zero = i;
3563             break;
3564         }
3565     }
3566
         /* collect the candidate levels for every position up to last_non_zero:
          * the quantized level and, when it is at least 2 in magnitude, the
          * next level towards zero; sub-threshold coefficients get +-1 only */
3567     for(i=start_i; i<=last_non_zero; i++) {
3568         const int j = scantable[i];
3569         int level = block[j] * qmat[j];
3570
3571 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3572 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3573         if(((unsigned)(level+threshold1))>threshold2){
3574             if(level>0){
3575                 level= (bias + level)>>QMAT_SHIFT;
3576                 coeff[0][i]= level;
3577                 coeff[1][i]= level-1;
3578 //                coeff[2][k]= level-2;
3579             }else{
3580                 level= (bias - level)>>QMAT_SHIFT;
3581                 coeff[0][i]= -level;
3582                 coeff[1][i]= -level+1;
3583 //                coeff[2][k]= -level+2;
3584             }
3585             coeff_count[i]= FFMIN(level, 2);
3586             assert(coeff_count[i]);
3587             max |=level;
3588         }else{
3589             coeff[0][i]= (level>>31)|1;
3590             coeff_count[i]= 1;
3591         }
3592     }
3593
3594     *overflow= s->max_qcoeff < max; //overflow might have happened
3595
         /* nothing passed the threshold: clear the coefficients from start_i on */
3596     if(last_non_zero < start_i){
3597         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3598         return last_non_zero;
3599     }
3600
3601     score_tab[start_i]= 0;
3602     survivor[0]= start_i;
3603     survivor_count= 1;
3604
         /* Dynamic programming over the scan positions.  score_tab[i+1] holds
          * the best score found for paths that code a level at position i;
          * run_tab[i+1]/level_tab[i+1] remember the run and level chosen for
          * it.  survivor[] lists the positions a run may start from. */
3605     for(i=start_i; i<=last_non_zero; i++){
3606         int level_index, j, zero_distortion;
3607         int dct_coeff= FFABS(block[ scantable[i] ]);
3608         int best_score=256*256*256*120;
3609
3610         if (s->fdsp.fdct == ff_fdct_ifast)
3611             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3612         zero_distortion= dct_coeff*dct_coeff;
3613
3614         for(level_index=0; level_index < coeff_count[i]; level_index++){
3615             int distortion;
3616             int level= coeff[level_index][i];
3617             const int alevel= FFABS(level);
3618             int unquant_coeff;
3619
3620             assert(level);
3621
                 /* reconstruct the value this level would give, in order to
                  * measure the distortion it causes */
3622             if(s->out_format == FMT_H263){
3623                 unquant_coeff= alevel*qmul + qadd;
3624             }else{ //MPEG1
3625                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3626                 if(s->mb_intra){
3627                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3628                         unquant_coeff =   (unquant_coeff - 1) | 1;
3629                 }else{
3630                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3631                         unquant_coeff =   (unquant_coeff - 1) | 1;
3632                 }
3633                 unquant_coeff<<= 3;
3634             }
3635
                 /* distortion is measured relative to zeroing the coefficient */
3636             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* levels in -64..63 are looked up in the VLC length tables,
                  * anything else is charged the escape length */
3637             level+=64;
3638             if((level&(~127)) == 0){
3639                 for(j=survivor_count-1; j>=0; j--){
3640                     int run= i - survivor[j];
3641                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3642                     score += score_tab[i-run];
3643
3644                     if(score < best_score){
3645                         best_score= score;
3646                         run_tab[i+1]= run;
3647                         level_tab[i+1]= level-64;
3648                     }
3649                 }
3650
                     /* for H.263-style output also evaluate ending the block
                      * here, using the "last" length table */
3651                 if(s->out_format == FMT_H263){
3652                     for(j=survivor_count-1; j>=0; j--){
3653                         int run= i - survivor[j];
3654                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3655                         score += score_tab[i-run];
3656                         if(score < last_score){
3657                             last_score= score;
3658                             last_run= run;
3659                             last_level= level-64;
3660                             last_i= i+1;
3661                         }
3662                     }
3663                 }
3664             }else{
3665                 distortion += esc_length*lambda;
3666                 for(j=survivor_count-1; j>=0; j--){
3667                     int run= i - survivor[j];
3668                     int score= distortion + score_tab[i-run];
3669
3670                     if(score < best_score){
3671                         best_score= score;
3672                         run_tab[i+1]= run;
3673                         level_tab[i+1]= level-64;
3674                     }
3675                 }
3676
3677                 if(s->out_format == FMT_H263){
3678                   for(j=survivor_count-1; j>=0; j--){
3679                         int run= i - survivor[j];
3680                         int score= distortion + score_tab[i-run];
3681                         if(score < last_score){
3682                             last_score= score;
3683                             last_run= run;
3684                             last_level= level-64;
3685                             last_i= i+1;
3686                         }
3687                     }
3688                 }
3689             }
3690         }
3691
3692         score_tab[i+1]= best_score;
3693
             /* drop trailing survivors whose score exceeds the new best score
              * (plus a tolerance of lambda when last_non_zero > 27) */
3694         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3695         if(last_non_zero <= 27){
3696             for(; survivor_count; survivor_count--){
3697                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3698                     break;
3699             }
3700         }else{
3701             for(; survivor_count; survivor_count--){
3702                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3703                     break;
3704             }
3705         }
3706
3707         survivor[ survivor_count++ ]= i+1;
3708     }
3709
         /* for the other output formats the end position is chosen afterwards:
          * take the best score_tab entry, adding 2*lambda for every i != 0 */
3710     if(s->out_format != FMT_H263){
3711         last_score= 256*256*256*120;
3712         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3713             int score= score_tab[i];
3714             if(i) score += lambda*2; //FIXME exacter?
3715
3716             if(score < last_score){
3717                 last_score= score;
3718                 last_i= i;
3719                 last_level= level_tab[i];
3720                 last_run= run_tab[i];
3721             }
3722         }
3723     }
3724
3725     s->coded_score[n] = last_score;
3726
         /* keep |block[0]| for the single-coefficient case below, then clear
          * everything from start_i on before writing the chosen levels */
3727     dc= FFABS(block[0]);
3728     last_non_zero= last_i - 1;
3729     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3730
3731     if(last_non_zero < start_i)
3732         return last_non_zero;
3733
         /* inter block whose only remaining coefficient is at position 0:
          * re-decide between its candidate levels and coding nothing at all */
3734     if(last_non_zero == 0 && start_i == 0){
3735         int best_level= 0;
3736         int best_score= dc * dc;
3737
3738         for(i=0; i<coeff_count[0]; i++){
3739             int level= coeff[i][0];
3740             int alevel= FFABS(level);
3741             int unquant_coeff, score, distortion;
3742
3743             if(s->out_format == FMT_H263){
3744                     unquant_coeff= (alevel*qmul + qadd)>>3;
3745             }else{ //MPEG1
3746                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3747                     unquant_coeff =   (unquant_coeff - 1) | 1;
3748             }
3749             unquant_coeff = (unquant_coeff + 4) >> 3;
3750             unquant_coeff<<= 3 + 3;
3751
3752             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3753             level+=64;
3754             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3755             else                    score= distortion + esc_length*lambda;
3756
3757             if(score < best_score){
3758                 best_score= score;
3759                 best_level= level - 64;
3760             }
3761         }
3762         block[0]= best_level;
3763         s->coded_score[n] = best_score - dc*dc;
3764         if(best_level == 0) return -1;
3765         else                return last_non_zero;
3766     }
3767
         /* store the last level, then walk the chosen path backwards through
          * run_tab[]/level_tab[] and store the remaining levels */
3768     i= last_i;
3769     assert(last_level);
3770
3771     block[ perm_scantable[last_non_zero] ]= last_level;
3772     i -= last_run + 1;
3773
3774     for(; i>start_i; i -= run_tab[i] + 1){
3775         block[ perm_scantable[i-1] ]= level_tab[i];
3776     }
3777
3778     return last_non_zero;
3779 }
3780
3781 //#define REFINE_STATS 1
     /*
      * Table of scaled 2-D cosine basis functions, filled by build_basis().
      * The first index is the coefficient index after applying the IDCT
      * permutation, the second one is 8*x + y of the sample position.
      * dct_quantize_refine() treats basis[0][0] == 0 as "not built yet".
      */
3782 static int16_t basis[64][64];
3783
3784 static void build_basis(uint8_t *perm){
3785     int i, j, x, y;
3786     emms_c();
3787     for(i=0; i<8; i++){
3788         for(j=0; j<8; j++){
3789             for(y=0; y<8; y++){
3790                 for(x=0; x<8; x++){
3791                     double s= 0.25*(1<<BASIS_SHIFT);
3792                     int index= 8*i + j;
3793                     int perm_index= perm[index];
3794                     if(i==0) s*= sqrt(0.5);
3795                     if(j==0) s*= sqrt(0.5);
3796                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3797                 }
3798             }
3799         }
3800     }
3801 }
3802
3803 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3804                         int16_t *block, int16_t *weight, int16_t *orig,
3805                         int n, int qscale){
3806     int16_t rem[64];
3807     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3808     const uint8_t *scantable= s->intra_scantable.scantable;
3809     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3810 //    unsigned int threshold1, threshold2;
3811 //    int bias=0;
3812     int run_tab[65];
3813     int prev_run=0;
3814     int prev_level=0;
3815     int qmul, qadd, start_i, last_non_zero, i, dc;
3816     uint8_t * length;
3817     uint8_t * last_length;
3818     int lambda;
3819     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3820 #ifdef REFINE_STATS
3821 static int count=0;
3822 static int after_last=0;
3823 static int to_zero=0;
3824 static int from_zero=0;
3825 static int raise=0;
3826 static int lower=0;
3827 static int messed_sign=0;
3828 #endif
3829
3830     if(basis[0][0] == 0)
3831         build_basis(s->idsp.idct_permutation);
3832
3833     qmul= qscale*2;
3834     qadd= (qscale-1)|1;
3835     if (s->mb_intra) {
3836         if (!s->h263_aic) {
3837             if (n < 4)
3838                 q = s->y_dc_scale;
3839             else
3840                 q = s->c_dc_scale;
3841         } else{
3842             /* For AIC we skip quant/dequant of INTRADC */
3843             q = 1;
3844             qadd=0;
3845         }
3846         q <<= RECON_SHIFT-3;
3847         /* note: block[0] is assumed to be positive */
3848         dc= block[0]*q;
3849 //        block[0] = (block[0] + (q >> 1)) / q;
3850         start_i = 1;
3851 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3852 //            bias= 1<<(QMAT_SHIFT-1);
3853         length     = s->intra_ac_vlc_length;
3854         last_length= s->intra_ac_vlc_last_length;
3855     } else {
3856         dc= 0;
3857         start_i = 0;
3858         length     = s->inter_ac_vlc_length;
3859         last_length= s->inter_ac_vlc_last_length;
3860     }
3861     last_non_zero = s->block_last_index[n];
3862
3863 #ifdef REFINE_STATS
3864 {START_TIMER
3865 #endif
3866     dc += (1<<(RECON_SHIFT-1));
3867     for(i=0; i<64; i++){
3868         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3869     }
3870 #ifdef REFINE_STATS
3871 STOP_TIMER("memset rem[]")}
3872 #endif
3873     sum=0;
3874     for(i=0; i<64; i++){
3875         int one= 36;
3876         int qns=4;
3877         int w;
3878
3879         w= FFABS(weight[i]) + qns*one;
3880         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3881
3882         weight[i] = w;
3883 //        w=weight[i] = (63*qns + (w/2)) / w;
3884
3885         assert(w>0);
3886         assert(w<(1<<6));
3887         sum += w*w;
3888     }
3889     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3890 #ifdef REFINE_STATS
3891 {START_TIMER
3892 #endif
3893     run=0;
3894     rle_index=0;
3895     for(i=start_i; i<=last_non_zero; i++){
3896         int j= perm_scantable[i];
3897         const int level= block[j];
3898         int coeff;
3899
3900         if(level){
3901             if(level<0) coeff= qmul*level - qadd;
3902             else        coeff= qmul*level + qadd;
3903             run_tab[rle_index++]=run;
3904             run=0;
3905
3906             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3907         }else{
3908             run++;
3909         }
3910     }
3911 #ifdef REFINE_STATS
3912 if(last_non_zero>0){
3913 STOP_TIMER("init rem[]")
3914 }
3915 }
3916
3917 {START_TIMER
3918 #endif
3919     for(;;){
3920         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3921         int best_coeff=0;
3922         int best_change=0;
3923         int run2, best_unquant_change=0, analyze_gradient;
3924 #ifdef REFINE_STATS
3925 {START_TIMER
3926 #endif
3927         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3928
3929         if(analyze_gradient){
3930 #ifdef REFINE_STATS
3931 {START_TIMER
3932 #endif
3933             for(i=0; i<64; i++){
3934                 int w= weight[i];
3935
3936                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3937             }
3938 #ifdef REFINE_STATS
3939 STOP_TIMER("rem*w*w")}
3940 {START_TIMER
3941 #endif
3942             s->fdsp.fdct(d1);
3943 #ifdef REFINE_STATS
3944 STOP_TIMER("dct")}
3945 #endif
3946         }
3947
3948         if(start_i){
3949             const int level= block[0];
3950             int change, old_coeff;
3951
3952             assert(s->mb_intra);
3953
3954             old_coeff= q*level;
3955
3956             for(change=-1; change<=1; change+=2){
3957                 int new_level= level + change;
3958                 int score, new_coeff;
3959
3960                 new_coeff= q*new_level;
3961                 if(new_coeff >= 2048 || new_coeff < 0)
3962                     continue;
3963
3964                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3965                                                   new_coeff - old_coeff);
3966                 if(score<best_score){
3967                     best_score= score;
3968                     best_coeff= 0;
3969                     best_change= change;
3970                     best_unquant_change= new_coeff - old_coeff;
3971                 }
3972             }
3973         }
3974
3975         run=0;
3976         rle_index=0;
3977         run2= run_tab[rle_index++];
3978         prev_level=0;
3979         prev_run=0;
3980
3981         for(i=start_i; i<64; i++){
3982             int j= perm_scantable[i];
3983             const int level= block[j];
3984             int change, old_coeff;
3985
3986             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3987                 break;
3988
3989             if(level){
3990                 if(level<0) old_coeff= qmul*level - qadd;
3991                 else        old_coeff= qmul*level + qadd;
3992                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3993             }else{
3994                 old_coeff=0;
3995                 run2--;
3996                 assert(run2>=0 || i >= last_non_zero );
3997             }
3998
3999             for(change=-1; change<=1; change+=2){
4000                 int new_level= level + change;
4001                 int score, new_coeff, unquant_change;
4002
4003                 score=0;
4004                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4005                    continue;
4006
4007                 if(new_level){
4008                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4009                     else            new_coeff= qmul*new_level + qadd;
4010                     if(new_coeff >= 2048 || new_coeff <= -2048)
4011                         continue;
4012                     //FIXME check for overflow
4013
4014                     if(level){
4015                         if(level < 63 && level > -63){
4016                             if(i < last_non_zero)
4017                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4018                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4019                             else
4020                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4021                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4022                         }
4023                     }else{
4024                         assert(FFABS(new_level)==1);
4025
4026                         if(analyze_gradient){
4027                             int g= d1[ scantable[i] ];
4028                             if(g && (g^new_level) >= 0)
4029                                 continue;
4030                         }
4031
4032                         if(i < last_non_zero){
4033                             int next_i= i + run2 + 1;
4034                             int next_level= block[ perm_scantable[next_i] ] + 64;
4035
4036                             if(next_level&(~127))
4037                                 next_level= 0;
4038
4039                             if(next_i < last_non_zero)
4040                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4041                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4042                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4043                             else
4044                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4045                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4046                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4047                         }else{
4048                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4049                             if(prev_level){
4050                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4051                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4052                             }
4053                         }
4054                     }
4055                 }else{
4056                     new_coeff=0;
4057                     assert(FFABS(level)==1);
4058
4059                     if(i < last_non_zero){
4060                         int next_i= i + run2 + 1;
4061                         int next_level= block[ perm_scantable[next_i] ] + 64;
4062
4063                         if(next_level&(~127))
4064                             next_level= 0;
4065
4066                         if(next_i < last_non_zero)
4067                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4068                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4069                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4070                         else
4071                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4072                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4073                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4074                     }else{
4075                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4076                         if(prev_level){
4077                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4078                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4079                         }
4080                     }
4081                 }
4082
4083                 score *= lambda;
4084
4085                 unquant_change= new_coeff - old_coeff;
4086                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4087
4088                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4089                                                    unquant_change);
4090                 if(score<best_score){
4091                     best_score= score;
4092                     best_coeff= i;
4093                     best_change= change;
4094                     best_unquant_change= unquant_change;
4095                 }
4096             }
4097             if(level){
4098                 prev_level= level + 64;
4099                 if(prev_level&(~127))
4100                     prev_level= 0;
4101                 prev_run= run;
4102                 run=0;
4103             }else{
4104                 run++;
4105             }
4106         }
4107 #ifdef REFINE_STATS
4108 STOP_TIMER("iterative step")}
4109 #endif
4110
4111         if(best_change){
4112             int j= perm_scantable[ best_coeff ];
4113
4114             block[j] += best_change;
4115
4116             if(best_coeff > last_non_zero){
4117                 last_non_zero= best_coeff;
4118                 assert(block[j]);
4119 #ifdef REFINE_STATS
4120 after_last++;
4121 #endif
4122             }else{
4123 #ifdef REFINE_STATS
4124 if(block[j]){
4125     if(block[j] - best_change){
4126         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4127             raise++;
4128         }else{
4129             lower++;
4130         }
4131     }else{
4132         from_zero++;
4133     }
4134 }else{
4135     to_zero++;
4136 }
4137 #endif
4138                 for(; last_non_zero>=start_i; last_non_zero--){
4139                     if(block[perm_scantable[last_non_zero]])
4140                         break;
4141                 }
4142             }
4143 #ifdef REFINE_STATS
4144 count++;
4145 if(256*256*256*64 % count == 0){
4146     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4147 }
4148 #endif
4149             run=0;
4150             rle_index=0;
4151             for(i=start_i; i<=last_non_zero; i++){
4152                 int j= perm_scantable[i];
4153                 const int level= block[j];
4154
4155                  if(level){
4156                      run_tab[rle_index++]=run;
4157                      run=0;
4158                  }else{
4159                      run++;
4160                  }
4161             }
4162
4163             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4164         }else{
4165             break;
4166         }
4167     }
4168 #ifdef REFINE_STATS
4169 if(last_non_zero>0){
4170 STOP_TIMER("iterative search")
4171 }
4172 }
4173 #endif
4174
4175     return last_non_zero;
4176 }
4177
4178 int ff_dct_quantize_c(MpegEncContext *s,
4179                         int16_t *block, int n,
4180                         int qscale, int *overflow)
4181 {
4182     int i, j, level, last_non_zero, q, start_i;
4183     const int *qmat;
4184     const uint8_t *scantable= s->intra_scantable.scantable;
4185     int bias;
4186     int max=0;
4187     unsigned int threshold1, threshold2;
4188
4189     s->fdsp.fdct(block);
4190
4191     if(s->dct_error_sum)
4192         s->denoise_dct(s, block);
4193
4194     if (s->mb_intra) {
4195         if (!s->h263_aic) {
4196             if (n < 4)
4197                 q = s->y_dc_scale;
4198             else
4199                 q = s->c_dc_scale;
4200             q = q << 3;
4201         } else
4202             /* For AIC we skip quant/dequant of INTRADC */
4203             q = 1 << 3;
4204
4205         /* note: block[0] is assumed to be positive */
4206         block[0] = (block[0] + (q >> 1)) / q;
4207         start_i = 1;
4208         last_non_zero = 0;
4209         qmat = s->q_intra_matrix[qscale];
4210         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4211     } else {
4212         start_i = 0;
4213         last_non_zero = -1;
4214         qmat = s->q_inter_matrix[qscale];
4215         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4216     }
4217     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4218     threshold2= (threshold1<<1);
4219     for(i=63;i>=start_i;i--) {
4220         j = scantable[i];
4221         level = block[j] * qmat[j];
4222
4223         if(((unsigned)(level+threshold1))>threshold2){
4224             last_non_zero = i;
4225             break;
4226         }else{
4227             block[j]=0;
4228         }
4229     }
4230     for(i=start_i; i<=last_non_zero; i++) {
4231         j = scantable[i];
4232         level = block[j] * qmat[j];
4233
4234 //        if(   bias+level >= (1<<QMAT_SHIFT)
4235 //           || bias-level >= (1<<QMAT_SHIFT)){
4236         if(((unsigned)(level+threshold1))>threshold2){
4237             if(level>0){
4238                 level= (bias + level)>>QMAT_SHIFT;
4239                 block[j]= level;
4240             }else{
4241                 level= (bias - level)>>QMAT_SHIFT;
4242                 block[j]= -level;
4243             }
4244             max |=level;
4245         }else{
4246             block[j]=0;
4247         }
4248     }
4249     *overflow= s->max_qcoeff < max; //overflow might have happened
4250
4251     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4252     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4253         ff_block_permute(block, s->idsp.idct_permutation,
4254                          scantable, last_non_zero);
4255
4256     return last_non_zero;
4257 }
4258
/* Byte offset of field x inside MpegEncContext, used by the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the AVOption tables in this file. The expansion is
 * parenthesized so it stays a single operand wherever the macro is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4261 static const AVOption h263_options[] = {
4262     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4263     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4264     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4265     FF_MPV_COMMON_OPTS
4266     { NULL },
4267 };
4268
4269 static const AVClass h263_class = {
4270     .class_name = "H.263 encoder",
4271     .item_name  = av_default_item_name,
4272     .option     = h263_options,
4273     .version    = LIBAVUTIL_VERSION_INT,
4274 };
4275
4276 AVCodec ff_h263_encoder = {
4277     .name           = "h263",
4278     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4279     .type           = AVMEDIA_TYPE_VIDEO,
4280     .id             = AV_CODEC_ID_H263,
4281     .priv_data_size = sizeof(MpegEncContext),
4282     .init           = ff_mpv_encode_init,
4283     .encode2        = ff_mpv_encode_picture,
4284     .close          = ff_mpv_encode_end,
4285     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4286     .priv_class     = &h263_class,
4287 };
4288
4289 static const AVOption h263p_options[] = {
4290     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4291     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4292     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4293     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4294     FF_MPV_COMMON_OPTS
4295     { NULL },
4296 };
4297 static const AVClass h263p_class = {
4298     .class_name = "H.263p encoder",
4299     .item_name  = av_default_item_name,
4300     .option     = h263p_options,
4301     .version    = LIBAVUTIL_VERSION_INT,
4302 };
4303
4304 AVCodec ff_h263p_encoder = {
4305     .name           = "h263p",
4306     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4307     .type           = AVMEDIA_TYPE_VIDEO,
4308     .id             = AV_CODEC_ID_H263P,
4309     .priv_data_size = sizeof(MpegEncContext),
4310     .init           = ff_mpv_encode_init,
4311     .encode2        = ff_mpv_encode_picture,
4312     .close          = ff_mpv_encode_end,
4313     .capabilities   = CODEC_CAP_SLICE_THREADS,
4314     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4315     .priv_class     = &h263p_class,
4316 };
4317
4318 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4319
4320 AVCodec ff_msmpeg4v2_encoder = {
4321     .name           = "msmpeg4v2",
4322     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4323     .type           = AVMEDIA_TYPE_VIDEO,
4324     .id             = AV_CODEC_ID_MSMPEG4V2,
4325     .priv_data_size = sizeof(MpegEncContext),
4326     .init           = ff_mpv_encode_init,
4327     .encode2        = ff_mpv_encode_picture,
4328     .close          = ff_mpv_encode_end,
4329     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4330     .priv_class     = &msmpeg4v2_class,
4331 };
4332
4333 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4334
4335 AVCodec ff_msmpeg4v3_encoder = {
4336     .name           = "msmpeg4",
4337     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4338     .type           = AVMEDIA_TYPE_VIDEO,
4339     .id             = AV_CODEC_ID_MSMPEG4V3,
4340     .priv_data_size = sizeof(MpegEncContext),
4341     .init           = ff_mpv_encode_init,
4342     .encode2        = ff_mpv_encode_picture,
4343     .close          = ff_mpv_encode_end,
4344     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4345     .priv_class     = &msmpeg4v3_class,
4346 };
4347
4348 FF_MPV_GENERIC_CLASS(wmv1)
4349
4350 AVCodec ff_wmv1_encoder = {
4351     .name           = "wmv1",
4352     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4353     .type           = AVMEDIA_TYPE_VIDEO,
4354     .id             = AV_CODEC_ID_WMV1,
4355     .priv_data_size = sizeof(MpegEncContext),
4356     .init           = ff_mpv_encode_init,
4357     .encode2        = ff_mpv_encode_picture,
4358     .close          = ff_mpv_encode_end,
4359     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4360     .priv_class     = &wmv1_class,
4361 };