]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
lavc: deprecate unused mb_threshold field
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
61 #define QUANT_BIAS_SHIFT 8
62
63 #define QMAT_SHIFT_MMX 16
64 #define QMAT_SHIFT 22
65
66 static int encode_picture(MpegEncContext *s, int picture_number);
67 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
68 static int sse_mb(MpegEncContext *s);
69 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
70 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
71
72 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
73 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
74
75 const AVOption ff_mpv_generic_options[] = {
76     FF_MPV_COMMON_OPTS
77     { NULL },
78 };
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
105                                         (qscale * quant_matrix[j]));
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 /* 16 <= qscale * quant_matrix[i] <= 7905
111                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
112                  *             19952 <=              x  <= 249205026
113                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
114                  *           3444240 >= (1 << 36) / (x) >= 275 */
115
116                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
117                                         (ff_aanscales[i] * qscale *
118                                          quant_matrix[j]));
119             }
120         } else {
121             for (i = 0; i < 64; i++) {
122                 const int j = s->idsp.idct_permutation[i];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
129                                         (qscale * quant_matrix[j]));
130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
131                 //                    (qscale * quant_matrix[i]);
132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
133                                        (qscale * quant_matrix[j]);
134
135                 if (qmat16[qscale][0][i] == 0 ||
136                     qmat16[qscale][0][i] == 128 * 256)
137                     qmat16[qscale][0][i] = 128 * 256 - 1;
138                 qmat16[qscale][1][i] =
139                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
140                                 qmat16[qscale][0][i]);
141             }
142         }
143
144         for (i = intra; i < 64; i++) {
145             int64_t max = 8191;
146             if (fdsp->fdct == ff_fdct_ifast) {
147                 max = (8191LL * ff_aanscales[i]) >> 14;
148             }
149             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
150                 shift++;
151             }
152         }
153     }
154     if (shift) {
155         av_log(NULL, AV_LOG_INFO,
156                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
157                QMAT_SHIFT - shift);
158     }
159 }
160
161 static inline void update_qscale(MpegEncContext *s)
162 {
163     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
164                 (FF_LAMBDA_SHIFT + 7);
165     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
166
167     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
168                  FF_LAMBDA_SHIFT;
169 }
170
171 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
172 {
173     int i;
174
175     if (matrix) {
176         put_bits(pb, 1, 1);
177         for (i = 0; i < 64; i++) {
178             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
179         }
180     } else
181         put_bits(pb, 1, 0);
182 }
183
184 /**
185  * init s->current_picture.qscale_table from s->lambda_table
186  */
187 void ff_init_qscale_tab(MpegEncContext *s)
188 {
189     int8_t * const qscale_table = s->current_picture.qscale_table;
190     int i;
191
192     for (i = 0; i < s->mb_num; i++) {
193         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
194         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
195         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
196                                                   s->avctx->qmax);
197     }
198 }
199
200 static void update_duplicate_context_after_me(MpegEncContext *dst,
201                                               MpegEncContext *src)
202 {
203 #define COPY(a) dst->a= src->a
204     COPY(pict_type);
205     COPY(current_picture);
206     COPY(f_code);
207     COPY(b_code);
208     COPY(qscale);
209     COPY(lambda);
210     COPY(lambda2);
211     COPY(picture_in_gop_number);
212     COPY(gop_picture_number);
213     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
214     COPY(progressive_frame);    // FIXME don't set in encode_header
215     COPY(partitioned_frame);    // FIXME don't set in encode_header
216 #undef COPY
217 }
218
219 /**
220  * Set the given MpegEncContext to defaults for encoding.
221  * the changed fields will not depend upon the prior state of the MpegEncContext.
222  */
223 static void mpv_encode_defaults(MpegEncContext *s)
224 {
225     int i;
226     ff_mpv_common_defaults(s);
227
228     for (i = -16; i < 16; i++) {
229         default_fcode_tab[i + MAX_MV] = 1;
230     }
231     s->me.mv_penalty = default_mv_penalty;
232     s->fcode_tab     = default_fcode_tab;
233
234     s->input_picture_number  = 0;
235     s->picture_in_gop_number = 0;
236 }
237
238 /* init video encoder */
239 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
240 {
241     MpegEncContext *s = avctx->priv_data;
242     int i, ret, format_supported;
243
244     mpv_encode_defaults(s);
245
246     switch (avctx->codec_id) {
247     case AV_CODEC_ID_MPEG2VIDEO:
248         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
249             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
250             av_log(avctx, AV_LOG_ERROR,
251                    "only YUV420 and YUV422 are supported\n");
252             return -1;
253         }
254         break;
255     case AV_CODEC_ID_MJPEG:
256         format_supported = 0;
257         /* JPEG color space */
258         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
259             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
260             (avctx->color_range == AVCOL_RANGE_JPEG &&
261              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
262               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
263             format_supported = 1;
264         /* MPEG color space */
265         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
266                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
267                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
268             format_supported = 1;
269
270         if (!format_supported) {
271             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
272             return -1;
273         }
274         break;
275     default:
276         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
277             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
278             return -1;
279         }
280     }
281
282     switch (avctx->pix_fmt) {
283     case AV_PIX_FMT_YUVJ422P:
284     case AV_PIX_FMT_YUV422P:
285         s->chroma_format = CHROMA_422;
286         break;
287     case AV_PIX_FMT_YUVJ420P:
288     case AV_PIX_FMT_YUV420P:
289     default:
290         s->chroma_format = CHROMA_420;
291         break;
292     }
293
294     s->bit_rate = avctx->bit_rate;
295     s->width    = avctx->width;
296     s->height   = avctx->height;
297     if (avctx->gop_size > 600 &&
298         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
299         av_log(avctx, AV_LOG_ERROR,
300                "Warning keyframe interval too large! reducing it ...\n");
301         avctx->gop_size = 600;
302     }
303     s->gop_size     = avctx->gop_size;
304     s->avctx        = avctx;
305     s->flags        = avctx->flags;
306     s->flags2       = avctx->flags2;
307     if (avctx->max_b_frames > MAX_B_FRAMES) {
308         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
309                "is %d.\n", MAX_B_FRAMES);
310     }
311     s->max_b_frames = avctx->max_b_frames;
312     s->codec_id     = avctx->codec->id;
313     s->strict_std_compliance = avctx->strict_std_compliance;
314     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
315     s->mpeg_quant         = avctx->mpeg_quant;
316     s->rtp_mode           = !!avctx->rtp_payload_size;
317     s->intra_dc_precision = avctx->intra_dc_precision;
318     s->user_specified_pts = AV_NOPTS_VALUE;
319
320     if (s->gop_size <= 1) {
321         s->intra_only = 1;
322         s->gop_size   = 12;
323     } else {
324         s->intra_only = 0;
325     }
326
327     s->me_method = avctx->me_method;
328
329     /* Fixed QSCALE */
330     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
331
332     s->adaptive_quant = (s->avctx->lumi_masking ||
333                          s->avctx->dark_masking ||
334                          s->avctx->temporal_cplx_masking ||
335                          s->avctx->spatial_cplx_masking  ||
336                          s->avctx->p_masking      ||
337                          s->avctx->border_masking ||
338                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
339                         !s->fixed_qscale;
340
341     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
342
343     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
344         av_log(avctx, AV_LOG_ERROR,
345                "a vbv buffer size is needed, "
346                "for encoding with a maximum bitrate\n");
347         return -1;
348     }
349
350     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
351         av_log(avctx, AV_LOG_INFO,
352                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
353     }
354
355     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
356         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
357         return -1;
358     }
359
360     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
361         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
362         return -1;
363     }
364
365     if (avctx->rc_max_rate &&
366         avctx->rc_max_rate == avctx->bit_rate &&
367         avctx->rc_max_rate != avctx->rc_min_rate) {
368         av_log(avctx, AV_LOG_INFO,
369                "impossible bitrate constraints, this will fail\n");
370     }
371
372     if (avctx->rc_buffer_size &&
373         avctx->bit_rate * (int64_t)avctx->time_base.num >
374             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
375         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
376         return -1;
377     }
378
379     if (!s->fixed_qscale &&
380         avctx->bit_rate * av_q2d(avctx->time_base) >
381             avctx->bit_rate_tolerance) {
382         av_log(avctx, AV_LOG_ERROR,
383                "bitrate tolerance too small for bitrate\n");
384         return -1;
385     }
386
387     if (s->avctx->rc_max_rate &&
388         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
389         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
390          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
391         90000LL * (avctx->rc_buffer_size - 1) >
392             s->avctx->rc_max_rate * 0xFFFFLL) {
393         av_log(avctx, AV_LOG_INFO,
394                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
395                "specified vbv buffer is too large for the given bitrate!\n");
396     }
397
398     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
399         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
400         s->codec_id != AV_CODEC_ID_FLV1) {
401         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
402         return -1;
403     }
404
405     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
406         av_log(avctx, AV_LOG_ERROR,
407                "OBMC is only supported with simple mb decision\n");
408         return -1;
409     }
410
411     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
412         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
413         return -1;
414     }
415
416     if (s->max_b_frames                    &&
417         s->codec_id != AV_CODEC_ID_MPEG4      &&
418         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
419         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
420         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
421         return -1;
422     }
423
424     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
425          s->codec_id == AV_CODEC_ID_H263  ||
426          s->codec_id == AV_CODEC_ID_H263P) &&
427         (avctx->sample_aspect_ratio.num > 255 ||
428          avctx->sample_aspect_ratio.den > 255)) {
429         av_log(avctx, AV_LOG_ERROR,
430                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
431                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
432         return -1;
433     }
434
435     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
436         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
437         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
438         return -1;
439     }
440
441     // FIXME mpeg2 uses that too
442     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
443         av_log(avctx, AV_LOG_ERROR,
444                "mpeg2 style quantization not supported by codec\n");
445         return -1;
446     }
447
448     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
449         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
450         return -1;
451     }
452
453     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
454         s->avctx->mb_decision != FF_MB_DECISION_RD) {
455         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
456         return -1;
457     }
458
459     if (s->avctx->scenechange_threshold < 1000000000 &&
460         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
461         av_log(avctx, AV_LOG_ERROR,
462                "closed gop with scene change detection are not supported yet, "
463                "set threshold to 1000000000\n");
464         return -1;
465     }
466
467     if (s->flags & CODEC_FLAG_LOW_DELAY) {
468         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
469             av_log(avctx, AV_LOG_ERROR,
470                   "low delay forcing is only available for mpeg2\n");
471             return -1;
472         }
473         if (s->max_b_frames != 0) {
474             av_log(avctx, AV_LOG_ERROR,
475                    "b frames cannot be used with low delay\n");
476             return -1;
477         }
478     }
479
480     if (s->q_scale_type == 1) {
481         if (avctx->qmax > 12) {
482             av_log(avctx, AV_LOG_ERROR,
483                    "non linear quant only supports qmax <= 12 currently\n");
484             return -1;
485         }
486     }
487
488     if (s->avctx->thread_count > 1         &&
489         s->codec_id != AV_CODEC_ID_MPEG4      &&
490         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
491         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
492         (s->codec_id != AV_CODEC_ID_H263P)) {
493         av_log(avctx, AV_LOG_ERROR,
494                "multi threaded encoding not supported by codec\n");
495         return -1;
496     }
497
498     if (s->avctx->thread_count < 1) {
499         av_log(avctx, AV_LOG_ERROR,
500                "automatic thread number detection not supported by codec,"
501                "patch welcome\n");
502         return -1;
503     }
504
505     if (s->avctx->thread_count > 1)
506         s->rtp_mode = 1;
507
508     if (!avctx->time_base.den || !avctx->time_base.num) {
509         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
510         return -1;
511     }
512
513     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
514         av_log(avctx, AV_LOG_INFO,
515                "notice: b_frame_strategy only affects the first pass\n");
516         avctx->b_frame_strategy = 0;
517     }
518
519     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
520     if (i > 1) {
521         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
522         avctx->time_base.den /= i;
523         avctx->time_base.num /= i;
524         //return -1;
525     }
526
527     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
528         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
529         // (a + x * 3 / 8) / x
530         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
531         s->inter_quant_bias = 0;
532     } else {
533         s->intra_quant_bias = 0;
534         // (a - x / 4) / x
535         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
536     }
537
538     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
539         s->intra_quant_bias = avctx->intra_quant_bias;
540     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
541         s->inter_quant_bias = avctx->inter_quant_bias;
542
543     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
544         s->avctx->time_base.den > (1 << 16) - 1) {
545         av_log(avctx, AV_LOG_ERROR,
546                "timebase %d/%d not supported by MPEG 4 standard, "
547                "the maximum admitted value for the timebase denominator "
548                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
549                (1 << 16) - 1);
550         return -1;
551     }
552     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
553
554     switch (avctx->codec->id) {
555     case AV_CODEC_ID_MPEG1VIDEO:
556         s->out_format = FMT_MPEG1;
557         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
558         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
559         break;
560     case AV_CODEC_ID_MPEG2VIDEO:
561         s->out_format = FMT_MPEG1;
562         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
563         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
564         s->rtp_mode   = 1;
565         break;
566     case AV_CODEC_ID_MJPEG:
567         s->out_format = FMT_MJPEG;
568         s->intra_only = 1; /* force intra only for jpeg */
569         if (!CONFIG_MJPEG_ENCODER ||
570             ff_mjpeg_encode_init(s) < 0)
571             return -1;
572         avctx->delay = 0;
573         s->low_delay = 1;
574         break;
575     case AV_CODEC_ID_H261:
576         if (!CONFIG_H261_ENCODER)
577             return -1;
578         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
579             av_log(avctx, AV_LOG_ERROR,
580                    "The specified picture size of %dx%d is not valid for the "
581                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
582                     s->width, s->height);
583             return -1;
584         }
585         s->out_format = FMT_H261;
586         avctx->delay  = 0;
587         s->low_delay  = 1;
588         break;
589     case AV_CODEC_ID_H263:
590         if (!CONFIG_H263_ENCODER)
591         return -1;
592         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
593                              s->width, s->height) == 8) {
594             av_log(avctx, AV_LOG_INFO,
595                    "The specified picture size of %dx%d is not valid for "
596                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
597                    "352x288, 704x576, and 1408x1152."
598                    "Try H.263+.\n", s->width, s->height);
599             return -1;
600         }
601         s->out_format = FMT_H263;
602         avctx->delay  = 0;
603         s->low_delay  = 1;
604         break;
605     case AV_CODEC_ID_H263P:
606         s->out_format = FMT_H263;
607         s->h263_plus  = 1;
608         /* Fx */
609         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
610         s->modified_quant  = s->h263_aic;
611         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
612         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
613
614         /* /Fx */
615         /* These are just to be sure */
616         avctx->delay = 0;
617         s->low_delay = 1;
618         break;
619     case AV_CODEC_ID_FLV1:
620         s->out_format      = FMT_H263;
621         s->h263_flv        = 2; /* format = 1; 11-bit codes */
622         s->unrestricted_mv = 1;
623         s->rtp_mode  = 0; /* don't allow GOB */
624         avctx->delay = 0;
625         s->low_delay = 1;
626         break;
627     case AV_CODEC_ID_RV10:
628         s->out_format = FMT_H263;
629         avctx->delay  = 0;
630         s->low_delay  = 1;
631         break;
632     case AV_CODEC_ID_RV20:
633         s->out_format      = FMT_H263;
634         avctx->delay       = 0;
635         s->low_delay       = 1;
636         s->modified_quant  = 1;
637         s->h263_aic        = 1;
638         s->h263_plus       = 1;
639         s->loop_filter     = 1;
640         s->unrestricted_mv = 0;
641         break;
642     case AV_CODEC_ID_MPEG4:
643         s->out_format      = FMT_H263;
644         s->h263_pred       = 1;
645         s->unrestricted_mv = 1;
646         s->low_delay       = s->max_b_frames ? 0 : 1;
647         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
648         break;
649     case AV_CODEC_ID_MSMPEG4V2:
650         s->out_format      = FMT_H263;
651         s->h263_pred       = 1;
652         s->unrestricted_mv = 1;
653         s->msmpeg4_version = 2;
654         avctx->delay       = 0;
655         s->low_delay       = 1;
656         break;
657     case AV_CODEC_ID_MSMPEG4V3:
658         s->out_format        = FMT_H263;
659         s->h263_pred         = 1;
660         s->unrestricted_mv   = 1;
661         s->msmpeg4_version   = 3;
662         s->flipflop_rounding = 1;
663         avctx->delay         = 0;
664         s->low_delay         = 1;
665         break;
666     case AV_CODEC_ID_WMV1:
667         s->out_format        = FMT_H263;
668         s->h263_pred         = 1;
669         s->unrestricted_mv   = 1;
670         s->msmpeg4_version   = 4;
671         s->flipflop_rounding = 1;
672         avctx->delay         = 0;
673         s->low_delay         = 1;
674         break;
675     case AV_CODEC_ID_WMV2:
676         s->out_format        = FMT_H263;
677         s->h263_pred         = 1;
678         s->unrestricted_mv   = 1;
679         s->msmpeg4_version   = 5;
680         s->flipflop_rounding = 1;
681         avctx->delay         = 0;
682         s->low_delay         = 1;
683         break;
684     default:
685         return -1;
686     }
687
688     avctx->has_b_frames = !s->low_delay;
689
690     s->encoding = 1;
691
692     s->progressive_frame    =
693     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
694                                                 CODEC_FLAG_INTERLACED_ME) ||
695                                 s->alternate_scan);
696
697     /* init */
698     ff_mpv_idct_init(s);
699     if (ff_mpv_common_init(s) < 0)
700         return -1;
701
702     if (ARCH_X86)
703         ff_mpv_encode_init_x86(s);
704
705     ff_fdctdsp_init(&s->fdsp, avctx);
706     ff_me_cmp_init(&s->mecc, avctx);
707     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
708     ff_pixblockdsp_init(&s->pdsp, avctx);
709     ff_qpeldsp_init(&s->qdsp);
710
711     s->avctx->coded_frame = s->current_picture.f;
712
713     if (s->msmpeg4_version) {
714         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
715                           2 * 2 * (MAX_LEVEL + 1) *
716                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
717     }
718     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
719
720     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
721     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
722     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
723     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
724     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
725                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
726     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
727                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
728
729     if (s->avctx->noise_reduction) {
730         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
731                           2 * 64 * sizeof(uint16_t), fail);
732     }
733
734     if (CONFIG_H263_ENCODER)
735         ff_h263dsp_init(&s->h263dsp);
736     if (!s->dct_quantize)
737         s->dct_quantize = ff_dct_quantize_c;
738     if (!s->denoise_dct)
739         s->denoise_dct  = denoise_dct_c;
740     s->fast_dct_quantize = s->dct_quantize;
741     if (avctx->trellis)
742         s->dct_quantize  = dct_quantize_trellis_c;
743
744     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
745         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
746
747     s->quant_precision = 5;
748
749     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
750     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
751
752     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
753         ff_h261_encode_init(s);
754     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
755         ff_h263_encode_init(s);
756     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
757         ff_msmpeg4_encode_init(s);
758     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
759         && s->out_format == FMT_MPEG1)
760         ff_mpeg1_encode_init(s);
761
762     /* init q matrix */
763     for (i = 0; i < 64; i++) {
764         int j = s->idsp.idct_permutation[i];
765         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
766             s->mpeg_quant) {
767             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
768             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
769         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
770             s->intra_matrix[j] =
771             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
772         } else {
773             /* mpeg1/2 */
774             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
776         }
777         if (s->avctx->intra_matrix)
778             s->intra_matrix[j] = s->avctx->intra_matrix[i];
779         if (s->avctx->inter_matrix)
780             s->inter_matrix[j] = s->avctx->inter_matrix[i];
781     }
782
783     /* precompute matrix */
784     /* for mjpeg, we do include qscale in the matrix */
785     if (s->out_format != FMT_MJPEG) {
786         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
787                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
788                           31, 1);
789         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
790                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
791                           31, 0);
792     }
793
794     if (ff_rate_control_init(s) < 0)
795         return -1;
796
797 #if FF_API_ERROR_RATE
798     FF_DISABLE_DEPRECATION_WARNINGS
799     if (avctx->error_rate)
800         s->error_rate = avctx->error_rate;
801     FF_ENABLE_DEPRECATION_WARNINGS;
802 #endif
803
804 #if FF_API_NORMALIZE_AQP
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
807         s->mpv_flags |= FF_MPV_FLAG_NAQ;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811 #if FF_API_MV0
812     FF_DISABLE_DEPRECATION_WARNINGS
813     if (avctx->flags & CODEC_FLAG_MV0)
814         s->mpv_flags |= FF_MPV_FLAG_MV0;
815     FF_ENABLE_DEPRECATION_WARNINGS
816 #endif
817
818 #if FF_API_MPV_OPT
819     FF_DISABLE_DEPRECATION_WARNINGS
820     if (avctx->rc_qsquish != 0.0)
821         s->rc_qsquish = avctx->rc_qsquish;
822     if (avctx->rc_qmod_amp != 0.0)
823         s->rc_qmod_amp = avctx->rc_qmod_amp;
824     if (avctx->rc_qmod_freq)
825         s->rc_qmod_freq = avctx->rc_qmod_freq;
826     if (avctx->rc_buffer_aggressivity != 1.0)
827         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
828     if (avctx->rc_initial_cplx != 0.0)
829         s->rc_initial_cplx = avctx->rc_initial_cplx;
830
831     if (avctx->rc_eq) {
832         av_freep(&s->rc_eq);
833         s->rc_eq = av_strdup(avctx->rc_eq);
834         if (!s->rc_eq)
835             return AVERROR(ENOMEM);
836     }
837     FF_ENABLE_DEPRECATION_WARNINGS
838 #endif
839
840     if (avctx->b_frame_strategy == 2) {
841         for (i = 0; i < s->max_b_frames + 2; i++) {
842             s->tmp_frames[i] = av_frame_alloc();
843             if (!s->tmp_frames[i])
844                 return AVERROR(ENOMEM);
845
846             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
847             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
848             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
849
850             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
851             if (ret < 0)
852                 return ret;
853         }
854     }
855
856     return 0;
857 fail:
858     ff_mpv_encode_end(avctx);
859     return AVERROR_UNKNOWN;
860 }
861
862 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
863 {
864     MpegEncContext *s = avctx->priv_data;
865     int i;
866
867     ff_rate_control_uninit(s);
868
869     ff_mpv_common_end(s);
870     if (CONFIG_MJPEG_ENCODER &&
871         s->out_format == FMT_MJPEG)
872         ff_mjpeg_encode_close(s);
873
874     av_freep(&avctx->extradata);
875
876     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
877         av_frame_free(&s->tmp_frames[i]);
878
879     ff_free_picture_tables(&s->new_picture);
880     ff_mpeg_unref_picture(s, &s->new_picture);
881
882     av_freep(&s->avctx->stats_out);
883     av_freep(&s->ac_stats);
884
885     av_freep(&s->q_intra_matrix);
886     av_freep(&s->q_inter_matrix);
887     av_freep(&s->q_intra_matrix16);
888     av_freep(&s->q_inter_matrix16);
889     av_freep(&s->input_picture);
890     av_freep(&s->reordered_input_picture);
891     av_freep(&s->dct_offset);
892
893     return 0;
894 }
895
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++)
        for (col = 0; col < 16; col++)
            total += FFABS(src[col + row * stride] - ref);

    return total;
}
909
910 static int get_intra_count(MpegEncContext *s, uint8_t *src,
911                            uint8_t *ref, int stride)
912 {
913     int x, y, w, h;
914     int acc = 0;
915
916     w = s->width  & ~15;
917     h = s->height & ~15;
918
919     for (y = 0; y < h; y += 16) {
920         for (x = 0; x < w; x += 16) {
921             int offset = x + y * stride;
922             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
923                                       stride, 16);
924             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
925             int sae  = get_sae(src + offset, mean, stride);
926
927             acc += sae + 500 < sad;
928         }
929     }
930     return acc;
931 }
932
933
934 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
935 {
936     Picture *pic = NULL;
937     int64_t pts;
938     int i, display_picture_number = 0, ret;
939     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
940                                                  (s->low_delay ? 0 : 1);
941     int direct = 1;
942
943     if (pic_arg) {
944         pts = pic_arg->pts;
945         display_picture_number = s->input_picture_number++;
946
947         if (pts != AV_NOPTS_VALUE) {
948             if (s->user_specified_pts != AV_NOPTS_VALUE) {
949                 int64_t time = pts;
950                 int64_t last = s->user_specified_pts;
951
952                 if (time <= last) {
953                     av_log(s->avctx, AV_LOG_ERROR,
954                            "Error, Invalid timestamp=%"PRId64", "
955                            "last=%"PRId64"\n", pts, s->user_specified_pts);
956                     return -1;
957                 }
958
959                 if (!s->low_delay && display_picture_number == 1)
960                     s->dts_delta = time - last;
961             }
962             s->user_specified_pts = pts;
963         } else {
964             if (s->user_specified_pts != AV_NOPTS_VALUE) {
965                 s->user_specified_pts =
966                 pts = s->user_specified_pts + 1;
967                 av_log(s->avctx, AV_LOG_INFO,
968                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
969                        pts);
970             } else {
971                 pts = display_picture_number;
972             }
973         }
974     }
975
976     if (pic_arg) {
977         if (!pic_arg->buf[0]);
978             direct = 0;
979         if (pic_arg->linesize[0] != s->linesize)
980             direct = 0;
981         if (pic_arg->linesize[1] != s->uvlinesize)
982             direct = 0;
983         if (pic_arg->linesize[2] != s->uvlinesize)
984             direct = 0;
985
986         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
987                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
988
989         if (direct) {
990             i = ff_find_unused_picture(s, 1);
991             if (i < 0)
992                 return i;
993
994             pic = &s->picture[i];
995             pic->reference = 3;
996
997             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
998                 return ret;
999             if (ff_alloc_picture(s, pic, 1) < 0) {
1000                 return -1;
1001             }
1002         } else {
1003             i = ff_find_unused_picture(s, 0);
1004             if (i < 0)
1005                 return i;
1006
1007             pic = &s->picture[i];
1008             pic->reference = 3;
1009
1010             if (ff_alloc_picture(s, pic, 0) < 0) {
1011                 return -1;
1012             }
1013
1014             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1015                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1016                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1017                 // empty
1018             } else {
1019                 int h_chroma_shift, v_chroma_shift;
1020                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1021                                                  &h_chroma_shift,
1022                                                  &v_chroma_shift);
1023
1024                 for (i = 0; i < 3; i++) {
1025                     int src_stride = pic_arg->linesize[i];
1026                     int dst_stride = i ? s->uvlinesize : s->linesize;
1027                     int h_shift = i ? h_chroma_shift : 0;
1028                     int v_shift = i ? v_chroma_shift : 0;
1029                     int w = s->width  >> h_shift;
1030                     int h = s->height >> v_shift;
1031                     uint8_t *src = pic_arg->data[i];
1032                     uint8_t *dst = pic->f->data[i];
1033
1034                     if (!s->avctx->rc_buffer_size)
1035                         dst += INPLACE_OFFSET;
1036
1037                     if (src_stride == dst_stride)
1038                         memcpy(dst, src, src_stride * h);
1039                     else {
1040                         while (h--) {
1041                             memcpy(dst, src, w);
1042                             dst += dst_stride;
1043                             src += src_stride;
1044                         }
1045                     }
1046                 }
1047             }
1048         }
1049         ret = av_frame_copy_props(pic->f, pic_arg);
1050         if (ret < 0)
1051             return ret;
1052
1053         pic->f->display_picture_number = display_picture_number;
1054         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1055     }
1056
1057     /* shift buffer entries */
1058     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1059         s->input_picture[i - 1] = s->input_picture[i];
1060
1061     s->input_picture[encoding_delay] = (Picture*) pic;
1062
1063     return 0;
1064 }
1065
1066 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1067 {
1068     int x, y, plane;
1069     int score = 0;
1070     int64_t score64 = 0;
1071
1072     for (plane = 0; plane < 3; plane++) {
1073         const int stride = p->f->linesize[plane];
1074         const int bw = plane ? 1 : 2;
1075         for (y = 0; y < s->mb_height * bw; y++) {
1076             for (x = 0; x < s->mb_width * bw; x++) {
1077                 int off = p->shared ? 0 : 16;
1078                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1079                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1080                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1081
1082                 switch (s->avctx->frame_skip_exp) {
1083                 case 0: score    =  FFMAX(score, v);          break;
1084                 case 1: score   += FFABS(v);                  break;
1085                 case 2: score   += v * v;                     break;
1086                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1087                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1088                 }
1089             }
1090         }
1091     }
1092
1093     if (score)
1094         score64 = score;
1095
1096     if (score64 < s->avctx->frame_skip_threshold)
1097         return 1;
1098     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1099         return 1;
1100     return 0;
1101 }
1102
1103 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1104 {
1105     AVPacket pkt = { 0 };
1106     int ret, got_output;
1107
1108     av_init_packet(&pkt);
1109     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1110     if (ret < 0)
1111         return ret;
1112
1113     ret = pkt.size;
1114     av_free_packet(&pkt);
1115     return ret;
1116 }
1117
1118 static int estimate_best_b_count(MpegEncContext *s)
1119 {
1120     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1121     AVCodecContext *c = avcodec_alloc_context3(NULL);
1122     const int scale = s->avctx->brd_scale;
1123     int i, j, out_size, p_lambda, b_lambda, lambda2;
1124     int64_t best_rd  = INT64_MAX;
1125     int best_b_count = -1;
1126
1127     assert(scale >= 0 && scale <= 3);
1128
1129     //emms_c();
1130     //s->next_picture_ptr->quality;
1131     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1132     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1133     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1134     if (!b_lambda) // FIXME we should do this somewhere else
1135         b_lambda = p_lambda;
1136     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1137                FF_LAMBDA_SHIFT;
1138
1139     c->width        = s->width  >> scale;
1140     c->height       = s->height >> scale;
1141     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1142     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1143     c->mb_decision  = s->avctx->mb_decision;
1144     c->me_cmp       = s->avctx->me_cmp;
1145     c->mb_cmp       = s->avctx->mb_cmp;
1146     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1147     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1148     c->time_base    = s->avctx->time_base;
1149     c->max_b_frames = s->max_b_frames;
1150
1151     if (avcodec_open2(c, codec, NULL) < 0)
1152         return -1;
1153
1154     for (i = 0; i < s->max_b_frames + 2; i++) {
1155         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1156                                                 s->next_picture_ptr;
1157
1158         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1159             pre_input = *pre_input_ptr;
1160
1161             if (!pre_input.shared && i) {
1162                 pre_input.f->data[0] += INPLACE_OFFSET;
1163                 pre_input.f->data[1] += INPLACE_OFFSET;
1164                 pre_input.f->data[2] += INPLACE_OFFSET;
1165             }
1166
1167             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1168                                        s->tmp_frames[i]->linesize[0],
1169                                        pre_input.f->data[0],
1170                                        pre_input.f->linesize[0],
1171                                        c->width, c->height);
1172             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1173                                        s->tmp_frames[i]->linesize[1],
1174                                        pre_input.f->data[1],
1175                                        pre_input.f->linesize[1],
1176                                        c->width >> 1, c->height >> 1);
1177             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1178                                        s->tmp_frames[i]->linesize[2],
1179                                        pre_input.f->data[2],
1180                                        pre_input.f->linesize[2],
1181                                        c->width >> 1, c->height >> 1);
1182         }
1183     }
1184
1185     for (j = 0; j < s->max_b_frames + 1; j++) {
1186         int64_t rd = 0;
1187
1188         if (!s->input_picture[j])
1189             break;
1190
1191         c->error[0] = c->error[1] = c->error[2] = 0;
1192
1193         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1194         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1195
1196         out_size = encode_frame(c, s->tmp_frames[0]);
1197
1198         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1199
1200         for (i = 0; i < s->max_b_frames + 1; i++) {
1201             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1202
1203             s->tmp_frames[i + 1]->pict_type = is_p ?
1204                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1205             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1206
1207             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1208
1209             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1210         }
1211
1212         /* get the delayed frames */
1213         while (out_size) {
1214             out_size = encode_frame(c, NULL);
1215             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1216         }
1217
1218         rd += c->error[0] + c->error[1] + c->error[2];
1219
1220         if (rd < best_rd) {
1221             best_rd = rd;
1222             best_b_count = j;
1223         }
1224     }
1225
1226     avcodec_close(c);
1227     av_freep(&c);
1228
1229     return best_b_count;
1230 }
1231
1232 static int select_input_picture(MpegEncContext *s)
1233 {
1234     int i, ret;
1235
1236     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1237         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1238     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1239
1240     /* set next picture type & ordering */
1241     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1242         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1243             !s->next_picture_ptr || s->intra_only) {
1244             s->reordered_input_picture[0] = s->input_picture[0];
1245             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1246             s->reordered_input_picture[0]->f->coded_picture_number =
1247                 s->coded_picture_number++;
1248         } else {
1249             int b_frames;
1250
1251             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1252                 if (s->picture_in_gop_number < s->gop_size &&
1253                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1254                     // FIXME check that te gop check above is +-1 correct
1255                     av_frame_unref(s->input_picture[0]->f);
1256
1257                     emms_c();
1258                     ff_vbv_update(s, 0);
1259
1260                     goto no_output_pic;
1261                 }
1262             }
1263
1264             if (s->flags & CODEC_FLAG_PASS2) {
1265                 for (i = 0; i < s->max_b_frames + 1; i++) {
1266                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1267
1268                     if (pict_num >= s->rc_context.num_entries)
1269                         break;
1270                     if (!s->input_picture[i]) {
1271                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1272                         break;
1273                     }
1274
1275                     s->input_picture[i]->f->pict_type =
1276                         s->rc_context.entry[pict_num].new_pict_type;
1277                 }
1278             }
1279
1280             if (s->avctx->b_frame_strategy == 0) {
1281                 b_frames = s->max_b_frames;
1282                 while (b_frames && !s->input_picture[b_frames])
1283                     b_frames--;
1284             } else if (s->avctx->b_frame_strategy == 1) {
1285                 for (i = 1; i < s->max_b_frames + 1; i++) {
1286                     if (s->input_picture[i] &&
1287                         s->input_picture[i]->b_frame_score == 0) {
1288                         s->input_picture[i]->b_frame_score =
1289                             get_intra_count(s,
1290                                             s->input_picture[i    ]->f->data[0],
1291                                             s->input_picture[i - 1]->f->data[0],
1292                                             s->linesize) + 1;
1293                     }
1294                 }
1295                 for (i = 0; i < s->max_b_frames + 1; i++) {
1296                     if (!s->input_picture[i] ||
1297                         s->input_picture[i]->b_frame_score - 1 >
1298                             s->mb_num / s->avctx->b_sensitivity)
1299                         break;
1300                 }
1301
1302                 b_frames = FFMAX(0, i - 1);
1303
1304                 /* reset scores */
1305                 for (i = 0; i < b_frames + 1; i++) {
1306                     s->input_picture[i]->b_frame_score = 0;
1307                 }
1308             } else if (s->avctx->b_frame_strategy == 2) {
1309                 b_frames = estimate_best_b_count(s);
1310             } else {
1311                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1312                 b_frames = 0;
1313             }
1314
1315             emms_c();
1316
1317             for (i = b_frames - 1; i >= 0; i--) {
1318                 int type = s->input_picture[i]->f->pict_type;
1319                 if (type && type != AV_PICTURE_TYPE_B)
1320                     b_frames = i;
1321             }
1322             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1323                 b_frames == s->max_b_frames) {
1324                 av_log(s->avctx, AV_LOG_ERROR,
1325                        "warning, too many b frames in a row\n");
1326             }
1327
1328             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1329                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1330                     s->gop_size > s->picture_in_gop_number) {
1331                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1332                 } else {
1333                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1334                         b_frames = 0;
1335                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1336                 }
1337             }
1338
1339             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1340                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1341                 b_frames--;
1342
1343             s->reordered_input_picture[0] = s->input_picture[b_frames];
1344             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1345                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1346             s->reordered_input_picture[0]->f->coded_picture_number =
1347                 s->coded_picture_number++;
1348             for (i = 0; i < b_frames; i++) {
1349                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1350                 s->reordered_input_picture[i + 1]->f->pict_type =
1351                     AV_PICTURE_TYPE_B;
1352                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1353                     s->coded_picture_number++;
1354             }
1355         }
1356     }
1357 no_output_pic:
1358     if (s->reordered_input_picture[0]) {
1359         s->reordered_input_picture[0]->reference =
1360            s->reordered_input_picture[0]->f->pict_type !=
1361                AV_PICTURE_TYPE_B ? 3 : 0;
1362
1363         ff_mpeg_unref_picture(s, &s->new_picture);
1364         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1365             return ret;
1366
1367         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1368             // input is a shared pix, so we can't modifiy it -> alloc a new
1369             // one & ensure that the shared one is reuseable
1370
1371             Picture *pic;
1372             int i = ff_find_unused_picture(s, 0);
1373             if (i < 0)
1374                 return i;
1375             pic = &s->picture[i];
1376
1377             pic->reference = s->reordered_input_picture[0]->reference;
1378             if (ff_alloc_picture(s, pic, 0) < 0) {
1379                 return -1;
1380             }
1381
1382             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1383             if (ret < 0)
1384                 return ret;
1385
1386             /* mark us unused / free shared pic */
1387             av_frame_unref(s->reordered_input_picture[0]->f);
1388             s->reordered_input_picture[0]->shared = 0;
1389
1390             s->current_picture_ptr = pic;
1391         } else {
1392             // input is not a shared pix -> reuse buffer for current_pix
1393             s->current_picture_ptr = s->reordered_input_picture[0];
1394             for (i = 0; i < 4; i++) {
1395                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1396             }
1397         }
1398         ff_mpeg_unref_picture(s, &s->current_picture);
1399         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1400                                        s->current_picture_ptr)) < 0)
1401             return ret;
1402
1403         s->picture_number = s->new_picture.f->display_picture_number;
1404     } else {
1405         ff_mpeg_unref_picture(s, &s->new_picture);
1406     }
1407     return 0;
1408 }
1409
1410 static void frame_end(MpegEncContext *s)
1411 {
1412     int i;
1413
1414     if (s->unrestricted_mv &&
1415         s->current_picture.reference &&
1416         !s->intra_only) {
1417         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1418         int hshift = desc->log2_chroma_w;
1419         int vshift = desc->log2_chroma_h;
1420         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1421                                 s->h_edge_pos, s->v_edge_pos,
1422                                 EDGE_WIDTH, EDGE_WIDTH,
1423                                 EDGE_TOP | EDGE_BOTTOM);
1424         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1425                                 s->h_edge_pos >> hshift,
1426                                 s->v_edge_pos >> vshift,
1427                                 EDGE_WIDTH >> hshift,
1428                                 EDGE_WIDTH >> vshift,
1429                                 EDGE_TOP | EDGE_BOTTOM);
1430         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1431                                 s->h_edge_pos >> hshift,
1432                                 s->v_edge_pos >> vshift,
1433                                 EDGE_WIDTH >> hshift,
1434                                 EDGE_WIDTH >> vshift,
1435                                 EDGE_TOP | EDGE_BOTTOM);
1436     }
1437
1438     emms_c();
1439
1440     s->last_pict_type                 = s->pict_type;
1441     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1442     if (s->pict_type!= AV_PICTURE_TYPE_B)
1443         s->last_non_b_pict_type = s->pict_type;
1444
1445     if (s->encoding) {
1446         /* release non-reference frames */
1447         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1448             if (!s->picture[i].reference)
1449                 ff_mpeg_unref_picture(s, &s->picture[i]);
1450         }
1451     }
1452
1453     s->avctx->coded_frame = s->current_picture_ptr->f;
1454
1455 }
1456
1457 static void update_noise_reduction(MpegEncContext *s)
1458 {
1459     int intra, i;
1460
1461     for (intra = 0; intra < 2; intra++) {
1462         if (s->dct_count[intra] > (1 << 16)) {
1463             for (i = 0; i < 64; i++) {
1464                 s->dct_error_sum[intra][i] >>= 1;
1465             }
1466             s->dct_count[intra] >>= 1;
1467         }
1468
1469         for (i = 0; i < 64; i++) {
1470             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1471                                        s->dct_count[intra] +
1472                                        s->dct_error_sum[intra][i] / 2) /
1473                                       (s->dct_error_sum[intra][i] + 1);
1474         }
1475     }
1476 }
1477
1478 static int frame_start(MpegEncContext *s)
1479 {
1480     int ret;
1481
1482     /* mark & release old frames */
1483     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1484         s->last_picture_ptr != s->next_picture_ptr &&
1485         s->last_picture_ptr->f->buf[0]) {
1486         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1487     }
1488
1489     s->current_picture_ptr->f->pict_type = s->pict_type;
1490     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1491
1492     ff_mpeg_unref_picture(s, &s->current_picture);
1493     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1494                                    s->current_picture_ptr)) < 0)
1495         return ret;
1496
1497     if (s->pict_type != AV_PICTURE_TYPE_B) {
1498         s->last_picture_ptr = s->next_picture_ptr;
1499         if (!s->droppable)
1500             s->next_picture_ptr = s->current_picture_ptr;
1501     }
1502
1503     if (s->last_picture_ptr) {
1504         ff_mpeg_unref_picture(s, &s->last_picture);
1505         if (s->last_picture_ptr->f->buf[0] &&
1506             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1507                                        s->last_picture_ptr)) < 0)
1508             return ret;
1509     }
1510     if (s->next_picture_ptr) {
1511         ff_mpeg_unref_picture(s, &s->next_picture);
1512         if (s->next_picture_ptr->f->buf[0] &&
1513             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1514                                        s->next_picture_ptr)) < 0)
1515             return ret;
1516     }
1517
1518     if (s->picture_structure!= PICT_FRAME) {
1519         int i;
1520         for (i = 0; i < 4; i++) {
1521             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1522                 s->current_picture.f->data[i] +=
1523                     s->current_picture.f->linesize[i];
1524             }
1525             s->current_picture.f->linesize[i] *= 2;
1526             s->last_picture.f->linesize[i]    *= 2;
1527             s->next_picture.f->linesize[i]    *= 2;
1528         }
1529     }
1530
1531     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1532         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1533         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1534     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1535         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1536         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1537     } else {
1538         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1539         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1540     }
1541
1542     if (s->dct_error_sum) {
1543         assert(s->avctx->noise_reduction && s->encoding);
1544         update_noise_reduction(s);
1545     }
1546
1547     return 0;
1548 }
1549
1550 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1551                           const AVFrame *pic_arg, int *got_packet)
1552 {
1553     MpegEncContext *s = avctx->priv_data;
1554     int i, stuffing_count, ret;
1555     int context_count = s->slice_context_count;
1556
1557     s->picture_in_gop_number++;
1558
1559     if (load_input_picture(s, pic_arg) < 0)
1560         return -1;
1561
1562     if (select_input_picture(s) < 0) {
1563         return -1;
1564     }
1565
1566     /* output? */
1567     if (s->new_picture.f->data[0]) {
1568         if (!pkt->data &&
1569             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1570             return ret;
1571         if (s->mb_info) {
1572             s->mb_info_ptr = av_packet_new_side_data(pkt,
1573                                  AV_PKT_DATA_H263_MB_INFO,
1574                                  s->mb_width*s->mb_height*12);
1575             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1576         }
1577
1578         for (i = 0; i < context_count; i++) {
1579             int start_y = s->thread_context[i]->start_mb_y;
1580             int   end_y = s->thread_context[i]->  end_mb_y;
1581             int h       = s->mb_height;
1582             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1583             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1584
1585             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1586         }
1587
1588         s->pict_type = s->new_picture.f->pict_type;
1589         //emms_c();
1590         ret = frame_start(s);
1591         if (ret < 0)
1592             return ret;
1593 vbv_retry:
1594         if (encode_picture(s, s->picture_number) < 0)
1595             return -1;
1596
1597         avctx->header_bits = s->header_bits;
1598         avctx->mv_bits     = s->mv_bits;
1599         avctx->misc_bits   = s->misc_bits;
1600         avctx->i_tex_bits  = s->i_tex_bits;
1601         avctx->p_tex_bits  = s->p_tex_bits;
1602         avctx->i_count     = s->i_count;
1603         // FIXME f/b_count in avctx
1604         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1605         avctx->skip_count  = s->skip_count;
1606
1607         frame_end(s);
1608
1609         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1610             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1611
1612         if (avctx->rc_buffer_size) {
1613             RateControlContext *rcc = &s->rc_context;
1614             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1615
1616             if (put_bits_count(&s->pb) > max_size &&
1617                 s->lambda < s->avctx->lmax) {
1618                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1619                                        (s->qscale + 1) / s->qscale);
1620                 if (s->adaptive_quant) {
1621                     int i;
1622                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1623                         s->lambda_table[i] =
1624                             FFMAX(s->lambda_table[i] + 1,
1625                                   s->lambda_table[i] * (s->qscale + 1) /
1626                                   s->qscale);
1627                 }
1628                 s->mb_skipped = 0;        // done in frame_start()
1629                 // done in encode_picture() so we must undo it
1630                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1631                     if (s->flipflop_rounding          ||
1632                         s->codec_id == AV_CODEC_ID_H263P ||
1633                         s->codec_id == AV_CODEC_ID_MPEG4)
1634                         s->no_rounding ^= 1;
1635                 }
1636                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1637                     s->time_base       = s->last_time_base;
1638                     s->last_non_b_time = s->time - s->pp_time;
1639                 }
1640                 for (i = 0; i < context_count; i++) {
1641                     PutBitContext *pb = &s->thread_context[i]->pb;
1642                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1643                 }
1644                 goto vbv_retry;
1645             }
1646
1647             assert(s->avctx->rc_max_rate);
1648         }
1649
1650         if (s->flags & CODEC_FLAG_PASS1)
1651             ff_write_pass1_stats(s);
1652
1653         for (i = 0; i < 4; i++) {
1654             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1655             avctx->error[i] += s->current_picture_ptr->f->error[i];
1656         }
1657
1658         if (s->flags & CODEC_FLAG_PASS1)
1659             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1660                    avctx->i_tex_bits + avctx->p_tex_bits ==
1661                        put_bits_count(&s->pb));
1662         flush_put_bits(&s->pb);
1663         s->frame_bits  = put_bits_count(&s->pb);
1664
1665         stuffing_count = ff_vbv_update(s, s->frame_bits);
1666         if (stuffing_count) {
1667             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1668                     stuffing_count + 50) {
1669                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1670                 return -1;
1671             }
1672
1673             switch (s->codec_id) {
1674             case AV_CODEC_ID_MPEG1VIDEO:
1675             case AV_CODEC_ID_MPEG2VIDEO:
1676                 while (stuffing_count--) {
1677                     put_bits(&s->pb, 8, 0);
1678                 }
1679             break;
1680             case AV_CODEC_ID_MPEG4:
1681                 put_bits(&s->pb, 16, 0);
1682                 put_bits(&s->pb, 16, 0x1C3);
1683                 stuffing_count -= 4;
1684                 while (stuffing_count--) {
1685                     put_bits(&s->pb, 8, 0xFF);
1686                 }
1687             break;
1688             default:
1689                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1690             }
1691             flush_put_bits(&s->pb);
1692             s->frame_bits  = put_bits_count(&s->pb);
1693         }
1694
1695         /* update mpeg1/2 vbv_delay for CBR */
1696         if (s->avctx->rc_max_rate                          &&
1697             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1698             s->out_format == FMT_MPEG1                     &&
1699             90000LL * (avctx->rc_buffer_size - 1) <=
1700                 s->avctx->rc_max_rate * 0xFFFFLL) {
1701             int vbv_delay, min_delay;
1702             double inbits  = s->avctx->rc_max_rate *
1703                              av_q2d(s->avctx->time_base);
1704             int    minbits = s->frame_bits - 8 *
1705                              (s->vbv_delay_ptr - s->pb.buf - 1);
1706             double bits    = s->rc_context.buffer_index + minbits - inbits;
1707
1708             if (bits < 0)
1709                 av_log(s->avctx, AV_LOG_ERROR,
1710                        "Internal error, negative bits\n");
1711
1712             assert(s->repeat_first_field == 0);
1713
1714             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1715             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1716                         s->avctx->rc_max_rate;
1717
1718             vbv_delay = FFMAX(vbv_delay, min_delay);
1719
1720             assert(vbv_delay < 0xFFFF);
1721
1722             s->vbv_delay_ptr[0] &= 0xF8;
1723             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1724             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1725             s->vbv_delay_ptr[2] &= 0x07;
1726             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1727             avctx->vbv_delay     = vbv_delay * 300;
1728         }
1729         s->total_bits     += s->frame_bits;
1730         avctx->frame_bits  = s->frame_bits;
1731
1732         pkt->pts = s->current_picture.f->pts;
1733         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1734             if (!s->current_picture.f->coded_picture_number)
1735                 pkt->dts = pkt->pts - s->dts_delta;
1736             else
1737                 pkt->dts = s->reordered_pts;
1738             s->reordered_pts = pkt->pts;
1739         } else
1740             pkt->dts = pkt->pts;
1741         if (s->current_picture.f->key_frame)
1742             pkt->flags |= AV_PKT_FLAG_KEY;
1743         if (s->mb_info)
1744             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1745     } else {
1746         s->frame_bits = 0;
1747     }
1748     assert((s->frame_bits & 7) == 0);
1749
1750     pkt->size = s->frame_bits / 8;
1751     *got_packet = !!pkt->size;
1752     return 0;
1753 }
1754
1755 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1756                                                 int n, int threshold)
1757 {
1758     static const char tab[64] = {
1759         3, 2, 2, 1, 1, 1, 1, 1,
1760         1, 1, 1, 1, 1, 1, 1, 1,
1761         1, 1, 1, 1, 1, 1, 1, 1,
1762         0, 0, 0, 0, 0, 0, 0, 0,
1763         0, 0, 0, 0, 0, 0, 0, 0,
1764         0, 0, 0, 0, 0, 0, 0, 0,
1765         0, 0, 0, 0, 0, 0, 0, 0,
1766         0, 0, 0, 0, 0, 0, 0, 0
1767     };
1768     int score = 0;
1769     int run = 0;
1770     int i;
1771     int16_t *block = s->block[n];
1772     const int last_index = s->block_last_index[n];
1773     int skip_dc;
1774
1775     if (threshold < 0) {
1776         skip_dc = 0;
1777         threshold = -threshold;
1778     } else
1779         skip_dc = 1;
1780
1781     /* Are all we could set to zero already zero? */
1782     if (last_index <= skip_dc - 1)
1783         return;
1784
1785     for (i = 0; i <= last_index; i++) {
1786         const int j = s->intra_scantable.permutated[i];
1787         const int level = FFABS(block[j]);
1788         if (level == 1) {
1789             if (skip_dc && i == 0)
1790                 continue;
1791             score += tab[run];
1792             run = 0;
1793         } else if (level > 1) {
1794             return;
1795         } else {
1796             run++;
1797         }
1798     }
1799     if (score >= threshold)
1800         return;
1801     for (i = skip_dc; i <= last_index; i++) {
1802         const int j = s->intra_scantable.permutated[i];
1803         block[j] = 0;
1804     }
1805     if (block[0])
1806         s->block_last_index[n] = 0;
1807     else
1808         s->block_last_index[n] = -1;
1809 }
1810
1811 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1812                                int last_index)
1813 {
1814     int i;
1815     const int maxlevel = s->max_qcoeff;
1816     const int minlevel = s->min_qcoeff;
1817     int overflow = 0;
1818
1819     if (s->mb_intra) {
1820         i = 1; // skip clipping of intra dc
1821     } else
1822         i = 0;
1823
1824     for (; i <= last_index; i++) {
1825         const int j = s->intra_scantable.permutated[i];
1826         int level = block[j];
1827
1828         if (level > maxlevel) {
1829             level = maxlevel;
1830             overflow++;
1831         } else if (level < minlevel) {
1832             level = minlevel;
1833             overflow++;
1834         }
1835
1836         block[j] = level;
1837     }
1838
1839     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1840         av_log(s->avctx, AV_LOG_INFO,
1841                "warning, clipping %d dct coefficients to %d..%d\n",
1842                overflow, minlevel, maxlevel);
1843 }
1844
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the neighbourhood of up to 3x3 samples (cut off at the
 * borders of the 8x8 block) is scanned; with N samples, sum S and sum of
 * squares Q the stored weight is 36 * sqrt(N * Q - S * S) / N.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total   = 0;
            int squares = 0;
            int samples = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    total   += pix;
                    squares += pix * pix;
                    samples++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1868
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * The function fetches the source pixels (intra) or the difference between
 * the source and the motion-compensated prediction (inter), runs
 * s->dct_quantize() on every block that is not skipped, optionally refines
 * and prunes the coefficients, and finally calls the codec-specific
 * *_encode_mb() function selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the codec-specific encoder
 * @param motion_y        forwarded unchanged to the codec-specific encoder
 * @param mb_block_height number of chroma rows per macroblock; encode_mb()
 *                        passes 8 for CHROMA_420 and 16 otherwise
 * @param mb_block_count  number of 8x8 blocks per macroblock; encode_mb()
 *                        passes 6 for CHROMA_420 and 8 otherwise
 */
1869 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1870                                                 int motion_x, int motion_y,
1871                                                 int mb_block_height,
1872                                                 int mb_block_count)
1873 {
1874     int16_t weight[8][64];
1875     int16_t orig[8][64];
1876     const int mb_x = s->mb_x;
1877     const int mb_y = s->mb_y;
1878     int i;
1879     int skip_dct[8];
1880     int dct_offset = s->linesize * 8; // default for progressive frames
1881     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1882     ptrdiff_t wrap_y, wrap_c;
1883
         /* every block starts from the global skipdct setting */
1884     for (i = 0; i < mb_block_count; i++)
1885         skip_dct[i] = s->skipdct;
1886
         /* adaptive quantization: pick lambda/qscale for this macroblock and
          * derive dquant relative to the previous qscale */
1887     if (s->adaptive_quant) {
1888         const int last_qp = s->qscale;
1889         const int mb_xy = mb_x + mb_y * s->mb_stride;
1890
1891         s->lambda = s->lambda_table[mb_xy];
1892         update_qscale(s);
1893
1894         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1895             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1896             s->dquant = s->qscale - last_qp;
1897
1898             if (s->out_format == FMT_H263) {
1899                 s->dquant = av_clip(s->dquant, -2, 2);
1900
                     /* MPEG-4 inter macroblocks: dquant is forced to 0 for odd
                      * or direct-mode B-frame MBs and for 8x8 motion vectors */
1901                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1902                     if (!s->mb_intra) {
1903                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1904                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1905                                 s->dquant = 0;
1906                         }
1907                         if (s->mv_type == MV_TYPE_8X8)
1908                             s->dquant = 0;
1909                     }
1910                 }
1911             }
1912         }
1913         ff_set_qscale(s, last_qp + s->dquant);
1914     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1915         ff_set_qscale(s, s->qscale + s->dquant);
1916
         /* locate the macroblock inside the source picture planes */
1917     wrap_y = s->linesize;
1918     wrap_c = s->uvlinesize;
1919     ptr_y  = s->new_picture.f->data[0] +
1920              (mb_y * 16 * wrap_y)              + mb_x * 16;
1921     ptr_cb = s->new_picture.f->data[1] +
1922              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1923     ptr_cr = s->new_picture.f->data[2] +
1924              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1925
         /* the macroblock reaches beyond the picture: read it through the
          * edge emulation buffer instead.
          * NOTE(review): the chroma calls pass mb_y * 8 and s->height >> 1
          * while the chroma pointers above use mb_block_height -- this looks
          * specific to 4:2:0; confirm the behaviour for 4:2:2 input. */
1926     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1927         uint8_t *ebuf = s->edge_emu_buffer + 32;
1928         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1929                                  wrap_y, wrap_y,
1930                                  16, 16, mb_x * 16, mb_y * 16,
1931                                  s->width, s->height);
1932         ptr_y = ebuf;
1933         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1934                                  wrap_c, wrap_c,
1935                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1936                                  s->width >> 1, s->height >> 1);
1937         ptr_cb = ebuf + 18 * wrap_y;
1938         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1939                                  wrap_c, wrap_c,
1940                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1941                                  s->width >> 1, s->height >> 1);
1942         ptr_cr = ebuf + 18 * wrap_y + 8;
1943     }
1944
1945     if (s->mb_intra) {
             /* intra: choose progressive or interlaced DCT by comparing the
              * ildct_cmp scores of the two line arrangements */
1946         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1947             int progressive_score, interlaced_score;
1948
1949             s->interlaced_dct = 0;
1950             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1951                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1952                                                      NULL, wrap_y, 8) - 400;
1953
1954             if (progressive_score > 0) {
1955                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1956                                                         NULL, wrap_y * 2, 8) +
1957                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1958                                                         NULL, wrap_y * 2, 8);
1959                 if (progressive_score > interlaced_score) {
1960                     s->interlaced_dct = 1;
1961
                         /* lower blocks start one line down and every block
                          * uses every second line */
1962                     dct_offset = wrap_y;
1963                     wrap_y <<= 1;
1964                     if (s->chroma_format == CHROMA_422)
1965                         wrap_c <<= 1;
1966                 }
1967             }
1968         }
1969
             /* load the four luma blocks */
1970         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1971         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1972         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1973         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1974
1975         if (s->flags & CODEC_FLAG_GRAY) {
1976             skip_dct[4] = 1;
1977             skip_dct[5] = 1;
1978         } else {
1979             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1980             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1981             if (!s->chroma_y_shift) { /* 422 */
1982                 s->pdsp.get_pixels(s->block[6],
1983                                    ptr_cb + (dct_offset >> 1), wrap_c);
1984                 s->pdsp.get_pixels(s->block[7],
1985                                    ptr_cr + (dct_offset >> 1), wrap_c);
1986             }
1987         }
1988     } else {
1989         op_pixels_func (*op_pix)[4];
1990         qpel_mc_func (*op_qpix)[16];
1991         uint8_t *dest_y, *dest_cb, *dest_cr;
1992
1993         dest_y  = s->dest[0];
1994         dest_cb = s->dest[1];
1995         dest_cr = s->dest[2];
1996
             /* inter: build the motion-compensated prediction in s->dest */
1997         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1998             op_pix  = s->hdsp.put_pixels_tab;
1999             op_qpix = s->qdsp.put_qpel_pixels_tab;
2000         } else {
2001             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2002             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2003         }
2004
2005         if (s->mv_dir & MV_DIR_FORWARD) {
2006             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2007                           s->last_picture.f->data,
2008                           op_pix, op_qpix);
                 /* a following backward prediction is averaged on top */
2009             op_pix  = s->hdsp.avg_pixels_tab;
2010             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2011         }
2012         if (s->mv_dir & MV_DIR_BACKWARD) {
2013             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2014                           s->next_picture.f->data,
2015                           op_pix, op_qpix);
2016         }
2017
             /* same progressive/interlaced decision as in the intra path, but
              * scored on source versus prediction */
2018         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2019             int progressive_score, interlaced_score;
2020
2021             s->interlaced_dct = 0;
2022             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2023                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2024                                                      ptr_y + wrap_y * 8,
2025                                                      wrap_y, 8) - 400;
2026
2027             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2028                 progressive_score -= 400;
2029
2030             if (progressive_score > 0) {
2031                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2032                                                         wrap_y * 2, 8) +
2033                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2034                                                         ptr_y + wrap_y,
2035                                                         wrap_y * 2, 8);
2036
2037                 if (progressive_score > interlaced_score) {
2038                     s->interlaced_dct = 1;
2039
2040                     dct_offset = wrap_y;
2041                     wrap_y <<= 1;
2042                     if (s->chroma_format == CHROMA_422)
2043                         wrap_c <<= 1;
2044                 }
2045             }
2046         }
2047
             /* residual = source - prediction */
2048         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2049         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2050         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2051                             dest_y + dct_offset, wrap_y);
2052         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2053                             dest_y + dct_offset + 8, wrap_y);
2054
2055         if (s->flags & CODEC_FLAG_GRAY) {
2056             skip_dct[4] = 1;
2057             skip_dct[5] = 1;
2058         } else {
2059             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2060             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2061             if (!s->chroma_y_shift) { /* 422 */
2062                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2063                                     dest_cb + (dct_offset >> 1), wrap_c);
2064                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2065                                     dest_cr + (dct_offset >> 1), wrap_c);
2066             }
2067         }
2068         /* pre quantization */
             /* when the stored mc_mb_var of this macroblock is small relative
              * to qscale^2, skip the DCT of every block whose SAD against the
              * prediction is below 20 * qscale */
2069         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2070                 2 * s->qscale * s->qscale) {
2071             // FIXME optimize
2072             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2073                 skip_dct[0] = 1;
2074             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2075                 skip_dct[1] = 1;
2076             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2077                                wrap_y, 8) < 20 * s->qscale)
2078                 skip_dct[2] = 1;
2079             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2080                                wrap_y, 8) < 20 * s->qscale)
2081                 skip_dct[3] = 1;
2082             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2083                 skip_dct[4] = 1;
2084             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2085                 skip_dct[5] = 1;
2086             if (!s->chroma_y_shift) { /* 422 */
2087                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2088                                    dest_cb + (dct_offset >> 1),
2089                                    wrap_c, 8) < 20 * s->qscale)
2090                     skip_dct[6] = 1;
2091                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2092                                    dest_cr + (dct_offset >> 1),
2093                                    wrap_c, 8) < 20 * s->qscale)
2094                     skip_dct[7] = 1;
2095             }
2096         }
2097     }
2098
         /* noise shaping: compute per-pixel weights for every coded block and
          * keep a copy of the blocks before quantization; both are consumed
          * by dct_quantize_refine() below */
2099     if (s->quantizer_noise_shaping) {
2100         if (!skip_dct[0])
2101             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2102         if (!skip_dct[1])
2103             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2104         if (!skip_dct[2])
2105             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2106         if (!skip_dct[3])
2107             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2108         if (!skip_dct[4])
2109             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2110         if (!skip_dct[5])
2111             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2112         if (!s->chroma_y_shift) { /* 422 */
2113             if (!skip_dct[6])
2114                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2115                                   wrap_c);
2116             if (!skip_dct[7])
2117                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2118                                   wrap_c);
2119         }
2120         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2121     }
2122
2123     /* DCT & quantize */
2124     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2125     {
2126         for (i = 0; i < mb_block_count; i++) {
2127             if (!skip_dct[i]) {
2128                 int overflow;
2129                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2130                 // FIXME we could decide to change to quantizer instead of
2131                 // clipping
2132                 // JS: I don't think that would be a good idea it could lower
2133                 //     quality instead of improve it. Just INTRADC clipping
2134                 //     deserves changes in quantizer
2135                 if (overflow)
2136                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2137             } else
                     /* skipped block: mark it as having no coefficients */
2138                 s->block_last_index[i] = -1;
2139         }
2140         if (s->quantizer_noise_shaping) {
2141             for (i = 0; i < mb_block_count; i++) {
2142                 if (!skip_dct[i]) {
2143                     s->block_last_index[i] =
2144                         dct_quantize_refine(s, s->block[i], weight[i],
2145                                             orig[i], i, s->qscale);
2146                 }
2147             }
2148         }
2149
             /* single coefficient elimination is applied to inter blocks only */
2150         if (s->luma_elim_threshold && !s->mb_intra)
2151             for (i = 0; i < 4; i++)
2152                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2153         if (s->chroma_elim_threshold && !s->mb_intra)
2154             for (i = 4; i < mb_block_count; i++)
2155                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2156
2157         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2158             for (i = 0; i < mb_block_count; i++) {
2159                 if (s->block_last_index[i] == -1)
2160                     s->coded_score[i] = INT_MAX / 256;
2161             }
2162         }
2163     }
2164
         /* gray-only intra macroblock: chroma blocks 4 and 5 get a single DC
          * coefficient derived from 1024 and the chroma DC scale */
2165     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2166         s->block_last_index[4] =
2167         s->block_last_index[5] = 0;
2168         s->block[4][0] =
2169         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2170     }
2171
2172     // non c quantize code returns incorrect block_last_index FIXME
         /* so with alternate scan the last index is recomputed by searching
          * backwards for the last non-zero coefficient in scan order */
2173     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2174         for (i = 0; i < mb_block_count; i++) {
2175             int j;
2176             if (s->block_last_index[i] > 0) {
2177                 for (j = 63; j > 0; j--) {
2178                     if (s->block[i][s->intra_scantable.permutated[j]])
2179                         break;
2180                 }
2181                 s->block_last_index[i] = j;
2182             }
2183         }
2184     }
2185
2186     /* huffman encode */
         /* each call is guarded by a CONFIG_*_ENCODER build-time constant */
2187     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2188     case AV_CODEC_ID_MPEG1VIDEO:
2189     case AV_CODEC_ID_MPEG2VIDEO:
2190         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2191             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2192         break;
2193     case AV_CODEC_ID_MPEG4:
2194         if (CONFIG_MPEG4_ENCODER)
2195             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2196         break;
2197     case AV_CODEC_ID_MSMPEG4V2:
2198     case AV_CODEC_ID_MSMPEG4V3:
2199     case AV_CODEC_ID_WMV1:
2200         if (CONFIG_MSMPEG4_ENCODER)
2201             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2202         break;
2203     case AV_CODEC_ID_WMV2:
2204         if (CONFIG_WMV2_ENCODER)
2205             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2206         break;
2207     case AV_CODEC_ID_H261:
2208         if (CONFIG_H261_ENCODER)
2209             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2210         break;
2211     case AV_CODEC_ID_H263:
2212     case AV_CODEC_ID_H263P:
2213     case AV_CODEC_ID_FLV1:
2214     case AV_CODEC_ID_RV10:
2215     case AV_CODEC_ID_RV20:
2216         if (CONFIG_H263_ENCODER)
2217             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2218         break;
2219     case AV_CODEC_ID_MJPEG:
2220         if (CONFIG_MJPEG_ENCODER)
2221             ff_mjpeg_encode_mb(s, s->block);
2222         break;
2223     default:
2224         assert(0);
2225     }
2226 }
2227
2228 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2229 {
2230     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2231     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2232 }
2233
2234 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2235     int i;
2236
2237     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2238
2239     /* mpeg1 */
2240     d->mb_skip_run= s->mb_skip_run;
2241     for(i=0; i<3; i++)
2242         d->last_dc[i] = s->last_dc[i];
2243
2244     /* statistics */
2245     d->mv_bits= s->mv_bits;
2246     d->i_tex_bits= s->i_tex_bits;
2247     d->p_tex_bits= s->p_tex_bits;
2248     d->i_count= s->i_count;
2249     d->f_count= s->f_count;
2250     d->b_count= s->b_count;
2251     d->skip_count= s->skip_count;
2252     d->misc_bits= s->misc_bits;
2253     d->last_bits= 0;
2254
2255     d->mb_skipped= 0;
2256     d->qscale= s->qscale;
2257     d->dquant= s->dquant;
2258
2259     d->esc3_level_length= s->esc3_level_length;
2260 }
2261
2262 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2263     int i;
2264
2265     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2266     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2267
2268     /* mpeg1 */
2269     d->mb_skip_run= s->mb_skip_run;
2270     for(i=0; i<3; i++)
2271         d->last_dc[i] = s->last_dc[i];
2272
2273     /* statistics */
2274     d->mv_bits= s->mv_bits;
2275     d->i_tex_bits= s->i_tex_bits;
2276     d->p_tex_bits= s->p_tex_bits;
2277     d->i_count= s->i_count;
2278     d->f_count= s->f_count;
2279     d->b_count= s->b_count;
2280     d->skip_count= s->skip_count;
2281     d->misc_bits= s->misc_bits;
2282
2283     d->mb_intra= s->mb_intra;
2284     d->mb_skipped= s->mb_skipped;
2285     d->mv_type= s->mv_type;
2286     d->mv_dir= s->mv_dir;
2287     d->pb= s->pb;
2288     if(s->data_partitioning){
2289         d->pb2= s->pb2;
2290         d->tex_pb= s->tex_pb;
2291     }
2292     d->block= s->block;
2293     for(i=0; i<8; i++)
2294         d->block_last_index[i]= s->block_last_index[i];
2295     d->interlaced_dct= s->interlaced_dct;
2296     d->qscale= s->qscale;
2297
2298     d->esc3_level_length= s->esc3_level_length;
2299 }
2300
2301 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2302                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2303                            int *dmin, int *next_block, int motion_x, int motion_y)
2304 {
2305     int score;
2306     uint8_t *dest_backup[3];
2307
2308     copy_context_before_encode(s, backup, type);
2309
2310     s->block= s->blocks[*next_block];
2311     s->pb= pb[*next_block];
2312     if(s->data_partitioning){
2313         s->pb2   = pb2   [*next_block];
2314         s->tex_pb= tex_pb[*next_block];
2315     }
2316
2317     if(*next_block){
2318         memcpy(dest_backup, s->dest, sizeof(s->dest));
2319         s->dest[0] = s->rd_scratchpad;
2320         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2321         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2322         assert(s->linesize >= 32); //FIXME
2323     }
2324
2325     encode_mb(s, motion_x, motion_y);
2326
2327     score= put_bits_count(&s->pb);
2328     if(s->data_partitioning){
2329         score+= put_bits_count(&s->pb2);
2330         score+= put_bits_count(&s->tex_pb);
2331     }
2332
2333     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2334         ff_mpv_decode_mb(s, s->block);
2335
2336         score *= s->lambda2;
2337         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2338     }
2339
2340     if(*next_block){
2341         memcpy(s->dest, dest_backup, sizeof(s->dest));
2342     }
2343
2344     if(score<*dmin){
2345         *dmin= score;
2346         *next_block^=1;
2347
2348         copy_context_after_encode(best, s, type);
2349     }
2350 }
2351
2352 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2353     uint32_t *sq = ff_square_tab + 256;
2354     int acc=0;
2355     int x,y;
2356
2357     if(w==16 && h==16)
2358         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2359     else if(w==8 && h==8)
2360         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2361
2362     for(y=0; y<h; y++){
2363         for(x=0; x<w; x++){
2364             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2365         }
2366     }
2367
2368     assert(acc>=0);
2369
2370     return acc;
2371 }
2372
2373 static int sse_mb(MpegEncContext *s){
2374     int w= 16;
2375     int h= 16;
2376
2377     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2378     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2379
2380     if(w==16 && h==16)
2381       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2382         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2383                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2384                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2385       }else{
2386         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2387                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2388                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2389       }
2390     else
2391         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2392                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2393                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2394 }
2395
2396 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2397     MpegEncContext *s= *(void**)arg;
2398
2399
2400     s->me.pre_pass=1;
2401     s->me.dia_size= s->avctx->pre_dia_size;
2402     s->first_slice_line=1;
2403     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2404         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2405             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2406         }
2407         s->first_slice_line=0;
2408     }
2409
2410     s->me.pre_pass=0;
2411
2412     return 0;
2413 }
2414
2415 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2416     MpegEncContext *s= *(void**)arg;
2417
2418     s->me.dia_size= s->avctx->dia_size;
2419     s->first_slice_line=1;
2420     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2421         s->mb_x=0; //for block init below
2422         ff_init_block_index(s);
2423         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2424             s->block_index[0]+=2;
2425             s->block_index[1]+=2;
2426             s->block_index[2]+=2;
2427             s->block_index[3]+=2;
2428
2429             /* compute motion vector & mb_type and store in context */
2430             if(s->pict_type==AV_PICTURE_TYPE_B)
2431                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2432             else
2433                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2434         }
2435         s->first_slice_line=0;
2436     }
2437     return 0;
2438 }
2439
2440 static int mb_var_thread(AVCodecContext *c, void *arg){
2441     MpegEncContext *s= *(void**)arg;
2442     int mb_x, mb_y;
2443
2444     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2445         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2446             int xx = mb_x * 16;
2447             int yy = mb_y * 16;
2448             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2449             int varc;
2450             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2451
2452             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2453                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2454
2455             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2456             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2457             s->me.mb_var_sum_temp    += varc;
2458         }
2459     }
2460     return 0;
2461 }
2462
2463 static void write_slice_end(MpegEncContext *s){
2464     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2465         if(s->partitioned_frame){
2466             ff_mpeg4_merge_partitions(s);
2467         }
2468
2469         ff_mpeg4_stuffing(&s->pb);
2470     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2471         ff_mjpeg_encode_stuffing(&s->pb);
2472     }
2473
2474     avpriv_align_put_bits(&s->pb);
2475     flush_put_bits(&s->pb);
2476
2477     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2478         s->misc_bits+= get_bits_diff(s);
2479 }
2480
2481 static void write_mb_info(MpegEncContext *s)
2482 {
2483     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2484     int offset = put_bits_count(&s->pb);
2485     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2486     int gobn = s->mb_y / s->gob_index;
2487     int pred_x, pred_y;
2488     if (CONFIG_H263_ENCODER)
2489         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2490     bytestream_put_le32(&ptr, offset);
2491     bytestream_put_byte(&ptr, s->qscale);
2492     bytestream_put_byte(&ptr, gobn);
2493     bytestream_put_le16(&ptr, mba);
2494     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2495     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2496     /* 4MV not implemented */
2497     bytestream_put_byte(&ptr, 0); /* hmv2 */
2498     bytestream_put_byte(&ptr, 0); /* vmv2 */
2499 }
2500
2501 static void update_mb_info(MpegEncContext *s, int startcode)
2502 {
2503     if (!s->mb_info)
2504         return;
2505     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2506         s->mb_info_size += 12;
2507         s->prev_mb_info = s->last_mb_info;
2508     }
2509     if (startcode) {
2510         s->prev_mb_info = put_bits_count(&s->pb)/8;
2511         /* This might have incremented mb_info_size above, and we return without
2512          * actually writing any info into that slot yet. But in that case,
2513          * this will be called again at the start of the after writing the
2514          * start code, actually writing the mb info. */
2515         return;
2516     }
2517
2518     s->last_mb_info = put_bits_count(&s->pb)/8;
2519     if (!s->mb_info_size)
2520         s->mb_info_size += 12;
2521     write_mb_info(s);
2522 }
2523
2524 static int encode_thread(AVCodecContext *c, void *arg){
2525     MpegEncContext *s= *(void**)arg;
2526     int mb_x, mb_y, pdif = 0;
2527     int chr_h= 16>>s->chroma_y_shift;
2528     int i, j;
2529     MpegEncContext best_s, backup_s;
2530     uint8_t bit_buf[2][MAX_MB_BYTES];
2531     uint8_t bit_buf2[2][MAX_MB_BYTES];
2532     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2533     PutBitContext pb[2], pb2[2], tex_pb[2];
2534
2535     for(i=0; i<2; i++){
2536         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2537         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2538         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2539     }
2540
2541     s->last_bits= put_bits_count(&s->pb);
2542     s->mv_bits=0;
2543     s->misc_bits=0;
2544     s->i_tex_bits=0;
2545     s->p_tex_bits=0;
2546     s->i_count=0;
2547     s->f_count=0;
2548     s->b_count=0;
2549     s->skip_count=0;
2550
2551     for(i=0; i<3; i++){
2552         /* init last dc values */
2553         /* note: quant matrix value (8) is implied here */
2554         s->last_dc[i] = 128 << s->intra_dc_precision;
2555
2556         s->current_picture.f->error[i] = 0;
2557     }
2558     s->mb_skip_run = 0;
2559     memset(s->last_mv, 0, sizeof(s->last_mv));
2560
2561     s->last_mv_dir = 0;
2562
2563     switch(s->codec_id){
2564     case AV_CODEC_ID_H263:
2565     case AV_CODEC_ID_H263P:
2566     case AV_CODEC_ID_FLV1:
2567         if (CONFIG_H263_ENCODER)
2568             s->gob_index = ff_h263_get_gob_height(s);
2569         break;
2570     case AV_CODEC_ID_MPEG4:
2571         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2572             ff_mpeg4_init_partitions(s);
2573         break;
2574     }
2575
2576     s->resync_mb_x=0;
2577     s->resync_mb_y=0;
2578     s->first_slice_line = 1;
2579     s->ptr_lastgob = s->pb.buf;
2580     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2581         s->mb_x=0;
2582         s->mb_y= mb_y;
2583
2584         ff_set_qscale(s, s->qscale);
2585         ff_init_block_index(s);
2586
2587         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2588             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2589             int mb_type= s->mb_type[xy];
2590 //            int d;
2591             int dmin= INT_MAX;
2592             int dir;
2593
2594             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2595                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2596                 return -1;
2597             }
2598             if(s->data_partitioning){
2599                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2600                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2601                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2602                     return -1;
2603                 }
2604             }
2605
2606             s->mb_x = mb_x;
2607             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2608             ff_update_block_index(s);
2609
2610             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2611                 ff_h261_reorder_mb_index(s);
2612                 xy= s->mb_y*s->mb_stride + s->mb_x;
2613                 mb_type= s->mb_type[xy];
2614             }
2615
2616             /* write gob / video packet header  */
2617             if(s->rtp_mode){
2618                 int current_packet_size, is_gob_start;
2619
2620                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2621
2622                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2623
2624                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2625
2626                 switch(s->codec_id){
2627                 case AV_CODEC_ID_H263:
2628                 case AV_CODEC_ID_H263P:
2629                     if(!s->h263_slice_structured)
2630                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2631                     break;
2632                 case AV_CODEC_ID_MPEG2VIDEO:
2633                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2634                 case AV_CODEC_ID_MPEG1VIDEO:
2635                     if(s->mb_skip_run) is_gob_start=0;
2636                     break;
2637                 }
2638
2639                 if(is_gob_start){
2640                     if(s->start_mb_y != mb_y || mb_x!=0){
2641                         write_slice_end(s);
2642
2643                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2644                             ff_mpeg4_init_partitions(s);
2645                         }
2646                     }
2647
2648                     assert((put_bits_count(&s->pb)&7) == 0);
2649                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2650
2651                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2652                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2653                         int d = 100 / s->error_rate;
2654                         if(r % d == 0){
2655                             current_packet_size=0;
2656                             s->pb.buf_ptr= s->ptr_lastgob;
2657                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2658                         }
2659                     }
2660
2661                     if (s->avctx->rtp_callback){
2662                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2663                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2664                     }
2665                     update_mb_info(s, 1);
2666
2667                     switch(s->codec_id){
2668                     case AV_CODEC_ID_MPEG4:
2669                         if (CONFIG_MPEG4_ENCODER) {
2670                             ff_mpeg4_encode_video_packet_header(s);
2671                             ff_mpeg4_clean_buffers(s);
2672                         }
2673                     break;
2674                     case AV_CODEC_ID_MPEG1VIDEO:
2675                     case AV_CODEC_ID_MPEG2VIDEO:
2676                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2677                             ff_mpeg1_encode_slice_header(s);
2678                             ff_mpeg1_clean_buffers(s);
2679                         }
2680                     break;
2681                     case AV_CODEC_ID_H263:
2682                     case AV_CODEC_ID_H263P:
2683                         if (CONFIG_H263_ENCODER)
2684                             ff_h263_encode_gob_header(s, mb_y);
2685                     break;
2686                     }
2687
2688                     if(s->flags&CODEC_FLAG_PASS1){
2689                         int bits= put_bits_count(&s->pb);
2690                         s->misc_bits+= bits - s->last_bits;
2691                         s->last_bits= bits;
2692                     }
2693
2694                     s->ptr_lastgob += current_packet_size;
2695                     s->first_slice_line=1;
2696                     s->resync_mb_x=mb_x;
2697                     s->resync_mb_y=mb_y;
2698                 }
2699             }
2700
2701             if(  (s->resync_mb_x   == s->mb_x)
2702                && s->resync_mb_y+1 == s->mb_y){
2703                 s->first_slice_line=0;
2704             }
2705
2706             s->mb_skipped=0;
2707             s->dquant=0; //only for QP_RD
2708
2709             update_mb_info(s, 0);
2710
2711             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2712                 int next_block=0;
2713                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2714
2715                 copy_context_before_encode(&backup_s, s, -1);
2716                 backup_s.pb= s->pb;
2717                 best_s.data_partitioning= s->data_partitioning;
2718                 best_s.partitioned_frame= s->partitioned_frame;
2719                 if(s->data_partitioning){
2720                     backup_s.pb2= s->pb2;
2721                     backup_s.tex_pb= s->tex_pb;
2722                 }
2723
2724                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2725                     s->mv_dir = MV_DIR_FORWARD;
2726                     s->mv_type = MV_TYPE_16X16;
2727                     s->mb_intra= 0;
2728                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2729                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2730                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2731                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2732                 }
2733                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2734                     s->mv_dir = MV_DIR_FORWARD;
2735                     s->mv_type = MV_TYPE_FIELD;
2736                     s->mb_intra= 0;
2737                     for(i=0; i<2; i++){
2738                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2739                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2740                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2741                     }
2742                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2743                                  &dmin, &next_block, 0, 0);
2744                 }
2745                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2746                     s->mv_dir = MV_DIR_FORWARD;
2747                     s->mv_type = MV_TYPE_16X16;
2748                     s->mb_intra= 0;
2749                     s->mv[0][0][0] = 0;
2750                     s->mv[0][0][1] = 0;
2751                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2752                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2753                 }
2754                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2755                     s->mv_dir = MV_DIR_FORWARD;
2756                     s->mv_type = MV_TYPE_8X8;
2757                     s->mb_intra= 0;
2758                     for(i=0; i<4; i++){
2759                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2760                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2761                     }
2762                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2763                                  &dmin, &next_block, 0, 0);
2764                 }
2765                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2766                     s->mv_dir = MV_DIR_FORWARD;
2767                     s->mv_type = MV_TYPE_16X16;
2768                     s->mb_intra= 0;
2769                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2770                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2771                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2772                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2773                 }
2774                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2775                     s->mv_dir = MV_DIR_BACKWARD;
2776                     s->mv_type = MV_TYPE_16X16;
2777                     s->mb_intra= 0;
2778                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2779                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2780                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2781                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2782                 }
2783                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2784                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2785                     s->mv_type = MV_TYPE_16X16;
2786                     s->mb_intra= 0;
2787                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2788                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2789                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2790                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2791                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2792                                  &dmin, &next_block, 0, 0);
2793                 }
2794                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2795                     s->mv_dir = MV_DIR_FORWARD;
2796                     s->mv_type = MV_TYPE_FIELD;
2797                     s->mb_intra= 0;
2798                     for(i=0; i<2; i++){
2799                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2800                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2801                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2802                     }
2803                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2804                                  &dmin, &next_block, 0, 0);
2805                 }
2806                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2807                     s->mv_dir = MV_DIR_BACKWARD;
2808                     s->mv_type = MV_TYPE_FIELD;
2809                     s->mb_intra= 0;
2810                     for(i=0; i<2; i++){
2811                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2812                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2813                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2814                     }
2815                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2816                                  &dmin, &next_block, 0, 0);
2817                 }
2818                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2819                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2820                     s->mv_type = MV_TYPE_FIELD;
2821                     s->mb_intra= 0;
2822                     for(dir=0; dir<2; dir++){
2823                         for(i=0; i<2; i++){
2824                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2825                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2826                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2827                         }
2828                     }
2829                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2830                                  &dmin, &next_block, 0, 0);
2831                 }
2832                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2833                     s->mv_dir = 0;
2834                     s->mv_type = MV_TYPE_16X16;
2835                     s->mb_intra= 1;
2836                     s->mv[0][0][0] = 0;
2837                     s->mv[0][0][1] = 0;
2838                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2839                                  &dmin, &next_block, 0, 0);
2840                     if(s->h263_pred || s->h263_aic){
2841                         if(best_s.mb_intra)
2842                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2843                         else
2844                             ff_clean_intra_table_entries(s); //old mode?
2845                     }
2846                 }
2847
2848                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2849                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2850                         const int last_qp= backup_s.qscale;
2851                         int qpi, qp, dc[6];
2852                         int16_t ac[6][16];
2853                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2854                         static const int dquant_tab[4]={-1,1,-2,2};
2855
2856                         assert(backup_s.dquant == 0);
2857
2858                         //FIXME intra
2859                         s->mv_dir= best_s.mv_dir;
2860                         s->mv_type = MV_TYPE_16X16;
2861                         s->mb_intra= best_s.mb_intra;
2862                         s->mv[0][0][0] = best_s.mv[0][0][0];
2863                         s->mv[0][0][1] = best_s.mv[0][0][1];
2864                         s->mv[1][0][0] = best_s.mv[1][0][0];
2865                         s->mv[1][0][1] = best_s.mv[1][0][1];
2866
2867                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2868                         for(; qpi<4; qpi++){
2869                             int dquant= dquant_tab[qpi];
2870                             qp= last_qp + dquant;
2871                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2872                                 continue;
2873                             backup_s.dquant= dquant;
2874                             if(s->mb_intra && s->dc_val[0]){
2875                                 for(i=0; i<6; i++){
2876                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2877                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2878                                 }
2879                             }
2880
2881                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2882                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2883                             if(best_s.qscale != qp){
2884                                 if(s->mb_intra && s->dc_val[0]){
2885                                     for(i=0; i<6; i++){
2886                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2887                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2888                                     }
2889                                 }
2890                             }
2891                         }
2892                     }
2893                 }
2894                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2895                     int mx= s->b_direct_mv_table[xy][0];
2896                     int my= s->b_direct_mv_table[xy][1];
2897
2898                     backup_s.dquant = 0;
2899                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2900                     s->mb_intra= 0;
2901                     ff_mpeg4_set_direct_mv(s, mx, my);
2902                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2903                                  &dmin, &next_block, mx, my);
2904                 }
2905                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2906                     backup_s.dquant = 0;
2907                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2908                     s->mb_intra= 0;
2909                     ff_mpeg4_set_direct_mv(s, 0, 0);
2910                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2911                                  &dmin, &next_block, 0, 0);
2912                 }
2913                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2914                     int coded=0;
2915                     for(i=0; i<6; i++)
2916                         coded |= s->block_last_index[i];
2917                     if(coded){
2918                         int mx,my;
2919                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2920                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2921                             mx=my=0; //FIXME find the one we actually used
2922                             ff_mpeg4_set_direct_mv(s, mx, my);
2923                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2924                             mx= s->mv[1][0][0];
2925                             my= s->mv[1][0][1];
2926                         }else{
2927                             mx= s->mv[0][0][0];
2928                             my= s->mv[0][0][1];
2929                         }
2930
2931                         s->mv_dir= best_s.mv_dir;
2932                         s->mv_type = best_s.mv_type;
2933                         s->mb_intra= 0;
2934 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2935                         s->mv[0][0][1] = best_s.mv[0][0][1];
2936                         s->mv[1][0][0] = best_s.mv[1][0][0];
2937                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2938                         backup_s.dquant= 0;
2939                         s->skipdct=1;
2940                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2941                                         &dmin, &next_block, mx, my);
2942                         s->skipdct=0;
2943                     }
2944                 }
2945
2946                 s->current_picture.qscale_table[xy] = best_s.qscale;
2947
2948                 copy_context_after_encode(s, &best_s, -1);
2949
2950                 pb_bits_count= put_bits_count(&s->pb);
2951                 flush_put_bits(&s->pb);
2952                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2953                 s->pb= backup_s.pb;
2954
2955                 if(s->data_partitioning){
2956                     pb2_bits_count= put_bits_count(&s->pb2);
2957                     flush_put_bits(&s->pb2);
2958                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2959                     s->pb2= backup_s.pb2;
2960
2961                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2962                     flush_put_bits(&s->tex_pb);
2963                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2964                     s->tex_pb= backup_s.tex_pb;
2965                 }
2966                 s->last_bits= put_bits_count(&s->pb);
2967
2968                 if (CONFIG_H263_ENCODER &&
2969                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2970                     ff_h263_update_motion_val(s);
2971
2972                 if(next_block==0){ //FIXME 16 vs linesize16
2973                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2974                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2975                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2976                 }
2977
2978                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2979                     ff_mpv_decode_mb(s, s->block);
2980             } else {
2981                 int motion_x = 0, motion_y = 0;
2982                 s->mv_type=MV_TYPE_16X16;
2983                 // only one MB-Type possible
2984
2985                 switch(mb_type){
2986                 case CANDIDATE_MB_TYPE_INTRA:
2987                     s->mv_dir = 0;
2988                     s->mb_intra= 1;
2989                     motion_x= s->mv[0][0][0] = 0;
2990                     motion_y= s->mv[0][0][1] = 0;
2991                     break;
2992                 case CANDIDATE_MB_TYPE_INTER:
2993                     s->mv_dir = MV_DIR_FORWARD;
2994                     s->mb_intra= 0;
2995                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2996                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2997                     break;
2998                 case CANDIDATE_MB_TYPE_INTER_I:
2999                     s->mv_dir = MV_DIR_FORWARD;
3000                     s->mv_type = MV_TYPE_FIELD;
3001                     s->mb_intra= 0;
3002                     for(i=0; i<2; i++){
3003                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3004                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3005                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3006                     }
3007                     break;
3008                 case CANDIDATE_MB_TYPE_INTER4V:
3009                     s->mv_dir = MV_DIR_FORWARD;
3010                     s->mv_type = MV_TYPE_8X8;
3011                     s->mb_intra= 0;
3012                     for(i=0; i<4; i++){
3013                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3014                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3015                     }
3016                     break;
3017                 case CANDIDATE_MB_TYPE_DIRECT:
3018                     if (CONFIG_MPEG4_ENCODER) {
3019                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3020                         s->mb_intra= 0;
3021                         motion_x=s->b_direct_mv_table[xy][0];
3022                         motion_y=s->b_direct_mv_table[xy][1];
3023                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3024                     }
3025                     break;
3026                 case CANDIDATE_MB_TYPE_DIRECT0:
3027                     if (CONFIG_MPEG4_ENCODER) {
3028                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3029                         s->mb_intra= 0;
3030                         ff_mpeg4_set_direct_mv(s, 0, 0);
3031                     }
3032                     break;
3033                 case CANDIDATE_MB_TYPE_BIDIR:
3034                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3035                     s->mb_intra= 0;
3036                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3037                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3038                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3039                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3040                     break;
3041                 case CANDIDATE_MB_TYPE_BACKWARD:
3042                     s->mv_dir = MV_DIR_BACKWARD;
3043                     s->mb_intra= 0;
3044                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3045                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3046                     break;
3047                 case CANDIDATE_MB_TYPE_FORWARD:
3048                     s->mv_dir = MV_DIR_FORWARD;
3049                     s->mb_intra= 0;
3050                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3051                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3052                     break;
3053                 case CANDIDATE_MB_TYPE_FORWARD_I:
3054                     s->mv_dir = MV_DIR_FORWARD;
3055                     s->mv_type = MV_TYPE_FIELD;
3056                     s->mb_intra= 0;
3057                     for(i=0; i<2; i++){
3058                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3059                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3060                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3061                     }
3062                     break;
3063                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3064                     s->mv_dir = MV_DIR_BACKWARD;
3065                     s->mv_type = MV_TYPE_FIELD;
3066                     s->mb_intra= 0;
3067                     for(i=0; i<2; i++){
3068                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3069                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3070                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3071                     }
3072                     break;
3073                 case CANDIDATE_MB_TYPE_BIDIR_I:
3074                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3075                     s->mv_type = MV_TYPE_FIELD;
3076                     s->mb_intra= 0;
3077                     for(dir=0; dir<2; dir++){
3078                         for(i=0; i<2; i++){
3079                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3080                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3081                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3082                         }
3083                     }
3084                     break;
3085                 default:
3086                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3087                 }
3088
3089                 encode_mb(s, motion_x, motion_y);
3090
3091                 // RAL: Update last macroblock type
3092                 s->last_mv_dir = s->mv_dir;
3093
3094                 if (CONFIG_H263_ENCODER &&
3095                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3096                     ff_h263_update_motion_val(s);
3097
3098                 ff_mpv_decode_mb(s, s->block);
3099             }
3100
3101             /* clean the MV table in IPS frames for direct mode in B frames */
3102             if(s->mb_intra /* && I,P,S_TYPE */){
3103                 s->p_mv_table[xy][0]=0;
3104                 s->p_mv_table[xy][1]=0;
3105             }
3106
3107             if(s->flags&CODEC_FLAG_PSNR){
3108                 int w= 16;
3109                 int h= 16;
3110
3111                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3112                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3113
3114                 s->current_picture.f->error[0] += sse(
3115                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3116                     s->dest[0], w, h, s->linesize);
3117                 s->current_picture.f->error[1] += sse(
3118                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3119                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3120                 s->current_picture.f->error[2] += sse(
3121                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3122                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3123             }
3124             if(s->loop_filter){
3125                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3126                     ff_h263_loop_filter(s);
3127             }
3128             av_dlog(s->avctx, "MB %d %d bits\n",
3129                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3130         }
3131     }
3132
3133     //not beautiful here but we must write it before flushing so it has to be here
3134     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3135         ff_msmpeg4_encode_ext_header(s);
3136
3137     write_slice_end(s);
3138
3139     /* Send the last GOB if RTP */
3140     if (s->avctx->rtp_callback) {
3141         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3142         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3143         /* Call the RTP callback to send the last GOB */
3144         emms_c();
3145         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3146     }
3147
3148     return 0;
3149 }
3150
3151 #define MERGE(field) dst->field += src->field; src->field=0
3152 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3153     MERGE(me.scene_change_score);
3154     MERGE(me.mc_mb_var_sum_temp);
3155     MERGE(me.mb_var_sum_temp);
3156 }
3157
3158 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3159     int i;
3160
3161     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3162     MERGE(dct_count[1]);
3163     MERGE(mv_bits);
3164     MERGE(i_tex_bits);
3165     MERGE(p_tex_bits);
3166     MERGE(i_count);
3167     MERGE(f_count);
3168     MERGE(b_count);
3169     MERGE(skip_count);
3170     MERGE(misc_bits);
3171     MERGE(er.error_count);
3172     MERGE(padding_bug_score);
3173     MERGE(current_picture.f->error[0]);
3174     MERGE(current_picture.f->error[1]);
3175     MERGE(current_picture.f->error[2]);
3176
3177     if(dst->avctx->noise_reduction){
3178         for(i=0; i<64; i++){
3179             MERGE(dct_error_sum[0][i]);
3180             MERGE(dct_error_sum[1][i]);
3181         }
3182     }
3183
3184     assert(put_bits_count(&src->pb) % 8 ==0);
3185     assert(put_bits_count(&dst->pb) % 8 ==0);
3186     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3187     flush_put_bits(&dst->pb);
3188 }
3189
3190 static int estimate_qp(MpegEncContext *s, int dry_run){
3191     if (s->next_lambda){
3192         s->current_picture_ptr->f->quality =
3193         s->current_picture.f->quality = s->next_lambda;
3194         if(!dry_run) s->next_lambda= 0;
3195     } else if (!s->fixed_qscale) {
3196         s->current_picture_ptr->f->quality =
3197         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3198         if (s->current_picture.f->quality < 0)
3199             return -1;
3200     }
3201
3202     if(s->adaptive_quant){
3203         switch(s->codec_id){
3204         case AV_CODEC_ID_MPEG4:
3205             if (CONFIG_MPEG4_ENCODER)
3206                 ff_clean_mpeg4_qscales(s);
3207             break;
3208         case AV_CODEC_ID_H263:
3209         case AV_CODEC_ID_H263P:
3210         case AV_CODEC_ID_FLV1:
3211             if (CONFIG_H263_ENCODER)
3212                 ff_clean_h263_qscales(s);
3213             break;
3214         default:
3215             ff_init_qscale_tab(s);
3216         }
3217
3218         s->lambda= s->lambda_table[0];
3219         //FIXME broken
3220     }else
3221         s->lambda = s->current_picture.f->quality;
3222     update_qscale(s);
3223     return 0;
3224 }
3225
3226 /* must be called before writing the header */
3227 static void set_frame_distances(MpegEncContext * s){
3228     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3229     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3230
3231     if(s->pict_type==AV_PICTURE_TYPE_B){
3232         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3233         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3234     }else{
3235         s->pp_time= s->time - s->last_non_b_time;
3236         s->last_non_b_time= s->time;
3237         assert(s->picture_number==0 || s->pp_time > 0);
3238     }
3239 }
3240
/**
 * Encode one picture.
 *
 * Steps, in order: per-picture setup (frame distances, rounding mode,
 * starting lambda), motion estimation or the intra variance pass, scene
 * change check, f_code/b_code selection followed by the ff_fix_long_*()
 * passes, final quantizer estimation, picture header output and finally
 * the encode_thread jobs, whose results are merged back into s.
 *
 * @param s              main encoder context; s->thread_context[1..] are
 *                       updated from it and merged back into it
 * @param picture_number stored in s->picture_number and handed to the
 *                       codec specific picture header writers
 * @return 0 on success, a negative value on failure
 */
3241 static int encode_picture(MpegEncContext *s, int picture_number)
3242 {
3243     int i, ret;
3244     int bits;
3245     int context_count = s->slice_context_count;
3246
3247     s->picture_number = picture_number;
3248
3249     /* Reset the average MB variance */
3250     s->me.mb_var_sum_temp    =
3251     s->me.mc_mb_var_sum_temp = 0;
3252
3253     /* we need to initialize some time vars before we can encode b-frames */
3254     // RAL: Condition added for MPEG1VIDEO
3255     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3256         set_frame_distances(s);
3257     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3258         ff_set_mpeg4_time(s);
3259
3260     s->me.scene_change_score=0;
3261
3262 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3263
         /* rounding mode: set explicitly on I pictures, toggled on every other
          * non-B picture when flip-flop rounding applies (option, H.263+ or MPEG-4) */
3264     if(s->pict_type==AV_PICTURE_TYPE_I){
3265         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3266         else                        s->no_rounding=0;
3267     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3268         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3269             s->no_rounding ^= 1;
3270     }
3271
         /* starting lambda for motion estimation: a dry-run quantizer estimate in
          * the second pass, otherwise (unless CODEC_FLAG_QSCALE is set) the value
          * remembered in s->last_lambda_for[] */
3272     if(s->flags & CODEC_FLAG_PASS2){
3273         if (estimate_qp(s,1) < 0)
3274             return -1;
3275         ff_get_2pass_fcode(s);
3276     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3277         if(s->pict_type==AV_PICTURE_TYPE_B)
3278             s->lambda= s->last_lambda_for[s->pict_type];
3279         else
3280             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3281         update_qscale(s);
3282     }
3283
3284     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* context 0 is not updated here; only contexts 1..context_count-1 are */
3285     for(i=1; i<context_count; i++){
3286         ret = ff_update_duplicate_context(s->thread_context[i], s);
3287         if (ret < 0)
3288             return ret;
3289     }
3290
3291     if(ff_init_me(s)<0)
3292         return -1;
3293
3294     /* Estimate motion for every MB */
3295     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation / 256, rounded */
3296         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3297         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3298         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* pre-pass: with pre_me set only after an I picture, with pre_me == 2 always */
3299             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3300                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3301             }
3302         }
3303
3304         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3305     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3306         /* I-Frame */
3307         for(i=0; i<s->mb_stride*s->mb_height; i++)
3308             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3309
3310         if(!s->fixed_qscale){
3311             /* finding spatial complexity for I-frame rate control */
3312             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3313         }
3314     }
         /* collect the scene change score and variance sums of the other contexts */
3315     for(i=1; i<context_count; i++){
3316         merge_context_after_me(s, s->thread_context[i]);
3317     }
3318     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3319     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3320     emms_c();
3321
         /* a P picture whose scene change score exceeds the threshold is re-typed
          * as an I picture and all its macroblocks become intra candidates */
3322     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3323         s->pict_type= AV_PICTURE_TYPE_I;
3324         for(i=0; i<s->mb_stride*s->mb_height; i++)
3325             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3326         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3327                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3328     }
3329
         /* f_code/b_code selection; skipped entirely when umvplus is set */
3330     if(!s->umvplus){
3331         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3332             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3333
3334             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3335                 int a,b;
3336                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3337                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3338                 s->f_code= FFMAX3(s->f_code, a, b);
3339             }
3340
3341             ff_fix_long_p_mvs(s);
3342             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3343             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3344                 int j;
3345                 for(i=0; i<2; i++){
3346                     for(j=0; j<2; j++)
3347                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3348                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3349                 }
3350             }
3351         }
3352
3353         if(s->pict_type==AV_PICTURE_TYPE_B){
3354             int a, b;
3355
                 /* f_code covers the forward tables, b_code the backward tables;
                  * each is the larger of the single-direction and bidirectional result */
3356             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3357             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3358             s->f_code = FFMAX(a, b);
3359
3360             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3361             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3362             s->b_code = FFMAX(a, b);
3363
3364             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3365             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3366             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3367             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3368             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3369                 int dir, j;
3370                 for(dir=0; dir<2; dir++){
3371                     for(i=0; i<2; i++){
3372                         for(j=0; j<2; j++){
3373                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3374                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3375                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3376                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3377                         }
3378                     }
3379                 }
3380             }
3381         }
3382     }
3383
         /* final (non dry-run) quantizer decision for this picture */
3384     if (estimate_qp(s, 0) < 0)
3385         return -1;
3386
3387     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3388         s->qscale= 3; //reduce clipping problems
3389
3390     if (s->out_format == FMT_MJPEG) {
3391         /* for mjpeg, we do include qscale in the matrix */
3392         for(i=1;i<64;i++){
3393             int j = s->idsp.idct_permutation[i];
3394
3395             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3396         }
3397         s->y_dc_scale_table=
3398         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3399         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3400         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3401                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix above, so it is fixed to 8 from here on */
3402         s->qscale= 8;
3403     }
3404
3405     //FIXME var duplication
3406     s->current_picture_ptr->f->key_frame =
3407     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3408     s->current_picture_ptr->f->pict_type =
3409     s->current_picture.f->pict_type = s->pict_type;
3410
3411     if (s->current_picture.f->key_frame)
3412         s->picture_in_gop_number=0;
3413
         /* write the picture header and record its size in bits in s->header_bits */
3414     s->last_bits= put_bits_count(&s->pb);
3415     switch(s->out_format) {
3416     case FMT_MJPEG:
3417         if (CONFIG_MJPEG_ENCODER)
3418             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3419                                            s->intra_matrix);
3420         break;
3421     case FMT_H261:
3422         if (CONFIG_H261_ENCODER)
3423             ff_h261_encode_picture_header(s, picture_number);
3424         break;
3425     case FMT_H263:
3426         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3427             ff_wmv2_encode_picture_header(s, picture_number);
3428         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3429             ff_msmpeg4_encode_picture_header(s, picture_number);
3430         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3431             ff_mpeg4_encode_picture_header(s, picture_number);
3432         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3433             ff_rv10_encode_picture_header(s, picture_number);
3434         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3435             ff_rv20_encode_picture_header(s, picture_number);
3436         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3437             ff_flv_encode_picture_header(s, picture_number);
3438         else if (CONFIG_H263_ENCODER)
3439             ff_h263_encode_picture_header(s, picture_number);
3440         break;
3441     case FMT_MPEG1:
3442         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3443             ff_mpeg1_encode_picture_header(s, picture_number);
3444         break;
3445     default:
3446         assert(0);
3447     }
3448     bits= put_bits_count(&s->pb);
3449     s->header_bits= bits - s->last_bits;
3450
         /* update the other contexts from s, run encode_thread on all contexts,
          * then merge the counters and bitstreams of contexts 1.. back into s */
3451     for(i=1; i<context_count; i++){
3452         update_duplicate_context_after_me(s->thread_context[i], s);
3453     }
3454     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3455     for(i=1; i<context_count; i++){
3456         merge_context_after_encode(s, s->thread_context[i]);
3457     }
3458     emms_c();
3459     return 0;
3460 }
3461
3462 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3463     const int intra= s->mb_intra;
3464     int i;
3465
3466     s->dct_count[intra]++;
3467
3468     for(i=0; i<64; i++){
3469         int level= block[i];
3470
3471         if(level){
3472             if(level>0){
3473                 s->dct_error_sum[intra][i] += level;
3474                 level -= s->dct_offset[intra][i];
3475                 if(level<0) level=0;
3476             }else{
3477                 s->dct_error_sum[intra][i] -= level;
3478                 level += s->dct_offset[intra][i];
3479                 if(level>0) level=0;
3480             }
3481             block[i]= level;
3482         }
3483     }
3484 }
3485
/**
 * Forward DCT followed by rate-distortion optimised ("trellis")
 * quantization of one block.
 *
 * After the DCT (and denoising when s->dct_error_sum is set) every scan
 * position up to the last significant one gets one or two candidate
 * quantized levels. A dynamic programming pass over the scan positions
 * then picks, per position, the (run, level) pair minimising
 * distortion + lambda * bits, with bits taken from the AC VLC length
 * tables or from s->ac_esc_length for levels outside those tables.
 *
 * @param s        encoder context
 * @param block    64 values, transformed in place by s->fdsp.fdct() and
 *                 then overwritten with the chosen quantized levels at the
 *                 positions given by s->intra_scantable.permutated
 * @param n        block number: n < 4 selects s->y_dc_scale for the intra
 *                 DC, otherwise s->c_dc_scale; also indexes s->coded_score[]
 * @param qscale   quantizer scale, selects the quantization matrix row
 * @param overflow set to non-zero when the OR of all quantized magnitudes
 *                 exceeds s->max_qcoeff, i.e. an overflow might have happened
 * @return scan index of the last non-zero coefficient; below the first
 *         coded index (e.g. -1 for a non-intra block) when nothing is coded
 */
3486 static int dct_quantize_trellis_c(MpegEncContext *s,
3487                                   int16_t *block, int n,
3488                                   int qscale, int *overflow){
3489     const int *qmat;
3490     const uint8_t *scantable= s->intra_scantable.scantable;
3491     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3492     int max=0;
3493     unsigned int threshold1, threshold2;
3494     int bias=0;
         /* run_tab[i+1] / level_tab[i+1]: best (run, level) for a coefficient
          * coded at scan position i; score_tab[i]: best score of everything
          * coded before position i; survivor[]: start positions still worth
          * extending */
3495     int run_tab[65];
3496     int level_tab[65];
3497     int score_tab[65];
3498     int survivor[65];
3499     int survivor_count;
3500     int last_run=0;
3501     int last_level=0;
3502     int last_score= 0;
3503     int last_i;
         /* coeff[c][i]: candidate level c at scan position i, coeff_count[i] of them are valid */
3504     int coeff[2][64];
3505     int coeff_count[64];
3506     int qmul, qadd, start_i, last_non_zero, i, dc;
3507     const int esc_length= s->ac_esc_length;
3508     uint8_t * length;
3509     uint8_t * last_length;
         /* weight of one bit relative to the distortion in all scores below */
3510     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3511
3512     s->fdsp.fdct(block);
3513
3514     if(s->dct_error_sum)
3515         s->denoise_dct(s, block);
         /* reconstruction parameters used for the FMT_H263 distortion estimate */
3516     qmul= qscale*16;
3517     qadd= ((qscale-1)|1)*8;
3518
3519     if (s->mb_intra) {
3520         int q;
3521         if (!s->h263_aic) {
3522             if (n < 4)
3523                 q = s->y_dc_scale;
3524             else
3525                 q = s->c_dc_scale;
3526             q = q << 3;
3527         } else{
3528             /* For AIC we skip quant/dequant of INTRADC */
3529             q = 1 << 3;
3530             qadd=0;
3531         }
3532
3533         /* note: block[0] is assumed to be positive */
3534         block[0] = (block[0] + (q >> 1)) / q;
             /* the intra DC is done; the search below starts at scan position 1 */
3535         start_i = 1;
3536         last_non_zero = 0;
3537         qmat = s->q_intra_matrix[qscale];
             /* rounding bias of one half in QMAT_SHIFT fixed point */
3538         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3539             bias= 1<<(QMAT_SHIFT-1);
3540         length     = s->intra_ac_vlc_length;
3541         last_length= s->intra_ac_vlc_last_length;
3542     } else {
3543         start_i = 0;
3544         last_non_zero = -1;
3545         qmat = s->q_inter_matrix[qscale];
3546         length     = s->inter_ac_vlc_length;
3547         last_length= s->inter_ac_vlc_last_length;
3548     }
3549     last_i= start_i;
3550
         /* (unsigned)(level + threshold1) > threshold2 is a single-compare test
          * for level > threshold1 || level < -threshold1 */
3551     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3552     threshold2= (threshold1<<1);
3553
         /* find the last scan position whose coefficient exceeds the threshold */
3554     for(i=63; i>=start_i; i--) {
3555         const int j = scantable[i];
3556         int level = block[j] * qmat[j];
3557
3558         if(((unsigned)(level+threshold1))>threshold2){
3559             last_non_zero = i;
3560             break;
3561         }
3562     }
3563
         /* build the candidate levels for every position up to last_non_zero */
3564     for(i=start_i; i<=last_non_zero; i++) {
3565         const int j = scantable[i];
3566         int level = block[j] * qmat[j];
3567
3568 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3569 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3570         if(((unsigned)(level+threshold1))>threshold2){
                 /* candidates: the quantized level and the level one step closer to zero */
3571             if(level>0){
3572                 level= (bias + level)>>QMAT_SHIFT;
3573                 coeff[0][i]= level;
3574                 coeff[1][i]= level-1;
3575 //                coeff[2][k]= level-2;
3576             }else{
3577                 level= (bias - level)>>QMAT_SHIFT;
3578                 coeff[0][i]= -level;
3579                 coeff[1][i]= -level+1;
3580 //                coeff[2][k]= -level+2;
3581             }
                 /* a magnitude of 1 leaves only one non-zero candidate */
3582             coeff_count[i]= FFMIN(level, 2);
3583             assert(coeff_count[i]);
3584             max |=level;
3585         }else{
                 /* below the threshold: the only candidate is +1 or -1, following the sign */
3586             coeff[0][i]= (level>>31)|1;
3587             coeff_count[i]= 1;
3588         }
3589     }
3590
3591     *overflow= s->max_qcoeff < max; //overflow might have happened
3592
         /* nothing above the threshold: clear the AC part and stop */
3593     if(last_non_zero < start_i){
3594         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3595         return last_non_zero;
3596     }
3597
3598     score_tab[start_i]= 0;
3599     survivor[0]= start_i;
3600     survivor_count= 1;
3601
         /* dynamic programming over the scan positions */
3602     for(i=start_i; i<=last_non_zero; i++){
3603         int level_index, j, zero_distortion;
3604         int dct_coeff= FFABS(block[ scantable[i] ]);
             /* large initial value, any real score is smaller */
3605         int best_score=256*256*256*120;
3606
             /* the output of ff_fdct_ifast is rescaled with ff_inv_aanscales[] first */
3607         if (s->fdsp.fdct == ff_fdct_ifast)
3608             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3609         zero_distortion= dct_coeff*dct_coeff;
3610
3611         for(level_index=0; level_index < coeff_count[i]; level_index++){
3612             int distortion;
3613             int level= coeff[level_index][i];
3614             const int alevel= FFABS(level);
3615             int unquant_coeff;
3616
3617             assert(level);
3618
                 /* estimate the reconstructed magnitude of this candidate */
3619             if(s->out_format == FMT_H263){
3620                 unquant_coeff= alevel*qmul + qadd;
3621             }else{ //MPEG1
3622                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3623                 if(s->mb_intra){
3624                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3625                         unquant_coeff =   (unquant_coeff - 1) | 1;
3626                 }else{
3627                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3628                         unquant_coeff =   (unquant_coeff - 1) | 1;
3629                 }
3630                 unquant_coeff<<= 3;
3631             }
3632
                 /* squared error relative to coding this coefficient as zero */
3633             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* offset by 64: levels in [-64, 63] index the VLC length tables,
                  * anything else is charged the escape length */
3634             level+=64;
3635             if((level&(~127)) == 0){
3636                 for(j=survivor_count-1; j>=0; j--){
3637                     int run= i - survivor[j];
3638                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3639                     score += score_tab[i-run];
3640
3641                     if(score < best_score){
3642                         best_score= score;
3643                         run_tab[i+1]= run;
3644                         level_tab[i+1]= level-64;
3645                     }
3646                 }
3647
                     /* for FMT_H263 also score this coefficient as the last one,
                      * using the separate "last" length table */
3648                 if(s->out_format == FMT_H263){
3649                     for(j=survivor_count-1; j>=0; j--){
3650                         int run= i - survivor[j];
3651                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3652                         score += score_tab[i-run];
3653                         if(score < last_score){
3654                             last_score= score;
3655                             last_run= run;
3656                             last_level= level-64;
3657                             last_i= i+1;
3658                         }
3659                     }
3660                 }
3661             }else{
3662                 distortion += esc_length*lambda;
3663                 for(j=survivor_count-1; j>=0; j--){
3664                     int run= i - survivor[j];
3665                     int score= distortion + score_tab[i-run];
3666
3667                     if(score < best_score){
3668                         best_score= score;
3669                         run_tab[i+1]= run;
3670                         level_tab[i+1]= level-64;
3671                     }
3672                 }
3673
3674                 if(s->out_format == FMT_H263){
3675                   for(j=survivor_count-1; j>=0; j--){
3676                         int run= i - survivor[j];
3677                         int score= distortion + score_tab[i-run];
3678                         if(score < last_score){
3679                             last_score= score;
3680                             last_run= run;
3681                             last_level= level-64;
3682                             last_i= i+1;
3683                         }
3684                     }
3685                 }
3686             }
3687         }
3688
3689         score_tab[i+1]= best_score;
3690
             /* drop trailing survivors whose score is above the new best score;
              * when last_non_zero > 27 a slack of lambda is allowed (see note) */
3691         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3692         if(last_non_zero <= 27){
3693             for(; survivor_count; survivor_count--){
3694                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3695                     break;
3696             }
3697         }else{
3698             for(; survivor_count; survivor_count--){
3699                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3700                     break;
3701             }
3702         }
3703
3704         survivor[ survivor_count++ ]= i+1;
3705     }
3706
         /* formats other than FMT_H263 did not track a "last" candidate above:
          * pick the end position now, charging 2*lambda for any end position
          * other than 0 */
3707     if(s->out_format != FMT_H263){
3708         last_score= 256*256*256*120;
3709         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3710             int score= score_tab[i];
3711             if(i) score += lambda*2; //FIXME exacter?
3712
3713             if(score < last_score){
3714                 last_score= score;
3715                 last_i= i;
3716                 last_level= level_tab[i];
3717                 last_run= run_tab[i];
3718             }
3719         }
3720     }
3721
3722     s->coded_score[n] = last_score;
3723
         /* remember |block[0]| before the coefficients are cleared */
3724     dc= FFABS(block[0]);
3725     last_non_zero= last_i - 1;
3726     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3727
3728     if(last_non_zero < start_i)
3729         return last_non_zero;
3730
         /* only coefficient 0 of a non-intra block is left: compare its
          * candidates against coding nothing at all (best_level 0, score dc*dc) */
3731     if(last_non_zero == 0 && start_i == 0){
3732         int best_level= 0;
3733         int best_score= dc * dc;
3734
3735         for(i=0; i<coeff_count[0]; i++){
3736             int level= coeff[i][0];
3737             int alevel= FFABS(level);
3738             int unquant_coeff, score, distortion;
3739
3740             if(s->out_format == FMT_H263){
3741                     unquant_coeff= (alevel*qmul + qadd)>>3;
3742             }else{ //MPEG1
3743                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3744                     unquant_coeff =   (unquant_coeff - 1) | 1;
3745             }
3746             unquant_coeff = (unquant_coeff + 4) >> 3;
3747             unquant_coeff<<= 3 + 3;
3748
3749             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3750             level+=64;
3751             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3752             else                    score= distortion + esc_length*lambda;
3753
3754             if(score < best_score){
3755                 best_score= score;
3756                 best_level= level - 64;
3757             }
3758         }
3759         block[0]= best_level;
3760         s->coded_score[n] = best_score - dc*dc;
3761         if(best_level == 0) return -1;
3762         else                return last_non_zero;
3763     }
3764
         /* write the last coefficient, then follow the run_tab[] chain backwards
          * to store the remaining chosen levels */
3765     i= last_i;
3766     assert(last_level);
3767
3768     block[ perm_scantable[last_non_zero] ]= last_level;
3769     i -= last_run + 1;
3770
3771     for(; i>start_i; i -= run_tab[i] + 1){
3772         block[ perm_scantable[i-1] ]= level_tab[i];
3773     }
3774
3775     return last_non_zero;
3776 }
3777
3778 //#define REFINE_STATS 1
3779 static int16_t basis[64][64];
3780
3781 static void build_basis(uint8_t *perm){
3782     int i, j, x, y;
3783     emms_c();
3784     for(i=0; i<8; i++){
3785         for(j=0; j<8; j++){
3786             for(y=0; y<8; y++){
3787                 for(x=0; x<8; x++){
3788                     double s= 0.25*(1<<BASIS_SHIFT);
3789                     int index= 8*i + j;
3790                     int perm_index= perm[index];
3791                     if(i==0) s*= sqrt(0.5);
3792                     if(j==0) s*= sqrt(0.5);
3793                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3794                 }
3795             }
3796         }
3797     }
3798 }
3799
3800 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3801                         int16_t *block, int16_t *weight, int16_t *orig,
3802                         int n, int qscale){
3803     int16_t rem[64];
3804     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3805     const uint8_t *scantable= s->intra_scantable.scantable;
3806     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3807 //    unsigned int threshold1, threshold2;
3808 //    int bias=0;
3809     int run_tab[65];
3810     int prev_run=0;
3811     int prev_level=0;
3812     int qmul, qadd, start_i, last_non_zero, i, dc;
3813     uint8_t * length;
3814     uint8_t * last_length;
3815     int lambda;
3816     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3817 #ifdef REFINE_STATS
3818 static int count=0;
3819 static int after_last=0;
3820 static int to_zero=0;
3821 static int from_zero=0;
3822 static int raise=0;
3823 static int lower=0;
3824 static int messed_sign=0;
3825 #endif
3826
3827     if(basis[0][0] == 0)
3828         build_basis(s->idsp.idct_permutation);
3829
3830     qmul= qscale*2;
3831     qadd= (qscale-1)|1;
3832     if (s->mb_intra) {
3833         if (!s->h263_aic) {
3834             if (n < 4)
3835                 q = s->y_dc_scale;
3836             else
3837                 q = s->c_dc_scale;
3838         } else{
3839             /* For AIC we skip quant/dequant of INTRADC */
3840             q = 1;
3841             qadd=0;
3842         }
3843         q <<= RECON_SHIFT-3;
3844         /* note: block[0] is assumed to be positive */
3845         dc= block[0]*q;
3846 //        block[0] = (block[0] + (q >> 1)) / q;
3847         start_i = 1;
3848 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3849 //            bias= 1<<(QMAT_SHIFT-1);
3850         length     = s->intra_ac_vlc_length;
3851         last_length= s->intra_ac_vlc_last_length;
3852     } else {
3853         dc= 0;
3854         start_i = 0;
3855         length     = s->inter_ac_vlc_length;
3856         last_length= s->inter_ac_vlc_last_length;
3857     }
3858     last_non_zero = s->block_last_index[n];
3859
3860 #ifdef REFINE_STATS
3861 {START_TIMER
3862 #endif
3863     dc += (1<<(RECON_SHIFT-1));
3864     for(i=0; i<64; i++){
3865         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3866     }
3867 #ifdef REFINE_STATS
3868 STOP_TIMER("memset rem[]")}
3869 #endif
3870     sum=0;
3871     for(i=0; i<64; i++){
3872         int one= 36;
3873         int qns=4;
3874         int w;
3875
3876         w= FFABS(weight[i]) + qns*one;
3877         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3878
3879         weight[i] = w;
3880 //        w=weight[i] = (63*qns + (w/2)) / w;
3881
3882         assert(w>0);
3883         assert(w<(1<<6));
3884         sum += w*w;
3885     }
3886     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3887 #ifdef REFINE_STATS
3888 {START_TIMER
3889 #endif
3890     run=0;
3891     rle_index=0;
3892     for(i=start_i; i<=last_non_zero; i++){
3893         int j= perm_scantable[i];
3894         const int level= block[j];
3895         int coeff;
3896
3897         if(level){
3898             if(level<0) coeff= qmul*level - qadd;
3899             else        coeff= qmul*level + qadd;
3900             run_tab[rle_index++]=run;
3901             run=0;
3902
3903             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3904         }else{
3905             run++;
3906         }
3907     }
3908 #ifdef REFINE_STATS
3909 if(last_non_zero>0){
3910 STOP_TIMER("init rem[]")
3911 }
3912 }
3913
3914 {START_TIMER
3915 #endif
3916     for(;;){
3917         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3918         int best_coeff=0;
3919         int best_change=0;
3920         int run2, best_unquant_change=0, analyze_gradient;
3921 #ifdef REFINE_STATS
3922 {START_TIMER
3923 #endif
3924         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3925
3926         if(analyze_gradient){
3927 #ifdef REFINE_STATS
3928 {START_TIMER
3929 #endif
3930             for(i=0; i<64; i++){
3931                 int w= weight[i];
3932
3933                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3934             }
3935 #ifdef REFINE_STATS
3936 STOP_TIMER("rem*w*w")}
3937 {START_TIMER
3938 #endif
3939             s->fdsp.fdct(d1);
3940 #ifdef REFINE_STATS
3941 STOP_TIMER("dct")}
3942 #endif
3943         }
3944
3945         if(start_i){
3946             const int level= block[0];
3947             int change, old_coeff;
3948
3949             assert(s->mb_intra);
3950
3951             old_coeff= q*level;
3952
3953             for(change=-1; change<=1; change+=2){
3954                 int new_level= level + change;
3955                 int score, new_coeff;
3956
3957                 new_coeff= q*new_level;
3958                 if(new_coeff >= 2048 || new_coeff < 0)
3959                     continue;
3960
3961                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3962                                                   new_coeff - old_coeff);
3963                 if(score<best_score){
3964                     best_score= score;
3965                     best_coeff= 0;
3966                     best_change= change;
3967                     best_unquant_change= new_coeff - old_coeff;
3968                 }
3969             }
3970         }
3971
3972         run=0;
3973         rle_index=0;
3974         run2= run_tab[rle_index++];
3975         prev_level=0;
3976         prev_run=0;
3977
3978         for(i=start_i; i<64; i++){
3979             int j= perm_scantable[i];
3980             const int level= block[j];
3981             int change, old_coeff;
3982
3983             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3984                 break;
3985
3986             if(level){
3987                 if(level<0) old_coeff= qmul*level - qadd;
3988                 else        old_coeff= qmul*level + qadd;
3989                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3990             }else{
3991                 old_coeff=0;
3992                 run2--;
3993                 assert(run2>=0 || i >= last_non_zero );
3994             }
3995
3996             for(change=-1; change<=1; change+=2){
3997                 int new_level= level + change;
3998                 int score, new_coeff, unquant_change;
3999
4000                 score=0;
4001                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4002                    continue;
4003
4004                 if(new_level){
4005                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4006                     else            new_coeff= qmul*new_level + qadd;
4007                     if(new_coeff >= 2048 || new_coeff <= -2048)
4008                         continue;
4009                     //FIXME check for overflow
4010
4011                     if(level){
4012                         if(level < 63 && level > -63){
4013                             if(i < last_non_zero)
4014                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4015                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4016                             else
4017                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4018                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4019                         }
4020                     }else{
4021                         assert(FFABS(new_level)==1);
4022
4023                         if(analyze_gradient){
4024                             int g= d1[ scantable[i] ];
4025                             if(g && (g^new_level) >= 0)
4026                                 continue;
4027                         }
4028
4029                         if(i < last_non_zero){
4030                             int next_i= i + run2 + 1;
4031                             int next_level= block[ perm_scantable[next_i] ] + 64;
4032
4033                             if(next_level&(~127))
4034                                 next_level= 0;
4035
4036                             if(next_i < last_non_zero)
4037                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4038                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4039                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4040                             else
4041                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4042                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4043                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4044                         }else{
4045                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4046                             if(prev_level){
4047                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4048                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4049                             }
4050                         }
4051                     }
4052                 }else{
4053                     new_coeff=0;
4054                     assert(FFABS(level)==1);
4055
4056                     if(i < last_non_zero){
4057                         int next_i= i + run2 + 1;
4058                         int next_level= block[ perm_scantable[next_i] ] + 64;
4059
4060                         if(next_level&(~127))
4061                             next_level= 0;
4062
4063                         if(next_i < last_non_zero)
4064                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4065                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4066                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4067                         else
4068                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4069                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4070                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4071                     }else{
4072                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4073                         if(prev_level){
4074                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4075                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4076                         }
4077                     }
4078                 }
4079
4080                 score *= lambda;
4081
4082                 unquant_change= new_coeff - old_coeff;
4083                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4084
4085                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4086                                                    unquant_change);
4087                 if(score<best_score){
4088                     best_score= score;
4089                     best_coeff= i;
4090                     best_change= change;
4091                     best_unquant_change= unquant_change;
4092                 }
4093             }
4094             if(level){
4095                 prev_level= level + 64;
4096                 if(prev_level&(~127))
4097                     prev_level= 0;
4098                 prev_run= run;
4099                 run=0;
4100             }else{
4101                 run++;
4102             }
4103         }
4104 #ifdef REFINE_STATS
4105 STOP_TIMER("iterative step")}
4106 #endif
4107
4108         if(best_change){
4109             int j= perm_scantable[ best_coeff ];
4110
4111             block[j] += best_change;
4112
4113             if(best_coeff > last_non_zero){
4114                 last_non_zero= best_coeff;
4115                 assert(block[j]);
4116 #ifdef REFINE_STATS
4117 after_last++;
4118 #endif
4119             }else{
4120 #ifdef REFINE_STATS
4121 if(block[j]){
4122     if(block[j] - best_change){
4123         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4124             raise++;
4125         }else{
4126             lower++;
4127         }
4128     }else{
4129         from_zero++;
4130     }
4131 }else{
4132     to_zero++;
4133 }
4134 #endif
4135                 for(; last_non_zero>=start_i; last_non_zero--){
4136                     if(block[perm_scantable[last_non_zero]])
4137                         break;
4138                 }
4139             }
4140 #ifdef REFINE_STATS
4141 count++;
4142 if(256*256*256*64 % count == 0){
4143     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4144 }
4145 #endif
4146             run=0;
4147             rle_index=0;
4148             for(i=start_i; i<=last_non_zero; i++){
4149                 int j= perm_scantable[i];
4150                 const int level= block[j];
4151
4152                  if(level){
4153                      run_tab[rle_index++]=run;
4154                      run=0;
4155                  }else{
4156                      run++;
4157                  }
4158             }
4159
4160             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4161         }else{
4162             break;
4163         }
4164     }
4165 #ifdef REFINE_STATS
4166 if(last_non_zero>0){
4167 STOP_TIMER("iterative search")
4168 }
4169 }
4170 #endif
4171
4172     return last_non_zero;
4173 }
4174
4175 int ff_dct_quantize_c(MpegEncContext *s,
4176                         int16_t *block, int n,
4177                         int qscale, int *overflow)
4178 {
4179     int i, j, level, last_non_zero, q, start_i;
4180     const int *qmat;
4181     const uint8_t *scantable= s->intra_scantable.scantable;
4182     int bias;
4183     int max=0;
4184     unsigned int threshold1, threshold2;
4185
4186     s->fdsp.fdct(block);
4187
4188     if(s->dct_error_sum)
4189         s->denoise_dct(s, block);
4190
4191     if (s->mb_intra) {
4192         if (!s->h263_aic) {
4193             if (n < 4)
4194                 q = s->y_dc_scale;
4195             else
4196                 q = s->c_dc_scale;
4197             q = q << 3;
4198         } else
4199             /* For AIC we skip quant/dequant of INTRADC */
4200             q = 1 << 3;
4201
4202         /* note: block[0] is assumed to be positive */
4203         block[0] = (block[0] + (q >> 1)) / q;
4204         start_i = 1;
4205         last_non_zero = 0;
4206         qmat = s->q_intra_matrix[qscale];
4207         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4208     } else {
4209         start_i = 0;
4210         last_non_zero = -1;
4211         qmat = s->q_inter_matrix[qscale];
4212         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4213     }
4214     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4215     threshold2= (threshold1<<1);
4216     for(i=63;i>=start_i;i--) {
4217         j = scantable[i];
4218         level = block[j] * qmat[j];
4219
4220         if(((unsigned)(level+threshold1))>threshold2){
4221             last_non_zero = i;
4222             break;
4223         }else{
4224             block[j]=0;
4225         }
4226     }
4227     for(i=start_i; i<=last_non_zero; i++) {
4228         j = scantable[i];
4229         level = block[j] * qmat[j];
4230
4231 //        if(   bias+level >= (1<<QMAT_SHIFT)
4232 //           || bias-level >= (1<<QMAT_SHIFT)){
4233         if(((unsigned)(level+threshold1))>threshold2){
4234             if(level>0){
4235                 level= (bias + level)>>QMAT_SHIFT;
4236                 block[j]= level;
4237             }else{
4238                 level= (bias - level)>>QMAT_SHIFT;
4239                 block[j]= -level;
4240             }
4241             max |=level;
4242         }else{
4243             block[j]=0;
4244         }
4245     }
4246     *overflow= s->max_qcoeff < max; //overflow might have happened
4247
4248     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4249     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4250         ff_block_permute(block, s->idsp.idct_permutation,
4251                          scantable, last_non_zero);
4252
4253     return last_non_zero;
4254 }
4255
/* Byte offset of member x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags used by the option tables in this file: video + encoding parameter.
 * The expansion is parenthesized so that it stays a single operand when the
 * macro is used inside a larger expression (e.g. VE & flag). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4258 static const AVOption h263_options[] = {
4259     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4260     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4261     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4262     FF_MPV_COMMON_OPTS
4263     { NULL },
4264 };
4265
4266 static const AVClass h263_class = {
4267     .class_name = "H.263 encoder",
4268     .item_name  = av_default_item_name,
4269     .option     = h263_options,
4270     .version    = LIBAVUTIL_VERSION_INT,
4271 };
4272
4273 AVCodec ff_h263_encoder = {
4274     .name           = "h263",
4275     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4276     .type           = AVMEDIA_TYPE_VIDEO,
4277     .id             = AV_CODEC_ID_H263,
4278     .priv_data_size = sizeof(MpegEncContext),
4279     .init           = ff_mpv_encode_init,
4280     .encode2        = ff_mpv_encode_picture,
4281     .close          = ff_mpv_encode_end,
4282     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4283     .priv_class     = &h263_class,
4284 };
4285
4286 static const AVOption h263p_options[] = {
4287     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4288     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4289     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4290     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4291     FF_MPV_COMMON_OPTS
4292     { NULL },
4293 };
4294 static const AVClass h263p_class = {
4295     .class_name = "H.263p encoder",
4296     .item_name  = av_default_item_name,
4297     .option     = h263p_options,
4298     .version    = LIBAVUTIL_VERSION_INT,
4299 };
4300
4301 AVCodec ff_h263p_encoder = {
4302     .name           = "h263p",
4303     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4304     .type           = AVMEDIA_TYPE_VIDEO,
4305     .id             = AV_CODEC_ID_H263P,
4306     .priv_data_size = sizeof(MpegEncContext),
4307     .init           = ff_mpv_encode_init,
4308     .encode2        = ff_mpv_encode_picture,
4309     .close          = ff_mpv_encode_end,
4310     .capabilities   = CODEC_CAP_SLICE_THREADS,
4311     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4312     .priv_class     = &h263p_class,
4313 };
4314
4315 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4316
4317 AVCodec ff_msmpeg4v2_encoder = {
4318     .name           = "msmpeg4v2",
4319     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4320     .type           = AVMEDIA_TYPE_VIDEO,
4321     .id             = AV_CODEC_ID_MSMPEG4V2,
4322     .priv_data_size = sizeof(MpegEncContext),
4323     .init           = ff_mpv_encode_init,
4324     .encode2        = ff_mpv_encode_picture,
4325     .close          = ff_mpv_encode_end,
4326     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4327     .priv_class     = &msmpeg4v2_class,
4328 };
4329
4330 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4331
4332 AVCodec ff_msmpeg4v3_encoder = {
4333     .name           = "msmpeg4",
4334     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4335     .type           = AVMEDIA_TYPE_VIDEO,
4336     .id             = AV_CODEC_ID_MSMPEG4V3,
4337     .priv_data_size = sizeof(MpegEncContext),
4338     .init           = ff_mpv_encode_init,
4339     .encode2        = ff_mpv_encode_picture,
4340     .close          = ff_mpv_encode_end,
4341     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4342     .priv_class     = &msmpeg4v3_class,
4343 };
4344
4345 FF_MPV_GENERIC_CLASS(wmv1)
4346
4347 AVCodec ff_wmv1_encoder = {
4348     .name           = "wmv1",
4349     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4350     .type           = AVMEDIA_TYPE_VIDEO,
4351     .id             = AV_CODEC_ID_WMV1,
4352     .priv_data_size = sizeof(MpegEncContext),
4353     .init           = ff_mpv_encode_init,
4354     .encode2        = ff_mpv_encode_picture,
4355     .close          = ff_mpv_encode_end,
4356     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4357     .priv_class     = &wmv1_class,
4358 };