]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
lavc: make rc_qmod_* into private options of mpegvideo encoders
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
/* Number of fractional bits in the intra/inter quantiser bias values
 * (e.g. a bias of 3/8 is stored as 3 << (QUANT_BIAS_SHIFT - 3)). */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point scale of the 16-bit reciprocal tables (qmat16) built by
 * ff_convert_matrix(). */
#define QMAT_SHIFT_MMX 16
/* Fixed-point scale of the int reciprocal tables (qmat) built by
 * ff_convert_matrix(); the ifast FDCT path uses QMAT_SHIFT + 14. */
#define QMAT_SHIFT 22

/* Static helpers that are referenced before their definitions. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Shared default tables; mpv_encode_defaults() installs them as
 * s->me.mv_penalty and s->fcode_tab respectively. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table holding only the FF_MPV_COMMON_OPTS entries, terminated by
 * a NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
105                                         (qscale * quant_matrix[j]));
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 /* 16 <= qscale * quant_matrix[i] <= 7905
111                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
112                  *             19952 <=              x  <= 249205026
113                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
114                  *           3444240 >= (1 << 36) / (x) >= 275 */
115
116                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
117                                         (ff_aanscales[i] * qscale *
118                                          quant_matrix[j]));
119             }
120         } else {
121             for (i = 0; i < 64; i++) {
122                 const int j = s->idsp.idct_permutation[i];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
129                                         (qscale * quant_matrix[j]));
130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
131                 //                    (qscale * quant_matrix[i]);
132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
133                                        (qscale * quant_matrix[j]);
134
135                 if (qmat16[qscale][0][i] == 0 ||
136                     qmat16[qscale][0][i] == 128 * 256)
137                     qmat16[qscale][0][i] = 128 * 256 - 1;
138                 qmat16[qscale][1][i] =
139                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
140                                 qmat16[qscale][0][i]);
141             }
142         }
143
144         for (i = intra; i < 64; i++) {
145             int64_t max = 8191;
146             if (fdsp->fdct == ff_fdct_ifast) {
147                 max = (8191LL * ff_aanscales[i]) >> 14;
148             }
149             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
150                 shift++;
151             }
152         }
153     }
154     if (shift) {
155         av_log(NULL, AV_LOG_INFO,
156                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
157                QMAT_SHIFT - shift);
158     }
159 }
160
161 static inline void update_qscale(MpegEncContext *s)
162 {
163     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
164                 (FF_LAMBDA_SHIFT + 7);
165     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
166
167     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
168                  FF_LAMBDA_SHIFT;
169 }
170
171 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
172 {
173     int i;
174
175     if (matrix) {
176         put_bits(pb, 1, 1);
177         for (i = 0; i < 64; i++) {
178             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
179         }
180     } else
181         put_bits(pb, 1, 0);
182 }
183
184 /**
185  * init s->current_picture.qscale_table from s->lambda_table
186  */
187 void ff_init_qscale_tab(MpegEncContext *s)
188 {
189     int8_t * const qscale_table = s->current_picture.qscale_table;
190     int i;
191
192     for (i = 0; i < s->mb_num; i++) {
193         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
194         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
195         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
196                                                   s->avctx->qmax);
197     }
198 }
199
200 static void update_duplicate_context_after_me(MpegEncContext *dst,
201                                               MpegEncContext *src)
202 {
203 #define COPY(a) dst->a= src->a
204     COPY(pict_type);
205     COPY(current_picture);
206     COPY(f_code);
207     COPY(b_code);
208     COPY(qscale);
209     COPY(lambda);
210     COPY(lambda2);
211     COPY(picture_in_gop_number);
212     COPY(gop_picture_number);
213     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
214     COPY(progressive_frame);    // FIXME don't set in encode_header
215     COPY(partitioned_frame);    // FIXME don't set in encode_header
216 #undef COPY
217 }
218
219 /**
220  * Set the given MpegEncContext to defaults for encoding.
221  * the changed fields will not depend upon the prior state of the MpegEncContext.
222  */
223 static void mpv_encode_defaults(MpegEncContext *s)
224 {
225     int i;
226     ff_mpv_common_defaults(s);
227
228     for (i = -16; i < 16; i++) {
229         default_fcode_tab[i + MAX_MV] = 1;
230     }
231     s->me.mv_penalty = default_mv_penalty;
232     s->fcode_tab     = default_fcode_tab;
233
234     s->input_picture_number  = 0;
235     s->picture_in_gop_number = 0;
236 }
237
238 /* init video encoder */
239 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
240 {
241     MpegEncContext *s = avctx->priv_data;
242     int i, ret, format_supported;
243
244     mpv_encode_defaults(s);
245
246     switch (avctx->codec_id) {
247     case AV_CODEC_ID_MPEG2VIDEO:
248         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
249             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
250             av_log(avctx, AV_LOG_ERROR,
251                    "only YUV420 and YUV422 are supported\n");
252             return -1;
253         }
254         break;
255     case AV_CODEC_ID_MJPEG:
256         format_supported = 0;
257         /* JPEG color space */
258         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
259             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
260             (avctx->color_range == AVCOL_RANGE_JPEG &&
261              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
262               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
263             format_supported = 1;
264         /* MPEG color space */
265         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
266                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
267                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
268             format_supported = 1;
269
270         if (!format_supported) {
271             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
272             return -1;
273         }
274         break;
275     default:
276         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
277             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
278             return -1;
279         }
280     }
281
282     switch (avctx->pix_fmt) {
283     case AV_PIX_FMT_YUVJ422P:
284     case AV_PIX_FMT_YUV422P:
285         s->chroma_format = CHROMA_422;
286         break;
287     case AV_PIX_FMT_YUVJ420P:
288     case AV_PIX_FMT_YUV420P:
289     default:
290         s->chroma_format = CHROMA_420;
291         break;
292     }
293
294     s->bit_rate = avctx->bit_rate;
295     s->width    = avctx->width;
296     s->height   = avctx->height;
297     if (avctx->gop_size > 600 &&
298         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
299         av_log(avctx, AV_LOG_ERROR,
300                "Warning keyframe interval too large! reducing it ...\n");
301         avctx->gop_size = 600;
302     }
303     s->gop_size     = avctx->gop_size;
304     s->avctx        = avctx;
305     s->flags        = avctx->flags;
306     s->flags2       = avctx->flags2;
307     if (avctx->max_b_frames > MAX_B_FRAMES) {
308         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
309                "is %d.\n", MAX_B_FRAMES);
310     }
311     s->max_b_frames = avctx->max_b_frames;
312     s->codec_id     = avctx->codec->id;
313     s->strict_std_compliance = avctx->strict_std_compliance;
314     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
315     s->mpeg_quant         = avctx->mpeg_quant;
316     s->rtp_mode           = !!avctx->rtp_payload_size;
317     s->intra_dc_precision = avctx->intra_dc_precision;
318     s->user_specified_pts = AV_NOPTS_VALUE;
319
320     if (s->gop_size <= 1) {
321         s->intra_only = 1;
322         s->gop_size   = 12;
323     } else {
324         s->intra_only = 0;
325     }
326
327     s->me_method = avctx->me_method;
328
329     /* Fixed QSCALE */
330     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
331
332     s->adaptive_quant = (s->avctx->lumi_masking ||
333                          s->avctx->dark_masking ||
334                          s->avctx->temporal_cplx_masking ||
335                          s->avctx->spatial_cplx_masking  ||
336                          s->avctx->p_masking      ||
337                          s->avctx->border_masking ||
338                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
339                         !s->fixed_qscale;
340
341     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
342
343     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
344         av_log(avctx, AV_LOG_ERROR,
345                "a vbv buffer size is needed, "
346                "for encoding with a maximum bitrate\n");
347         return -1;
348     }
349
350     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
351         av_log(avctx, AV_LOG_INFO,
352                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
353     }
354
355     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
356         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
357         return -1;
358     }
359
360     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
361         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
362         return -1;
363     }
364
365     if (avctx->rc_max_rate &&
366         avctx->rc_max_rate == avctx->bit_rate &&
367         avctx->rc_max_rate != avctx->rc_min_rate) {
368         av_log(avctx, AV_LOG_INFO,
369                "impossible bitrate constraints, this will fail\n");
370     }
371
372     if (avctx->rc_buffer_size &&
373         avctx->bit_rate * (int64_t)avctx->time_base.num >
374             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
375         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
376         return -1;
377     }
378
379     if (!s->fixed_qscale &&
380         avctx->bit_rate * av_q2d(avctx->time_base) >
381             avctx->bit_rate_tolerance) {
382         av_log(avctx, AV_LOG_ERROR,
383                "bitrate tolerance too small for bitrate\n");
384         return -1;
385     }
386
387     if (s->avctx->rc_max_rate &&
388         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
389         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
390          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
391         90000LL * (avctx->rc_buffer_size - 1) >
392             s->avctx->rc_max_rate * 0xFFFFLL) {
393         av_log(avctx, AV_LOG_INFO,
394                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
395                "specified vbv buffer is too large for the given bitrate!\n");
396     }
397
398     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
399         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
400         s->codec_id != AV_CODEC_ID_FLV1) {
401         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
402         return -1;
403     }
404
405     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
406         av_log(avctx, AV_LOG_ERROR,
407                "OBMC is only supported with simple mb decision\n");
408         return -1;
409     }
410
411     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
412         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
413         return -1;
414     }
415
416     if (s->max_b_frames                    &&
417         s->codec_id != AV_CODEC_ID_MPEG4      &&
418         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
419         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
420         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
421         return -1;
422     }
423
424     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
425          s->codec_id == AV_CODEC_ID_H263  ||
426          s->codec_id == AV_CODEC_ID_H263P) &&
427         (avctx->sample_aspect_ratio.num > 255 ||
428          avctx->sample_aspect_ratio.den > 255)) {
429         av_log(avctx, AV_LOG_ERROR,
430                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
431                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
432         return -1;
433     }
434
435     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
436         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
437         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
438         return -1;
439     }
440
441     // FIXME mpeg2 uses that too
442     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
443         av_log(avctx, AV_LOG_ERROR,
444                "mpeg2 style quantization not supported by codec\n");
445         return -1;
446     }
447
448     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
449         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
450         return -1;
451     }
452
453     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
454         s->avctx->mb_decision != FF_MB_DECISION_RD) {
455         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
456         return -1;
457     }
458
459     if (s->avctx->scenechange_threshold < 1000000000 &&
460         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
461         av_log(avctx, AV_LOG_ERROR,
462                "closed gop with scene change detection are not supported yet, "
463                "set threshold to 1000000000\n");
464         return -1;
465     }
466
467     if (s->flags & CODEC_FLAG_LOW_DELAY) {
468         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
469             av_log(avctx, AV_LOG_ERROR,
470                   "low delay forcing is only available for mpeg2\n");
471             return -1;
472         }
473         if (s->max_b_frames != 0) {
474             av_log(avctx, AV_LOG_ERROR,
475                    "b frames cannot be used with low delay\n");
476             return -1;
477         }
478     }
479
480     if (s->q_scale_type == 1) {
481         if (avctx->qmax > 12) {
482             av_log(avctx, AV_LOG_ERROR,
483                    "non linear quant only supports qmax <= 12 currently\n");
484             return -1;
485         }
486     }
487
488     if (s->avctx->thread_count > 1         &&
489         s->codec_id != AV_CODEC_ID_MPEG4      &&
490         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
491         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
492         (s->codec_id != AV_CODEC_ID_H263P)) {
493         av_log(avctx, AV_LOG_ERROR,
494                "multi threaded encoding not supported by codec\n");
495         return -1;
496     }
497
498     if (s->avctx->thread_count < 1) {
499         av_log(avctx, AV_LOG_ERROR,
500                "automatic thread number detection not supported by codec,"
501                "patch welcome\n");
502         return -1;
503     }
504
505     if (s->avctx->thread_count > 1)
506         s->rtp_mode = 1;
507
508     if (!avctx->time_base.den || !avctx->time_base.num) {
509         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
510         return -1;
511     }
512
513     i = (INT_MAX / 2 + 128) >> 8;
514     if (avctx->mb_threshold >= i) {
515         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
516                i - 1);
517         return -1;
518     }
519
520     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
521         av_log(avctx, AV_LOG_INFO,
522                "notice: b_frame_strategy only affects the first pass\n");
523         avctx->b_frame_strategy = 0;
524     }
525
526     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
527     if (i > 1) {
528         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
529         avctx->time_base.den /= i;
530         avctx->time_base.num /= i;
531         //return -1;
532     }
533
534     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
535         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
536         // (a + x * 3 / 8) / x
537         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
538         s->inter_quant_bias = 0;
539     } else {
540         s->intra_quant_bias = 0;
541         // (a - x / 4) / x
542         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
543     }
544
545     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->intra_quant_bias = avctx->intra_quant_bias;
547     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
548         s->inter_quant_bias = avctx->inter_quant_bias;
549
550     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
551         s->avctx->time_base.den > (1 << 16) - 1) {
552         av_log(avctx, AV_LOG_ERROR,
553                "timebase %d/%d not supported by MPEG 4 standard, "
554                "the maximum admitted value for the timebase denominator "
555                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
556                (1 << 16) - 1);
557         return -1;
558     }
559     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
560
561     switch (avctx->codec->id) {
562     case AV_CODEC_ID_MPEG1VIDEO:
563         s->out_format = FMT_MPEG1;
564         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
565         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
566         break;
567     case AV_CODEC_ID_MPEG2VIDEO:
568         s->out_format = FMT_MPEG1;
569         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
570         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
571         s->rtp_mode   = 1;
572         break;
573     case AV_CODEC_ID_MJPEG:
574         s->out_format = FMT_MJPEG;
575         s->intra_only = 1; /* force intra only for jpeg */
576         if (!CONFIG_MJPEG_ENCODER ||
577             ff_mjpeg_encode_init(s) < 0)
578             return -1;
579         avctx->delay = 0;
580         s->low_delay = 1;
581         break;
582     case AV_CODEC_ID_H261:
583         if (!CONFIG_H261_ENCODER)
584             return -1;
585         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
586             av_log(avctx, AV_LOG_ERROR,
587                    "The specified picture size of %dx%d is not valid for the "
588                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
589                     s->width, s->height);
590             return -1;
591         }
592         s->out_format = FMT_H261;
593         avctx->delay  = 0;
594         s->low_delay  = 1;
595         break;
596     case AV_CODEC_ID_H263:
597         if (!CONFIG_H263_ENCODER)
598         return -1;
599         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
600                              s->width, s->height) == 8) {
601             av_log(avctx, AV_LOG_INFO,
602                    "The specified picture size of %dx%d is not valid for "
603                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
604                    "352x288, 704x576, and 1408x1152."
605                    "Try H.263+.\n", s->width, s->height);
606             return -1;
607         }
608         s->out_format = FMT_H263;
609         avctx->delay  = 0;
610         s->low_delay  = 1;
611         break;
612     case AV_CODEC_ID_H263P:
613         s->out_format = FMT_H263;
614         s->h263_plus  = 1;
615         /* Fx */
616         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
617         s->modified_quant  = s->h263_aic;
618         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
619         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
620
621         /* /Fx */
622         /* These are just to be sure */
623         avctx->delay = 0;
624         s->low_delay = 1;
625         break;
626     case AV_CODEC_ID_FLV1:
627         s->out_format      = FMT_H263;
628         s->h263_flv        = 2; /* format = 1; 11-bit codes */
629         s->unrestricted_mv = 1;
630         s->rtp_mode  = 0; /* don't allow GOB */
631         avctx->delay = 0;
632         s->low_delay = 1;
633         break;
634     case AV_CODEC_ID_RV10:
635         s->out_format = FMT_H263;
636         avctx->delay  = 0;
637         s->low_delay  = 1;
638         break;
639     case AV_CODEC_ID_RV20:
640         s->out_format      = FMT_H263;
641         avctx->delay       = 0;
642         s->low_delay       = 1;
643         s->modified_quant  = 1;
644         s->h263_aic        = 1;
645         s->h263_plus       = 1;
646         s->loop_filter     = 1;
647         s->unrestricted_mv = 0;
648         break;
649     case AV_CODEC_ID_MPEG4:
650         s->out_format      = FMT_H263;
651         s->h263_pred       = 1;
652         s->unrestricted_mv = 1;
653         s->low_delay       = s->max_b_frames ? 0 : 1;
654         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
655         break;
656     case AV_CODEC_ID_MSMPEG4V2:
657         s->out_format      = FMT_H263;
658         s->h263_pred       = 1;
659         s->unrestricted_mv = 1;
660         s->msmpeg4_version = 2;
661         avctx->delay       = 0;
662         s->low_delay       = 1;
663         break;
664     case AV_CODEC_ID_MSMPEG4V3:
665         s->out_format        = FMT_H263;
666         s->h263_pred         = 1;
667         s->unrestricted_mv   = 1;
668         s->msmpeg4_version   = 3;
669         s->flipflop_rounding = 1;
670         avctx->delay         = 0;
671         s->low_delay         = 1;
672         break;
673     case AV_CODEC_ID_WMV1:
674         s->out_format        = FMT_H263;
675         s->h263_pred         = 1;
676         s->unrestricted_mv   = 1;
677         s->msmpeg4_version   = 4;
678         s->flipflop_rounding = 1;
679         avctx->delay         = 0;
680         s->low_delay         = 1;
681         break;
682     case AV_CODEC_ID_WMV2:
683         s->out_format        = FMT_H263;
684         s->h263_pred         = 1;
685         s->unrestricted_mv   = 1;
686         s->msmpeg4_version   = 5;
687         s->flipflop_rounding = 1;
688         avctx->delay         = 0;
689         s->low_delay         = 1;
690         break;
691     default:
692         return -1;
693     }
694
695     avctx->has_b_frames = !s->low_delay;
696
697     s->encoding = 1;
698
699     s->progressive_frame    =
700     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
701                                                 CODEC_FLAG_INTERLACED_ME) ||
702                                 s->alternate_scan);
703
704     /* init */
705     ff_mpv_idct_init(s);
706     if (ff_mpv_common_init(s) < 0)
707         return -1;
708
709     if (ARCH_X86)
710         ff_mpv_encode_init_x86(s);
711
712     ff_fdctdsp_init(&s->fdsp, avctx);
713     ff_me_cmp_init(&s->mecc, avctx);
714     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
715     ff_pixblockdsp_init(&s->pdsp, avctx);
716     ff_qpeldsp_init(&s->qdsp);
717
718     s->avctx->coded_frame = s->current_picture.f;
719
720     if (s->msmpeg4_version) {
721         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
722                           2 * 2 * (MAX_LEVEL + 1) *
723                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
724     }
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
726
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
731     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
732                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
733     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
734                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
735
736     if (s->avctx->noise_reduction) {
737         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
738                           2 * 64 * sizeof(uint16_t), fail);
739     }
740
741     if (CONFIG_H263_ENCODER)
742         ff_h263dsp_init(&s->h263dsp);
743     if (!s->dct_quantize)
744         s->dct_quantize = ff_dct_quantize_c;
745     if (!s->denoise_dct)
746         s->denoise_dct  = denoise_dct_c;
747     s->fast_dct_quantize = s->dct_quantize;
748     if (avctx->trellis)
749         s->dct_quantize  = dct_quantize_trellis_c;
750
751     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
752         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
753
754     s->quant_precision = 5;
755
756     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
757     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
758
759     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
760         ff_h261_encode_init(s);
761     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
762         ff_h263_encode_init(s);
763     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
764         ff_msmpeg4_encode_init(s);
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->idsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811 #if FF_API_NORMALIZE_AQP
812     FF_DISABLE_DEPRECATION_WARNINGS
813     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
814         s->mpv_flags |= FF_MPV_FLAG_NAQ;
815     FF_ENABLE_DEPRECATION_WARNINGS;
816 #endif
817
818 #if FF_API_MV0
819     FF_DISABLE_DEPRECATION_WARNINGS
820     if (avctx->flags & CODEC_FLAG_MV0)
821         s->mpv_flags |= FF_MPV_FLAG_MV0;
822     FF_ENABLE_DEPRECATION_WARNINGS
823 #endif
824
825 #if FF_API_MPV_OPT
826     FF_DISABLE_DEPRECATION_WARNINGS
827     if (avctx->rc_qsquish != 0.0)
828         s->rc_qsquish = avctx->rc_qsquish;
829     if (avctx->rc_qmod_amp != 0.0)
830         s->rc_qmod_amp = avctx->rc_qmod_amp;
831     if (avctx->rc_qmod_freq)
832         s->rc_qmod_freq = avctx->rc_qmod_freq;
833     FF_ENABLE_DEPRECATION_WARNINGS
834 #endif
835
836     if (avctx->b_frame_strategy == 2) {
837         for (i = 0; i < s->max_b_frames + 2; i++) {
838             s->tmp_frames[i] = av_frame_alloc();
839             if (!s->tmp_frames[i])
840                 return AVERROR(ENOMEM);
841
842             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
843             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
844             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
845
846             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
847             if (ret < 0)
848                 return ret;
849         }
850     }
851
852     return 0;
853 fail:
854     ff_mpv_encode_end(avctx);
855     return AVERROR_UNKNOWN;
856 }
857
858 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
859 {
860     MpegEncContext *s = avctx->priv_data;
861     int i;
862
863     ff_rate_control_uninit(s);
864
865     ff_mpv_common_end(s);
866     if (CONFIG_MJPEG_ENCODER &&
867         s->out_format == FMT_MJPEG)
868         ff_mjpeg_encode_close(s);
869
870     av_freep(&avctx->extradata);
871
872     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
873         av_frame_free(&s->tmp_frames[i]);
874
875     ff_free_picture_tables(&s->new_picture);
876     ff_mpeg_unref_picture(s, &s->new_picture);
877
878     av_freep(&s->avctx->stats_out);
879     av_freep(&s->ac_stats);
880
881     av_freep(&s->q_intra_matrix);
882     av_freep(&s->q_inter_matrix);
883     av_freep(&s->q_intra_matrix16);
884     av_freep(&s->q_inter_matrix16);
885     av_freep(&s->input_picture);
886     av_freep(&s->reordered_input_picture);
887     av_freep(&s->dct_offset);
888
889     return 0;
890 }
891
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance, in samples, between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
905
906 static int get_intra_count(MpegEncContext *s, uint8_t *src,
907                            uint8_t *ref, int stride)
908 {
909     int x, y, w, h;
910     int acc = 0;
911
912     w = s->width  & ~15;
913     h = s->height & ~15;
914
915     for (y = 0; y < h; y += 16) {
916         for (x = 0; x < w; x += 16) {
917             int offset = x + y * stride;
918             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
919                                       stride, 16);
920             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
921             int sae  = get_sae(src + offset, mean, stride);
922
923             acc += sae + 500 < sad;
924         }
925     }
926     return acc;
927 }
928
929
930 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
931 {
932     Picture *pic = NULL;
933     int64_t pts;
934     int i, display_picture_number = 0, ret;
935     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
936                                                  (s->low_delay ? 0 : 1);
937     int direct = 1;
938
939     if (pic_arg) {
940         pts = pic_arg->pts;
941         display_picture_number = s->input_picture_number++;
942
943         if (pts != AV_NOPTS_VALUE) {
944             if (s->user_specified_pts != AV_NOPTS_VALUE) {
945                 int64_t time = pts;
946                 int64_t last = s->user_specified_pts;
947
948                 if (time <= last) {
949                     av_log(s->avctx, AV_LOG_ERROR,
950                            "Error, Invalid timestamp=%"PRId64", "
951                            "last=%"PRId64"\n", pts, s->user_specified_pts);
952                     return -1;
953                 }
954
955                 if (!s->low_delay && display_picture_number == 1)
956                     s->dts_delta = time - last;
957             }
958             s->user_specified_pts = pts;
959         } else {
960             if (s->user_specified_pts != AV_NOPTS_VALUE) {
961                 s->user_specified_pts =
962                 pts = s->user_specified_pts + 1;
963                 av_log(s->avctx, AV_LOG_INFO,
964                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
965                        pts);
966             } else {
967                 pts = display_picture_number;
968             }
969         }
970     }
971
972     if (pic_arg) {
973         if (!pic_arg->buf[0]);
974             direct = 0;
975         if (pic_arg->linesize[0] != s->linesize)
976             direct = 0;
977         if (pic_arg->linesize[1] != s->uvlinesize)
978             direct = 0;
979         if (pic_arg->linesize[2] != s->uvlinesize)
980             direct = 0;
981
982         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
983                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
984
985         if (direct) {
986             i = ff_find_unused_picture(s, 1);
987             if (i < 0)
988                 return i;
989
990             pic = &s->picture[i];
991             pic->reference = 3;
992
993             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
994                 return ret;
995             if (ff_alloc_picture(s, pic, 1) < 0) {
996                 return -1;
997             }
998         } else {
999             i = ff_find_unused_picture(s, 0);
1000             if (i < 0)
1001                 return i;
1002
1003             pic = &s->picture[i];
1004             pic->reference = 3;
1005
1006             if (ff_alloc_picture(s, pic, 0) < 0) {
1007                 return -1;
1008             }
1009
1010             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1011                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1012                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1013                 // empty
1014             } else {
1015                 int h_chroma_shift, v_chroma_shift;
1016                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1017                                                  &h_chroma_shift,
1018                                                  &v_chroma_shift);
1019
1020                 for (i = 0; i < 3; i++) {
1021                     int src_stride = pic_arg->linesize[i];
1022                     int dst_stride = i ? s->uvlinesize : s->linesize;
1023                     int h_shift = i ? h_chroma_shift : 0;
1024                     int v_shift = i ? v_chroma_shift : 0;
1025                     int w = s->width  >> h_shift;
1026                     int h = s->height >> v_shift;
1027                     uint8_t *src = pic_arg->data[i];
1028                     uint8_t *dst = pic->f->data[i];
1029
1030                     if (!s->avctx->rc_buffer_size)
1031                         dst += INPLACE_OFFSET;
1032
1033                     if (src_stride == dst_stride)
1034                         memcpy(dst, src, src_stride * h);
1035                     else {
1036                         while (h--) {
1037                             memcpy(dst, src, w);
1038                             dst += dst_stride;
1039                             src += src_stride;
1040                         }
1041                     }
1042                 }
1043             }
1044         }
1045         ret = av_frame_copy_props(pic->f, pic_arg);
1046         if (ret < 0)
1047             return ret;
1048
1049         pic->f->display_picture_number = display_picture_number;
1050         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1051     }
1052
1053     /* shift buffer entries */
1054     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1055         s->input_picture[i - 1] = s->input_picture[i];
1056
1057     s->input_picture[encoding_delay] = (Picture*) pic;
1058
1059     return 0;
1060 }
1061
1062 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1063 {
1064     int x, y, plane;
1065     int score = 0;
1066     int64_t score64 = 0;
1067
1068     for (plane = 0; plane < 3; plane++) {
1069         const int stride = p->f->linesize[plane];
1070         const int bw = plane ? 1 : 2;
1071         for (y = 0; y < s->mb_height * bw; y++) {
1072             for (x = 0; x < s->mb_width * bw; x++) {
1073                 int off = p->shared ? 0 : 16;
1074                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1075                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1076                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1077
1078                 switch (s->avctx->frame_skip_exp) {
1079                 case 0: score    =  FFMAX(score, v);          break;
1080                 case 1: score   += FFABS(v);                  break;
1081                 case 2: score   += v * v;                     break;
1082                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1083                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1084                 }
1085             }
1086         }
1087     }
1088
1089     if (score)
1090         score64 = score;
1091
1092     if (score64 < s->avctx->frame_skip_threshold)
1093         return 1;
1094     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1095         return 1;
1096     return 0;
1097 }
1098
1099 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1100 {
1101     AVPacket pkt = { 0 };
1102     int ret, got_output;
1103
1104     av_init_packet(&pkt);
1105     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1106     if (ret < 0)
1107         return ret;
1108
1109     ret = pkt.size;
1110     av_free_packet(&pkt);
1111     return ret;
1112 }
1113
1114 static int estimate_best_b_count(MpegEncContext *s)
1115 {
1116     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1117     AVCodecContext *c = avcodec_alloc_context3(NULL);
1118     const int scale = s->avctx->brd_scale;
1119     int i, j, out_size, p_lambda, b_lambda, lambda2;
1120     int64_t best_rd  = INT64_MAX;
1121     int best_b_count = -1;
1122
1123     assert(scale >= 0 && scale <= 3);
1124
1125     //emms_c();
1126     //s->next_picture_ptr->quality;
1127     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1128     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1129     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1130     if (!b_lambda) // FIXME we should do this somewhere else
1131         b_lambda = p_lambda;
1132     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1133                FF_LAMBDA_SHIFT;
1134
1135     c->width        = s->width  >> scale;
1136     c->height       = s->height >> scale;
1137     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1138     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1139     c->mb_decision  = s->avctx->mb_decision;
1140     c->me_cmp       = s->avctx->me_cmp;
1141     c->mb_cmp       = s->avctx->mb_cmp;
1142     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1143     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1144     c->time_base    = s->avctx->time_base;
1145     c->max_b_frames = s->max_b_frames;
1146
1147     if (avcodec_open2(c, codec, NULL) < 0)
1148         return -1;
1149
1150     for (i = 0; i < s->max_b_frames + 2; i++) {
1151         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1152                                                 s->next_picture_ptr;
1153
1154         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1155             pre_input = *pre_input_ptr;
1156
1157             if (!pre_input.shared && i) {
1158                 pre_input.f->data[0] += INPLACE_OFFSET;
1159                 pre_input.f->data[1] += INPLACE_OFFSET;
1160                 pre_input.f->data[2] += INPLACE_OFFSET;
1161             }
1162
1163             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1164                                        s->tmp_frames[i]->linesize[0],
1165                                        pre_input.f->data[0],
1166                                        pre_input.f->linesize[0],
1167                                        c->width, c->height);
1168             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1169                                        s->tmp_frames[i]->linesize[1],
1170                                        pre_input.f->data[1],
1171                                        pre_input.f->linesize[1],
1172                                        c->width >> 1, c->height >> 1);
1173             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1174                                        s->tmp_frames[i]->linesize[2],
1175                                        pre_input.f->data[2],
1176                                        pre_input.f->linesize[2],
1177                                        c->width >> 1, c->height >> 1);
1178         }
1179     }
1180
1181     for (j = 0; j < s->max_b_frames + 1; j++) {
1182         int64_t rd = 0;
1183
1184         if (!s->input_picture[j])
1185             break;
1186
1187         c->error[0] = c->error[1] = c->error[2] = 0;
1188
1189         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1190         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1191
1192         out_size = encode_frame(c, s->tmp_frames[0]);
1193
1194         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1195
1196         for (i = 0; i < s->max_b_frames + 1; i++) {
1197             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1198
1199             s->tmp_frames[i + 1]->pict_type = is_p ?
1200                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1201             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1202
1203             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1204
1205             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1206         }
1207
1208         /* get the delayed frames */
1209         while (out_size) {
1210             out_size = encode_frame(c, NULL);
1211             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1212         }
1213
1214         rd += c->error[0] + c->error[1] + c->error[2];
1215
1216         if (rd < best_rd) {
1217             best_rd = rd;
1218             best_b_count = j;
1219         }
1220     }
1221
1222     avcodec_close(c);
1223     av_freep(&c);
1224
1225     return best_b_count;
1226 }
1227
1228 static int select_input_picture(MpegEncContext *s)
1229 {
1230     int i, ret;
1231
1232     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1233         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1234     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1235
1236     /* set next picture type & ordering */
1237     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1238         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1239             !s->next_picture_ptr || s->intra_only) {
1240             s->reordered_input_picture[0] = s->input_picture[0];
1241             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1242             s->reordered_input_picture[0]->f->coded_picture_number =
1243                 s->coded_picture_number++;
1244         } else {
1245             int b_frames;
1246
1247             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1248                 if (s->picture_in_gop_number < s->gop_size &&
1249                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1250                     // FIXME check that te gop check above is +-1 correct
1251                     av_frame_unref(s->input_picture[0]->f);
1252
1253                     emms_c();
1254                     ff_vbv_update(s, 0);
1255
1256                     goto no_output_pic;
1257                 }
1258             }
1259
1260             if (s->flags & CODEC_FLAG_PASS2) {
1261                 for (i = 0; i < s->max_b_frames + 1; i++) {
1262                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1263
1264                     if (pict_num >= s->rc_context.num_entries)
1265                         break;
1266                     if (!s->input_picture[i]) {
1267                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1268                         break;
1269                     }
1270
1271                     s->input_picture[i]->f->pict_type =
1272                         s->rc_context.entry[pict_num].new_pict_type;
1273                 }
1274             }
1275
1276             if (s->avctx->b_frame_strategy == 0) {
1277                 b_frames = s->max_b_frames;
1278                 while (b_frames && !s->input_picture[b_frames])
1279                     b_frames--;
1280             } else if (s->avctx->b_frame_strategy == 1) {
1281                 for (i = 1; i < s->max_b_frames + 1; i++) {
1282                     if (s->input_picture[i] &&
1283                         s->input_picture[i]->b_frame_score == 0) {
1284                         s->input_picture[i]->b_frame_score =
1285                             get_intra_count(s,
1286                                             s->input_picture[i    ]->f->data[0],
1287                                             s->input_picture[i - 1]->f->data[0],
1288                                             s->linesize) + 1;
1289                     }
1290                 }
1291                 for (i = 0; i < s->max_b_frames + 1; i++) {
1292                     if (!s->input_picture[i] ||
1293                         s->input_picture[i]->b_frame_score - 1 >
1294                             s->mb_num / s->avctx->b_sensitivity)
1295                         break;
1296                 }
1297
1298                 b_frames = FFMAX(0, i - 1);
1299
1300                 /* reset scores */
1301                 for (i = 0; i < b_frames + 1; i++) {
1302                     s->input_picture[i]->b_frame_score = 0;
1303                 }
1304             } else if (s->avctx->b_frame_strategy == 2) {
1305                 b_frames = estimate_best_b_count(s);
1306             } else {
1307                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1308                 b_frames = 0;
1309             }
1310
1311             emms_c();
1312
1313             for (i = b_frames - 1; i >= 0; i--) {
1314                 int type = s->input_picture[i]->f->pict_type;
1315                 if (type && type != AV_PICTURE_TYPE_B)
1316                     b_frames = i;
1317             }
1318             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1319                 b_frames == s->max_b_frames) {
1320                 av_log(s->avctx, AV_LOG_ERROR,
1321                        "warning, too many b frames in a row\n");
1322             }
1323
1324             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1325                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1326                     s->gop_size > s->picture_in_gop_number) {
1327                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1328                 } else {
1329                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1330                         b_frames = 0;
1331                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1332                 }
1333             }
1334
1335             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1336                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1337                 b_frames--;
1338
1339             s->reordered_input_picture[0] = s->input_picture[b_frames];
1340             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1341                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1342             s->reordered_input_picture[0]->f->coded_picture_number =
1343                 s->coded_picture_number++;
1344             for (i = 0; i < b_frames; i++) {
1345                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1346                 s->reordered_input_picture[i + 1]->f->pict_type =
1347                     AV_PICTURE_TYPE_B;
1348                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1349                     s->coded_picture_number++;
1350             }
1351         }
1352     }
1353 no_output_pic:
1354     if (s->reordered_input_picture[0]) {
1355         s->reordered_input_picture[0]->reference =
1356            s->reordered_input_picture[0]->f->pict_type !=
1357                AV_PICTURE_TYPE_B ? 3 : 0;
1358
1359         ff_mpeg_unref_picture(s, &s->new_picture);
1360         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1361             return ret;
1362
1363         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1364             // input is a shared pix, so we can't modifiy it -> alloc a new
1365             // one & ensure that the shared one is reuseable
1366
1367             Picture *pic;
1368             int i = ff_find_unused_picture(s, 0);
1369             if (i < 0)
1370                 return i;
1371             pic = &s->picture[i];
1372
1373             pic->reference = s->reordered_input_picture[0]->reference;
1374             if (ff_alloc_picture(s, pic, 0) < 0) {
1375                 return -1;
1376             }
1377
1378             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1379             if (ret < 0)
1380                 return ret;
1381
1382             /* mark us unused / free shared pic */
1383             av_frame_unref(s->reordered_input_picture[0]->f);
1384             s->reordered_input_picture[0]->shared = 0;
1385
1386             s->current_picture_ptr = pic;
1387         } else {
1388             // input is not a shared pix -> reuse buffer for current_pix
1389             s->current_picture_ptr = s->reordered_input_picture[0];
1390             for (i = 0; i < 4; i++) {
1391                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1392             }
1393         }
1394         ff_mpeg_unref_picture(s, &s->current_picture);
1395         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1396                                        s->current_picture_ptr)) < 0)
1397             return ret;
1398
1399         s->picture_number = s->new_picture.f->display_picture_number;
1400     } else {
1401         ff_mpeg_unref_picture(s, &s->new_picture);
1402     }
1403     return 0;
1404 }
1405
1406 static void frame_end(MpegEncContext *s)
1407 {
1408     int i;
1409
1410     if (s->unrestricted_mv &&
1411         s->current_picture.reference &&
1412         !s->intra_only) {
1413         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1414         int hshift = desc->log2_chroma_w;
1415         int vshift = desc->log2_chroma_h;
1416         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1417                                 s->h_edge_pos, s->v_edge_pos,
1418                                 EDGE_WIDTH, EDGE_WIDTH,
1419                                 EDGE_TOP | EDGE_BOTTOM);
1420         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1421                                 s->h_edge_pos >> hshift,
1422                                 s->v_edge_pos >> vshift,
1423                                 EDGE_WIDTH >> hshift,
1424                                 EDGE_WIDTH >> vshift,
1425                                 EDGE_TOP | EDGE_BOTTOM);
1426         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1427                                 s->h_edge_pos >> hshift,
1428                                 s->v_edge_pos >> vshift,
1429                                 EDGE_WIDTH >> hshift,
1430                                 EDGE_WIDTH >> vshift,
1431                                 EDGE_TOP | EDGE_BOTTOM);
1432     }
1433
1434     emms_c();
1435
1436     s->last_pict_type                 = s->pict_type;
1437     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1438     if (s->pict_type!= AV_PICTURE_TYPE_B)
1439         s->last_non_b_pict_type = s->pict_type;
1440
1441     if (s->encoding) {
1442         /* release non-reference frames */
1443         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1444             if (!s->picture[i].reference)
1445                 ff_mpeg_unref_picture(s, &s->picture[i]);
1446         }
1447     }
1448
1449     s->avctx->coded_frame = s->current_picture_ptr->f;
1450
1451 }
1452
1453 static void update_noise_reduction(MpegEncContext *s)
1454 {
1455     int intra, i;
1456
1457     for (intra = 0; intra < 2; intra++) {
1458         if (s->dct_count[intra] > (1 << 16)) {
1459             for (i = 0; i < 64; i++) {
1460                 s->dct_error_sum[intra][i] >>= 1;
1461             }
1462             s->dct_count[intra] >>= 1;
1463         }
1464
1465         for (i = 0; i < 64; i++) {
1466             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1467                                        s->dct_count[intra] +
1468                                        s->dct_error_sum[intra][i] / 2) /
1469                                       (s->dct_error_sum[intra][i] + 1);
1470         }
1471     }
1472 }
1473
1474 static int frame_start(MpegEncContext *s)
1475 {
1476     int ret;
1477
1478     /* mark & release old frames */
1479     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1480         s->last_picture_ptr != s->next_picture_ptr &&
1481         s->last_picture_ptr->f->buf[0]) {
1482         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1483     }
1484
1485     s->current_picture_ptr->f->pict_type = s->pict_type;
1486     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1487
1488     ff_mpeg_unref_picture(s, &s->current_picture);
1489     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1490                                    s->current_picture_ptr)) < 0)
1491         return ret;
1492
1493     if (s->pict_type != AV_PICTURE_TYPE_B) {
1494         s->last_picture_ptr = s->next_picture_ptr;
1495         if (!s->droppable)
1496             s->next_picture_ptr = s->current_picture_ptr;
1497     }
1498
1499     if (s->last_picture_ptr) {
1500         ff_mpeg_unref_picture(s, &s->last_picture);
1501         if (s->last_picture_ptr->f->buf[0] &&
1502             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1503                                        s->last_picture_ptr)) < 0)
1504             return ret;
1505     }
1506     if (s->next_picture_ptr) {
1507         ff_mpeg_unref_picture(s, &s->next_picture);
1508         if (s->next_picture_ptr->f->buf[0] &&
1509             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1510                                        s->next_picture_ptr)) < 0)
1511             return ret;
1512     }
1513
1514     if (s->picture_structure!= PICT_FRAME) {
1515         int i;
1516         for (i = 0; i < 4; i++) {
1517             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1518                 s->current_picture.f->data[i] +=
1519                     s->current_picture.f->linesize[i];
1520             }
1521             s->current_picture.f->linesize[i] *= 2;
1522             s->last_picture.f->linesize[i]    *= 2;
1523             s->next_picture.f->linesize[i]    *= 2;
1524         }
1525     }
1526
1527     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1528         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1529         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1530     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1531         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1532         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1533     } else {
1534         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1535         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1536     }
1537
1538     if (s->dct_error_sum) {
1539         assert(s->avctx->noise_reduction && s->encoding);
1540         update_noise_reduction(s);
1541     }
1542
1543     return 0;
1544 }
1545
1546 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1547                           const AVFrame *pic_arg, int *got_packet)
1548 {
1549     MpegEncContext *s = avctx->priv_data;
1550     int i, stuffing_count, ret;
1551     int context_count = s->slice_context_count;
1552
1553     s->picture_in_gop_number++;
1554
1555     if (load_input_picture(s, pic_arg) < 0)
1556         return -1;
1557
1558     if (select_input_picture(s) < 0) {
1559         return -1;
1560     }
1561
1562     /* output? */
1563     if (s->new_picture.f->data[0]) {
1564         if (!pkt->data &&
1565             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1566             return ret;
1567         if (s->mb_info) {
1568             s->mb_info_ptr = av_packet_new_side_data(pkt,
1569                                  AV_PKT_DATA_H263_MB_INFO,
1570                                  s->mb_width*s->mb_height*12);
1571             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1572         }
1573
1574         for (i = 0; i < context_count; i++) {
1575             int start_y = s->thread_context[i]->start_mb_y;
1576             int   end_y = s->thread_context[i]->  end_mb_y;
1577             int h       = s->mb_height;
1578             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1579             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1580
1581             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1582         }
1583
1584         s->pict_type = s->new_picture.f->pict_type;
1585         //emms_c();
1586         ret = frame_start(s);
1587         if (ret < 0)
1588             return ret;
1589 vbv_retry:
1590         if (encode_picture(s, s->picture_number) < 0)
1591             return -1;
1592
1593         avctx->header_bits = s->header_bits;
1594         avctx->mv_bits     = s->mv_bits;
1595         avctx->misc_bits   = s->misc_bits;
1596         avctx->i_tex_bits  = s->i_tex_bits;
1597         avctx->p_tex_bits  = s->p_tex_bits;
1598         avctx->i_count     = s->i_count;
1599         // FIXME f/b_count in avctx
1600         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1601         avctx->skip_count  = s->skip_count;
1602
1603         frame_end(s);
1604
1605         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1606             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1607
1608         if (avctx->rc_buffer_size) {
1609             RateControlContext *rcc = &s->rc_context;
1610             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1611
1612             if (put_bits_count(&s->pb) > max_size &&
1613                 s->lambda < s->avctx->lmax) {
1614                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1615                                        (s->qscale + 1) / s->qscale);
1616                 if (s->adaptive_quant) {
1617                     int i;
1618                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1619                         s->lambda_table[i] =
1620                             FFMAX(s->lambda_table[i] + 1,
1621                                   s->lambda_table[i] * (s->qscale + 1) /
1622                                   s->qscale);
1623                 }
1624                 s->mb_skipped = 0;        // done in frame_start()
1625                 // done in encode_picture() so we must undo it
1626                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1627                     if (s->flipflop_rounding          ||
1628                         s->codec_id == AV_CODEC_ID_H263P ||
1629                         s->codec_id == AV_CODEC_ID_MPEG4)
1630                         s->no_rounding ^= 1;
1631                 }
1632                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1633                     s->time_base       = s->last_time_base;
1634                     s->last_non_b_time = s->time - s->pp_time;
1635                 }
1636                 for (i = 0; i < context_count; i++) {
1637                     PutBitContext *pb = &s->thread_context[i]->pb;
1638                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1639                 }
1640                 goto vbv_retry;
1641             }
1642
1643             assert(s->avctx->rc_max_rate);
1644         }
1645
1646         if (s->flags & CODEC_FLAG_PASS1)
1647             ff_write_pass1_stats(s);
1648
1649         for (i = 0; i < 4; i++) {
1650             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1651             avctx->error[i] += s->current_picture_ptr->f->error[i];
1652         }
1653
1654         if (s->flags & CODEC_FLAG_PASS1)
1655             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1656                    avctx->i_tex_bits + avctx->p_tex_bits ==
1657                        put_bits_count(&s->pb));
1658         flush_put_bits(&s->pb);
1659         s->frame_bits  = put_bits_count(&s->pb);
1660
1661         stuffing_count = ff_vbv_update(s, s->frame_bits);
1662         if (stuffing_count) {
1663             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1664                     stuffing_count + 50) {
1665                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1666                 return -1;
1667             }
1668
1669             switch (s->codec_id) {
1670             case AV_CODEC_ID_MPEG1VIDEO:
1671             case AV_CODEC_ID_MPEG2VIDEO:
1672                 while (stuffing_count--) {
1673                     put_bits(&s->pb, 8, 0);
1674                 }
1675             break;
1676             case AV_CODEC_ID_MPEG4:
1677                 put_bits(&s->pb, 16, 0);
1678                 put_bits(&s->pb, 16, 0x1C3);
1679                 stuffing_count -= 4;
1680                 while (stuffing_count--) {
1681                     put_bits(&s->pb, 8, 0xFF);
1682                 }
1683             break;
1684             default:
1685                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1686             }
1687             flush_put_bits(&s->pb);
1688             s->frame_bits  = put_bits_count(&s->pb);
1689         }
1690
1691         /* update mpeg1/2 vbv_delay for CBR */
1692         if (s->avctx->rc_max_rate                          &&
1693             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1694             s->out_format == FMT_MPEG1                     &&
1695             90000LL * (avctx->rc_buffer_size - 1) <=
1696                 s->avctx->rc_max_rate * 0xFFFFLL) {
1697             int vbv_delay, min_delay;
1698             double inbits  = s->avctx->rc_max_rate *
1699                              av_q2d(s->avctx->time_base);
1700             int    minbits = s->frame_bits - 8 *
1701                              (s->vbv_delay_ptr - s->pb.buf - 1);
1702             double bits    = s->rc_context.buffer_index + minbits - inbits;
1703
1704             if (bits < 0)
1705                 av_log(s->avctx, AV_LOG_ERROR,
1706                        "Internal error, negative bits\n");
1707
1708             assert(s->repeat_first_field == 0);
1709
1710             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1711             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1712                         s->avctx->rc_max_rate;
1713
1714             vbv_delay = FFMAX(vbv_delay, min_delay);
1715
1716             assert(vbv_delay < 0xFFFF);
1717
1718             s->vbv_delay_ptr[0] &= 0xF8;
1719             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1720             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1721             s->vbv_delay_ptr[2] &= 0x07;
1722             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1723             avctx->vbv_delay     = vbv_delay * 300;
1724         }
1725         s->total_bits     += s->frame_bits;
1726         avctx->frame_bits  = s->frame_bits;
1727
1728         pkt->pts = s->current_picture.f->pts;
1729         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1730             if (!s->current_picture.f->coded_picture_number)
1731                 pkt->dts = pkt->pts - s->dts_delta;
1732             else
1733                 pkt->dts = s->reordered_pts;
1734             s->reordered_pts = pkt->pts;
1735         } else
1736             pkt->dts = pkt->pts;
1737         if (s->current_picture.f->key_frame)
1738             pkt->flags |= AV_PKT_FLAG_KEY;
1739         if (s->mb_info)
1740             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1741     } else {
1742         s->frame_bits = 0;
1743     }
1744     assert((s->frame_bits & 7) == 0);
1745
1746     pkt->size = s->frame_bits / 8;
1747     *got_packet = !!pkt->size;
1748     return 0;
1749 }
1750
1751 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1752                                                 int n, int threshold)
1753 {
1754     static const char tab[64] = {
1755         3, 2, 2, 1, 1, 1, 1, 1,
1756         1, 1, 1, 1, 1, 1, 1, 1,
1757         1, 1, 1, 1, 1, 1, 1, 1,
1758         0, 0, 0, 0, 0, 0, 0, 0,
1759         0, 0, 0, 0, 0, 0, 0, 0,
1760         0, 0, 0, 0, 0, 0, 0, 0,
1761         0, 0, 0, 0, 0, 0, 0, 0,
1762         0, 0, 0, 0, 0, 0, 0, 0
1763     };
1764     int score = 0;
1765     int run = 0;
1766     int i;
1767     int16_t *block = s->block[n];
1768     const int last_index = s->block_last_index[n];
1769     int skip_dc;
1770
1771     if (threshold < 0) {
1772         skip_dc = 0;
1773         threshold = -threshold;
1774     } else
1775         skip_dc = 1;
1776
1777     /* Are all we could set to zero already zero? */
1778     if (last_index <= skip_dc - 1)
1779         return;
1780
1781     for (i = 0; i <= last_index; i++) {
1782         const int j = s->intra_scantable.permutated[i];
1783         const int level = FFABS(block[j]);
1784         if (level == 1) {
1785             if (skip_dc && i == 0)
1786                 continue;
1787             score += tab[run];
1788             run = 0;
1789         } else if (level > 1) {
1790             return;
1791         } else {
1792             run++;
1793         }
1794     }
1795     if (score >= threshold)
1796         return;
1797     for (i = skip_dc; i <= last_index; i++) {
1798         const int j = s->intra_scantable.permutated[i];
1799         block[j] = 0;
1800     }
1801     if (block[0])
1802         s->block_last_index[n] = 0;
1803     else
1804         s->block_last_index[n] = -1;
1805 }
1806
1807 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1808                                int last_index)
1809 {
1810     int i;
1811     const int maxlevel = s->max_qcoeff;
1812     const int minlevel = s->min_qcoeff;
1813     int overflow = 0;
1814
1815     if (s->mb_intra) {
1816         i = 1; // skip clipping of intra dc
1817     } else
1818         i = 0;
1819
1820     for (; i <= last_index; i++) {
1821         const int j = s->intra_scantable.permutated[i];
1822         int level = block[j];
1823
1824         if (level > maxlevel) {
1825             level = maxlevel;
1826             overflow++;
1827         } else if (level < minlevel) {
1828             level = minlevel;
1829             overflow++;
1830         }
1831
1832         block[j] = level;
1833     }
1834
1835     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1836         av_log(s->avctx, AV_LOG_INFO,
1837                "warning, clipping %d dct coefficients to %d..%d\n",
1838                overflow, minlevel, maxlevel);
1839 }
1840
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel statistics.
 *
 * For every pixel the (up to) 3x3 neighbourhood, clipped to the 8x8 block,
 * is examined; the weight is 36 * sqrt(count * sum_of_squares - sum^2) / count
 * where count is the number of neighbourhood pixels.
 *
 * @param weight output array of 64 weights, stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of pixels in the clipped neighbourhood */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    sum += pix;
                    sqr += pix * pix;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1864
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * The function selects the quantizer, loads either the source pixels (intra)
 * or the difference between the source and the motion-compensated prediction
 * (inter) into s->block[], quantizes every block that is not flagged to be
 * skipped, optionally post-processes the coefficients (noise shaping,
 * single-coefficient elimination) and finally calls the codec-specific
 * macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        passed unchanged to the codec-specific writer
 * @param motion_y        passed unchanged to the codec-specific writer
 * @param mb_block_height chroma height in lines per macroblock, used for
 *                        chroma addressing and edge emulation
 *                        (encode_mb() passes 8 for 4:2:0, 16 otherwise)
 * @param mb_block_count  number of 8x8 blocks in the macroblock
 *                        (encode_mb() passes 6 for 4:2:0, 8 otherwise)
 */
static av_always_inline void encode_mb_internal(MpegEncContext *s,
                                                int motion_x, int motion_y,
                                                int mb_block_height,
                                                int mb_block_count)
{
    int16_t weight[8][64];  // per-block weights, only filled for noise shaping
    int16_t orig[8][64];    // copy of the unquantized blocks for noise shaping
    const int mb_x = s->mb_x;
    const int mb_y = s->mb_y;
    int i;
    int skip_dct[8];        // non-zero: block i is not transformed/quantized
    int dct_offset = s->linesize * 8; // default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    ptrdiff_t wrap_y, wrap_c;

    for (i = 0; i < mb_block_count; i++)
        skip_dct[i] = s->skipdct;

    /* Select the quantizer for this macroblock. */
    if (s->adaptive_quant) {
        const int last_qp = s->qscale;
        const int mb_xy = mb_x + mb_y * s->mb_stride;

        s->lambda = s->lambda_table[mb_xy];
        update_qscale(s);

        if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
            s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
            s->dquant = s->qscale - last_qp;

            if (s->out_format == FMT_H263) {
                s->dquant = av_clip(s->dquant, -2, 2);

                /* For MPEG-4 inter macroblocks, drop the quantizer change
                 * for B-frames with an odd dquant or direct mode, and for
                 * 8x8 motion vectors. */
                if (s->codec_id == AV_CODEC_ID_MPEG4) {
                    if (!s->mb_intra) {
                        if (s->pict_type == AV_PICTURE_TYPE_B) {
                            if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
                                s->dquant = 0;
                        }
                        if (s->mv_type == MV_TYPE_8X8)
                            s->dquant = 0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* Locate the macroblock in the three planes of the source picture. */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y  = s->new_picture.f->data[0] +
             (mb_y * 16 * wrap_y)              + mb_x * 16;
    ptr_cb = s->new_picture.f->data[1] +
             (mb_y * mb_block_height * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.f->data[2] +
             (mb_y * mb_block_height * wrap_c) + mb_x * 8;

    /* The macroblock reaches past the right or bottom picture border:
     * have emulated_edge_mc() build copies in edge_emu_buffer and read the
     * source from there instead. */
    if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
        uint8_t *ebuf = s->edge_emu_buffer + 32;
        s->vdsp.emulated_edge_mc(ebuf, ptr_y,
                                 wrap_y, wrap_y,
                                 16, 16, mb_x * 16, mb_y * 16,
                                 s->width, s->height);
        ptr_y = ebuf;
        s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
                                 wrap_c, wrap_c,
                                 8, mb_block_height, mb_x * 8, mb_y * 8,
                                 s->width >> 1, s->height >> 1);
        ptr_cb = ebuf + 18 * wrap_y;
        s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
                                 wrap_c, wrap_c,
                                 8, mb_block_height, mb_x * 8, mb_y * 8,
                                 s->width >> 1, s->height >> 1);
        ptr_cr = ebuf + 18 * wrap_y + 8;
    }

    if (s->mb_intra) {
        /* Intra: decide between progressive and interlaced DCT by comparing
         * ildct_cmp scores of the source with single and doubled stride. */
        if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
            int progressive_score, interlaced_score;

            s->interlaced_dct = 0;
            progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
                                s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
                                                     NULL, wrap_y, 8) - 400;

            if (progressive_score > 0) {
                interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
                                                        NULL, wrap_y * 2, 8) +
                                   s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
                                                        NULL, wrap_y * 2, 8);
                if (progressive_score > interlaced_score) {
                    s->interlaced_dct = 1;

                    /* lower blocks start one line down, rows are two
                     * lines apart */
                    dct_offset = wrap_y;
                    wrap_y <<= 1;
                    if (s->chroma_format == CHROMA_422)
                        wrap_c <<= 1;
                }
            }
        }

        /* Load the four luma blocks straight from the source. */
        s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
        s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
        s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
        s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if (s->flags & CODEC_FLAG_GRAY) {
            skip_dct[4] = 1;
            skip_dct[5] = 1;
        } else {
            s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
            if (!s->chroma_y_shift) { /* 422 */
                s->pdsp.get_pixels(s->block[6],
                                   ptr_cb + (dct_offset >> 1), wrap_c);
                s->pdsp.get_pixels(s->block[7],
                                   ptr_cr + (dct_offset >> 1), wrap_c);
            }
        }
    } else {
        /* Inter: build the prediction in s->dest[] and encode the
         * difference between source and prediction. */
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
            op_pix  = s->hdsp.put_pixels_tab;
            op_qpix = s->qdsp.put_qpel_pixels_tab;
        } else {
            op_pix  = s->hdsp.put_no_rnd_pixels_tab;
            op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
        }

        if (s->mv_dir & MV_DIR_FORWARD) {
            ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
                          s->last_picture.f->data,
                          op_pix, op_qpix);
            /* a following backward prediction uses the avg functions */
            op_pix  = s->hdsp.avg_pixels_tab;
            op_qpix = s->qdsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
                          s->next_picture.f->data,
                          op_pix, op_qpix);
        }

        /* Same progressive/interlaced decision as for intra, but scored on
         * prediction versus source. */
        if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
            int progressive_score, interlaced_score;

            s->interlaced_dct = 0;
            progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
                                s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
                                                     ptr_y + wrap_y * 8,
                                                     wrap_y, 8) - 400;

            if (s->avctx->ildct_cmp == FF_CMP_VSSE)
                progressive_score -= 400;

            if (progressive_score > 0) {
                interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
                                                        wrap_y * 2, 8) +
                                   s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
                                                        ptr_y + wrap_y,
                                                        wrap_y * 2, 8);

                if (progressive_score > interlaced_score) {
                    s->interlaced_dct = 1;

                    dct_offset = wrap_y;
                    wrap_y <<= 1;
                    if (s->chroma_format == CHROMA_422)
                        wrap_c <<= 1;
                }
            }
        }

        s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
        s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
        s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
                            dest_y + dct_offset, wrap_y);
        s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
                            dest_y + dct_offset + 8, wrap_y);

        if (s->flags & CODEC_FLAG_GRAY) {
            skip_dct[4] = 1;
            skip_dct[5] = 1;
        } else {
            s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
            if (!s->chroma_y_shift) { /* 422 */
                s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
                                    dest_cb + (dct_offset >> 1), wrap_c);
                s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
                                    dest_cr + (dct_offset >> 1), wrap_c);
            }
        }
        /* pre quantization: when the stored mc_mb_var of this macroblock is
         * below 2 * qscale^2, skip every block whose SAD against the
         * prediction is below 20 * qscale */
        if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
                2 * s->qscale * s->qscale) {
            // FIXME optimize
            if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
                skip_dct[0] = 1;
            if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
                skip_dct[1] = 1;
            if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
                               wrap_y, 8) < 20 * s->qscale)
                skip_dct[2] = 1;
            if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
                               wrap_y, 8) < 20 * s->qscale)
                skip_dct[3] = 1;
            if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
                skip_dct[4] = 1;
            if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
                skip_dct[5] = 1;
            if (!s->chroma_y_shift) { /* 422 */
                if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
                                   dest_cb + (dct_offset >> 1),
                                   wrap_c, 8) < 20 * s->qscale)
                    skip_dct[6] = 1;
                if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
                                   dest_cr + (dct_offset >> 1),
                                   wrap_c, 8) < 20 * s->qscale)
                    skip_dct[7] = 1;
            }
        }
    }

    /* Noise shaping needs per-pixel weights of the source and a copy of the
     * blocks before quantization. */
    if (s->quantizer_noise_shaping) {
        if (!skip_dct[0])
            get_visual_weight(weight[0], ptr_y                 , wrap_y);
        if (!skip_dct[1])
            get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
        if (!skip_dct[2])
            get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if (!skip_dct[3])
            get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if (!skip_dct[4])
            get_visual_weight(weight[4], ptr_cb                , wrap_c);
        if (!skip_dct[5])
            get_visual_weight(weight[5], ptr_cr                , wrap_c);
        if (!s->chroma_y_shift) { /* 422 */
            if (!skip_dct[6])
                get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
                                  wrap_c);
            if (!skip_dct[7])
                get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
                                  wrap_c);
        }
        /* copies mb_block_count blocks starting at s->block[0] in one go */
        memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
    }

    /* DCT & quantize */
    assert(s->out_format != FMT_MJPEG || s->qscale == 8);
    {
        for (i = 0; i < mb_block_count; i++) {
            if (!skip_dct[i]) {
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
                // FIXME we could decide to change to quantizer instead of
                // clipping
                // JS: I don't think that would be a good idea it could lower
                //     quality instead of improve it. Just INTRADC clipping
                //     deserves changes in quantizer
                if (overflow)
                    clip_coeffs(s, s->block[i], s->block_last_index[i]);
            } else
                s->block_last_index[i] = -1; // skipped block: no coefficients
        }
        if (s->quantizer_noise_shaping) {
            for (i = 0; i < mb_block_count; i++) {
                if (!skip_dct[i]) {
                    s->block_last_index[i] =
                        dct_quantize_refine(s, s->block[i], weight[i],
                                            orig[i], i, s->qscale);
                }
            }
        }

        /* Single-coefficient elimination is applied to inter macroblocks
         * only: blocks 0..3 with the luma threshold, the rest with the
         * chroma threshold. */
        if (s->luma_elim_threshold && !s->mb_intra)
            for (i = 0; i < 4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if (s->chroma_elim_threshold && !s->mb_intra)
            for (i = 4; i < mb_block_count; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
            for (i = 0; i < mb_block_count; i++) {
                if (s->block_last_index[i] == -1)
                    s->coded_score[i] = INT_MAX / 256;
            }
        }
    }

    /* Gray-only intra macroblocks: give blocks 4 and 5 a DC-only value of
     * 1024 / c_dc_scale (rounded to nearest). */
    if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
        s->block_last_index[4] =
        s->block_last_index[5] = 0;
        s->block[4][0] =
        s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
    }

    // non c quantize code returns incorrect block_last_index FIXME
    // so with alternate scan the last non-zero position is searched again
    if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
        for (i = 0; i < mb_block_count; i++) {
            int j;
            if (s->block_last_index[i] > 0) {
                for (j = 63; j > 0; j--) {
                    if (s->block[i][s->intra_scantable.permutated[j]])
                        break;
                }
                s->block_last_index[i] = j;
            }
        }
    }

    /* huffman encode: hand the quantized blocks to the codec-specific
     * macroblock writer; the CONFIG_* tests guard each call */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case AV_CODEC_ID_MPEG1VIDEO:
    case AV_CODEC_ID_MPEG2VIDEO:
        if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
            ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_MPEG4:
        if (CONFIG_MPEG4_ENCODER)
            ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_MSMPEG4V2:
    case AV_CODEC_ID_MSMPEG4V3:
    case AV_CODEC_ID_WMV1:
        if (CONFIG_MSMPEG4_ENCODER)
            ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_WMV2:
        if (CONFIG_WMV2_ENCODER)
            ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_H261:
        if (CONFIG_H261_ENCODER)
            ff_h261_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_H263:
    case AV_CODEC_ID_H263P:
    case AV_CODEC_ID_FLV1:
    case AV_CODEC_ID_RV10:
    case AV_CODEC_ID_RV20:
        if (CONFIG_H263_ENCODER)
            ff_h263_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_MJPEG:
        if (CONFIG_MJPEG_ENCODER)
            ff_mjpeg_encode_mb(s, s->block);
        break;
    default:
        assert(0);
    }
}
2223
2224 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2225 {
2226     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2227     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2228 }
2229
2230 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2231     int i;
2232
2233     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2234
2235     /* mpeg1 */
2236     d->mb_skip_run= s->mb_skip_run;
2237     for(i=0; i<3; i++)
2238         d->last_dc[i] = s->last_dc[i];
2239
2240     /* statistics */
2241     d->mv_bits= s->mv_bits;
2242     d->i_tex_bits= s->i_tex_bits;
2243     d->p_tex_bits= s->p_tex_bits;
2244     d->i_count= s->i_count;
2245     d->f_count= s->f_count;
2246     d->b_count= s->b_count;
2247     d->skip_count= s->skip_count;
2248     d->misc_bits= s->misc_bits;
2249     d->last_bits= 0;
2250
2251     d->mb_skipped= 0;
2252     d->qscale= s->qscale;
2253     d->dquant= s->dquant;
2254
2255     d->esc3_level_length= s->esc3_level_length;
2256 }
2257
2258 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2259     int i;
2260
2261     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2262     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2263
2264     /* mpeg1 */
2265     d->mb_skip_run= s->mb_skip_run;
2266     for(i=0; i<3; i++)
2267         d->last_dc[i] = s->last_dc[i];
2268
2269     /* statistics */
2270     d->mv_bits= s->mv_bits;
2271     d->i_tex_bits= s->i_tex_bits;
2272     d->p_tex_bits= s->p_tex_bits;
2273     d->i_count= s->i_count;
2274     d->f_count= s->f_count;
2275     d->b_count= s->b_count;
2276     d->skip_count= s->skip_count;
2277     d->misc_bits= s->misc_bits;
2278
2279     d->mb_intra= s->mb_intra;
2280     d->mb_skipped= s->mb_skipped;
2281     d->mv_type= s->mv_type;
2282     d->mv_dir= s->mv_dir;
2283     d->pb= s->pb;
2284     if(s->data_partitioning){
2285         d->pb2= s->pb2;
2286         d->tex_pb= s->tex_pb;
2287     }
2288     d->block= s->block;
2289     for(i=0; i<8; i++)
2290         d->block_last_index[i]= s->block_last_index[i];
2291     d->interlaced_dct= s->interlaced_dct;
2292     d->qscale= s->qscale;
2293
2294     d->esc3_level_length= s->esc3_level_length;
2295 }
2296
2297 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2298                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2299                            int *dmin, int *next_block, int motion_x, int motion_y)
2300 {
2301     int score;
2302     uint8_t *dest_backup[3];
2303
2304     copy_context_before_encode(s, backup, type);
2305
2306     s->block= s->blocks[*next_block];
2307     s->pb= pb[*next_block];
2308     if(s->data_partitioning){
2309         s->pb2   = pb2   [*next_block];
2310         s->tex_pb= tex_pb[*next_block];
2311     }
2312
2313     if(*next_block){
2314         memcpy(dest_backup, s->dest, sizeof(s->dest));
2315         s->dest[0] = s->rd_scratchpad;
2316         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2317         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2318         assert(s->linesize >= 32); //FIXME
2319     }
2320
2321     encode_mb(s, motion_x, motion_y);
2322
2323     score= put_bits_count(&s->pb);
2324     if(s->data_partitioning){
2325         score+= put_bits_count(&s->pb2);
2326         score+= put_bits_count(&s->tex_pb);
2327     }
2328
2329     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2330         ff_mpv_decode_mb(s, s->block);
2331
2332         score *= s->lambda2;
2333         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2334     }
2335
2336     if(*next_block){
2337         memcpy(s->dest, dest_backup, sizeof(s->dest));
2338     }
2339
2340     if(score<*dmin){
2341         *dmin= score;
2342         *next_block^=1;
2343
2344         copy_context_after_encode(best, s, type);
2345     }
2346 }
2347
2348 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2349     uint32_t *sq = ff_square_tab + 256;
2350     int acc=0;
2351     int x,y;
2352
2353     if(w==16 && h==16)
2354         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2355     else if(w==8 && h==8)
2356         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2357
2358     for(y=0; y<h; y++){
2359         for(x=0; x<w; x++){
2360             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2361         }
2362     }
2363
2364     assert(acc>=0);
2365
2366     return acc;
2367 }
2368
2369 static int sse_mb(MpegEncContext *s){
2370     int w= 16;
2371     int h= 16;
2372
2373     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2374     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2375
2376     if(w==16 && h==16)
2377       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2378         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2379                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2380                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2381       }else{
2382         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2383                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2384                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2385       }
2386     else
2387         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2388                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2389                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2390 }
2391
2392 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2393     MpegEncContext *s= *(void**)arg;
2394
2395
2396     s->me.pre_pass=1;
2397     s->me.dia_size= s->avctx->pre_dia_size;
2398     s->first_slice_line=1;
2399     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2400         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2401             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2402         }
2403         s->first_slice_line=0;
2404     }
2405
2406     s->me.pre_pass=0;
2407
2408     return 0;
2409 }
2410
2411 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2412     MpegEncContext *s= *(void**)arg;
2413
2414     s->me.dia_size= s->avctx->dia_size;
2415     s->first_slice_line=1;
2416     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2417         s->mb_x=0; //for block init below
2418         ff_init_block_index(s);
2419         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2420             s->block_index[0]+=2;
2421             s->block_index[1]+=2;
2422             s->block_index[2]+=2;
2423             s->block_index[3]+=2;
2424
2425             /* compute motion vector & mb_type and store in context */
2426             if(s->pict_type==AV_PICTURE_TYPE_B)
2427                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2428             else
2429                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2430         }
2431         s->first_slice_line=0;
2432     }
2433     return 0;
2434 }
2435
2436 static int mb_var_thread(AVCodecContext *c, void *arg){
2437     MpegEncContext *s= *(void**)arg;
2438     int mb_x, mb_y;
2439
2440     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2441         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2442             int xx = mb_x * 16;
2443             int yy = mb_y * 16;
2444             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2445             int varc;
2446             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2447
2448             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2449                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2450
2451             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2452             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2453             s->me.mb_var_sum_temp    += varc;
2454         }
2455     }
2456     return 0;
2457 }
2458
2459 static void write_slice_end(MpegEncContext *s){
2460     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2461         if(s->partitioned_frame){
2462             ff_mpeg4_merge_partitions(s);
2463         }
2464
2465         ff_mpeg4_stuffing(&s->pb);
2466     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2467         ff_mjpeg_encode_stuffing(&s->pb);
2468     }
2469
2470     avpriv_align_put_bits(&s->pb);
2471     flush_put_bits(&s->pb);
2472
2473     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2474         s->misc_bits+= get_bits_diff(s);
2475 }
2476
2477 static void write_mb_info(MpegEncContext *s)
2478 {
2479     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2480     int offset = put_bits_count(&s->pb);
2481     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2482     int gobn = s->mb_y / s->gob_index;
2483     int pred_x, pred_y;
2484     if (CONFIG_H263_ENCODER)
2485         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2486     bytestream_put_le32(&ptr, offset);
2487     bytestream_put_byte(&ptr, s->qscale);
2488     bytestream_put_byte(&ptr, gobn);
2489     bytestream_put_le16(&ptr, mba);
2490     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2491     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2492     /* 4MV not implemented */
2493     bytestream_put_byte(&ptr, 0); /* hmv2 */
2494     bytestream_put_byte(&ptr, 0); /* vmv2 */
2495 }
2496
2497 static void update_mb_info(MpegEncContext *s, int startcode)
2498 {
2499     if (!s->mb_info)
2500         return;
2501     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2502         s->mb_info_size += 12;
2503         s->prev_mb_info = s->last_mb_info;
2504     }
2505     if (startcode) {
2506         s->prev_mb_info = put_bits_count(&s->pb)/8;
2507         /* This might have incremented mb_info_size above, and we return without
2508          * actually writing any info into that slot yet. But in that case,
2509          * this will be called again at the start of the after writing the
2510          * start code, actually writing the mb info. */
2511         return;
2512     }
2513
2514     s->last_mb_info = put_bits_count(&s->pb)/8;
2515     if (!s->mb_info_size)
2516         s->mb_info_size += 12;
2517     write_mb_info(s);
2518 }
2519
2520 static int encode_thread(AVCodecContext *c, void *arg){
2521     MpegEncContext *s= *(void**)arg;
2522     int mb_x, mb_y, pdif = 0;
2523     int chr_h= 16>>s->chroma_y_shift;
2524     int i, j;
2525     MpegEncContext best_s, backup_s;
2526     uint8_t bit_buf[2][MAX_MB_BYTES];
2527     uint8_t bit_buf2[2][MAX_MB_BYTES];
2528     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2529     PutBitContext pb[2], pb2[2], tex_pb[2];
2530
2531     for(i=0; i<2; i++){
2532         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2533         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2534         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2535     }
2536
2537     s->last_bits= put_bits_count(&s->pb);
2538     s->mv_bits=0;
2539     s->misc_bits=0;
2540     s->i_tex_bits=0;
2541     s->p_tex_bits=0;
2542     s->i_count=0;
2543     s->f_count=0;
2544     s->b_count=0;
2545     s->skip_count=0;
2546
2547     for(i=0; i<3; i++){
2548         /* init last dc values */
2549         /* note: quant matrix value (8) is implied here */
2550         s->last_dc[i] = 128 << s->intra_dc_precision;
2551
2552         s->current_picture.f->error[i] = 0;
2553     }
2554     s->mb_skip_run = 0;
2555     memset(s->last_mv, 0, sizeof(s->last_mv));
2556
2557     s->last_mv_dir = 0;
2558
2559     switch(s->codec_id){
2560     case AV_CODEC_ID_H263:
2561     case AV_CODEC_ID_H263P:
2562     case AV_CODEC_ID_FLV1:
2563         if (CONFIG_H263_ENCODER)
2564             s->gob_index = ff_h263_get_gob_height(s);
2565         break;
2566     case AV_CODEC_ID_MPEG4:
2567         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2568             ff_mpeg4_init_partitions(s);
2569         break;
2570     }
2571
2572     s->resync_mb_x=0;
2573     s->resync_mb_y=0;
2574     s->first_slice_line = 1;
2575     s->ptr_lastgob = s->pb.buf;
2576     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2577         s->mb_x=0;
2578         s->mb_y= mb_y;
2579
2580         ff_set_qscale(s, s->qscale);
2581         ff_init_block_index(s);
2582
2583         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2584             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2585             int mb_type= s->mb_type[xy];
2586 //            int d;
2587             int dmin= INT_MAX;
2588             int dir;
2589
2590             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2591                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2592                 return -1;
2593             }
2594             if(s->data_partitioning){
2595                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2596                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2597                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2598                     return -1;
2599                 }
2600             }
2601
2602             s->mb_x = mb_x;
2603             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2604             ff_update_block_index(s);
2605
2606             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2607                 ff_h261_reorder_mb_index(s);
2608                 xy= s->mb_y*s->mb_stride + s->mb_x;
2609                 mb_type= s->mb_type[xy];
2610             }
2611
2612             /* write gob / video packet header  */
2613             if(s->rtp_mode){
2614                 int current_packet_size, is_gob_start;
2615
2616                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2617
2618                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2619
2620                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2621
2622                 switch(s->codec_id){
2623                 case AV_CODEC_ID_H263:
2624                 case AV_CODEC_ID_H263P:
2625                     if(!s->h263_slice_structured)
2626                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2627                     break;
2628                 case AV_CODEC_ID_MPEG2VIDEO:
2629                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2630                 case AV_CODEC_ID_MPEG1VIDEO:
2631                     if(s->mb_skip_run) is_gob_start=0;
2632                     break;
2633                 }
2634
2635                 if(is_gob_start){
2636                     if(s->start_mb_y != mb_y || mb_x!=0){
2637                         write_slice_end(s);
2638
2639                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2640                             ff_mpeg4_init_partitions(s);
2641                         }
2642                     }
2643
2644                     assert((put_bits_count(&s->pb)&7) == 0);
2645                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2646
2647                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2648                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2649                         int d = 100 / s->error_rate;
2650                         if(r % d == 0){
2651                             current_packet_size=0;
2652                             s->pb.buf_ptr= s->ptr_lastgob;
2653                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2654                         }
2655                     }
2656
2657                     if (s->avctx->rtp_callback){
2658                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2659                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2660                     }
2661                     update_mb_info(s, 1);
2662
2663                     switch(s->codec_id){
2664                     case AV_CODEC_ID_MPEG4:
2665                         if (CONFIG_MPEG4_ENCODER) {
2666                             ff_mpeg4_encode_video_packet_header(s);
2667                             ff_mpeg4_clean_buffers(s);
2668                         }
2669                     break;
2670                     case AV_CODEC_ID_MPEG1VIDEO:
2671                     case AV_CODEC_ID_MPEG2VIDEO:
2672                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2673                             ff_mpeg1_encode_slice_header(s);
2674                             ff_mpeg1_clean_buffers(s);
2675                         }
2676                     break;
2677                     case AV_CODEC_ID_H263:
2678                     case AV_CODEC_ID_H263P:
2679                         if (CONFIG_H263_ENCODER)
2680                             ff_h263_encode_gob_header(s, mb_y);
2681                     break;
2682                     }
2683
2684                     if(s->flags&CODEC_FLAG_PASS1){
2685                         int bits= put_bits_count(&s->pb);
2686                         s->misc_bits+= bits - s->last_bits;
2687                         s->last_bits= bits;
2688                     }
2689
2690                     s->ptr_lastgob += current_packet_size;
2691                     s->first_slice_line=1;
2692                     s->resync_mb_x=mb_x;
2693                     s->resync_mb_y=mb_y;
2694                 }
2695             }
2696
2697             if(  (s->resync_mb_x   == s->mb_x)
2698                && s->resync_mb_y+1 == s->mb_y){
2699                 s->first_slice_line=0;
2700             }
2701
2702             s->mb_skipped=0;
2703             s->dquant=0; //only for QP_RD
2704
2705             update_mb_info(s, 0);
2706
2707             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2708                 int next_block=0;
2709                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2710
2711                 copy_context_before_encode(&backup_s, s, -1);
2712                 backup_s.pb= s->pb;
2713                 best_s.data_partitioning= s->data_partitioning;
2714                 best_s.partitioned_frame= s->partitioned_frame;
2715                 if(s->data_partitioning){
2716                     backup_s.pb2= s->pb2;
2717                     backup_s.tex_pb= s->tex_pb;
2718                 }
2719
2720                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2721                     s->mv_dir = MV_DIR_FORWARD;
2722                     s->mv_type = MV_TYPE_16X16;
2723                     s->mb_intra= 0;
2724                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2725                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2726                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2727                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2728                 }
2729                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2730                     s->mv_dir = MV_DIR_FORWARD;
2731                     s->mv_type = MV_TYPE_FIELD;
2732                     s->mb_intra= 0;
2733                     for(i=0; i<2; i++){
2734                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2735                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2736                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2737                     }
2738                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2739                                  &dmin, &next_block, 0, 0);
2740                 }
2741                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2742                     s->mv_dir = MV_DIR_FORWARD;
2743                     s->mv_type = MV_TYPE_16X16;
2744                     s->mb_intra= 0;
2745                     s->mv[0][0][0] = 0;
2746                     s->mv[0][0][1] = 0;
2747                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2748                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2749                 }
2750                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2751                     s->mv_dir = MV_DIR_FORWARD;
2752                     s->mv_type = MV_TYPE_8X8;
2753                     s->mb_intra= 0;
2754                     for(i=0; i<4; i++){
2755                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2756                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2757                     }
2758                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2759                                  &dmin, &next_block, 0, 0);
2760                 }
2761                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2762                     s->mv_dir = MV_DIR_FORWARD;
2763                     s->mv_type = MV_TYPE_16X16;
2764                     s->mb_intra= 0;
2765                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2766                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2767                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2768                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2769                 }
2770                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2771                     s->mv_dir = MV_DIR_BACKWARD;
2772                     s->mv_type = MV_TYPE_16X16;
2773                     s->mb_intra= 0;
2774                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2775                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2776                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2777                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2778                 }
2779                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2780                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2781                     s->mv_type = MV_TYPE_16X16;
2782                     s->mb_intra= 0;
2783                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2784                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2785                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2786                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2787                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2788                                  &dmin, &next_block, 0, 0);
2789                 }
2790                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2791                     s->mv_dir = MV_DIR_FORWARD;
2792                     s->mv_type = MV_TYPE_FIELD;
2793                     s->mb_intra= 0;
2794                     for(i=0; i<2; i++){
2795                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2796                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2797                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2798                     }
2799                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2800                                  &dmin, &next_block, 0, 0);
2801                 }
2802                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2803                     s->mv_dir = MV_DIR_BACKWARD;
2804                     s->mv_type = MV_TYPE_FIELD;
2805                     s->mb_intra= 0;
2806                     for(i=0; i<2; i++){
2807                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2808                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2809                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2810                     }
2811                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2812                                  &dmin, &next_block, 0, 0);
2813                 }
2814                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2815                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2816                     s->mv_type = MV_TYPE_FIELD;
2817                     s->mb_intra= 0;
2818                     for(dir=0; dir<2; dir++){
2819                         for(i=0; i<2; i++){
2820                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2821                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2822                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2823                         }
2824                     }
2825                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2826                                  &dmin, &next_block, 0, 0);
2827                 }
2828                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2829                     s->mv_dir = 0;
2830                     s->mv_type = MV_TYPE_16X16;
2831                     s->mb_intra= 1;
2832                     s->mv[0][0][0] = 0;
2833                     s->mv[0][0][1] = 0;
2834                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2835                                  &dmin, &next_block, 0, 0);
2836                     if(s->h263_pred || s->h263_aic){
2837                         if(best_s.mb_intra)
2838                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2839                         else
2840                             ff_clean_intra_table_entries(s); //old mode?
2841                     }
2842                 }
2843
2844                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2845                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2846                         const int last_qp= backup_s.qscale;
2847                         int qpi, qp, dc[6];
2848                         int16_t ac[6][16];
2849                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2850                         static const int dquant_tab[4]={-1,1,-2,2};
2851
2852                         assert(backup_s.dquant == 0);
2853
2854                         //FIXME intra
2855                         s->mv_dir= best_s.mv_dir;
2856                         s->mv_type = MV_TYPE_16X16;
2857                         s->mb_intra= best_s.mb_intra;
2858                         s->mv[0][0][0] = best_s.mv[0][0][0];
2859                         s->mv[0][0][1] = best_s.mv[0][0][1];
2860                         s->mv[1][0][0] = best_s.mv[1][0][0];
2861                         s->mv[1][0][1] = best_s.mv[1][0][1];
2862
2863                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2864                         for(; qpi<4; qpi++){
2865                             int dquant= dquant_tab[qpi];
2866                             qp= last_qp + dquant;
2867                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2868                                 continue;
2869                             backup_s.dquant= dquant;
2870                             if(s->mb_intra && s->dc_val[0]){
2871                                 for(i=0; i<6; i++){
2872                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2873                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2874                                 }
2875                             }
2876
2877                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2878                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2879                             if(best_s.qscale != qp){
2880                                 if(s->mb_intra && s->dc_val[0]){
2881                                     for(i=0; i<6; i++){
2882                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2883                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2884                                     }
2885                                 }
2886                             }
2887                         }
2888                     }
2889                 }
2890                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2891                     int mx= s->b_direct_mv_table[xy][0];
2892                     int my= s->b_direct_mv_table[xy][1];
2893
2894                     backup_s.dquant = 0;
2895                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2896                     s->mb_intra= 0;
2897                     ff_mpeg4_set_direct_mv(s, mx, my);
2898                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2899                                  &dmin, &next_block, mx, my);
2900                 }
2901                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2902                     backup_s.dquant = 0;
2903                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2904                     s->mb_intra= 0;
2905                     ff_mpeg4_set_direct_mv(s, 0, 0);
2906                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2907                                  &dmin, &next_block, 0, 0);
2908                 }
2909                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2910                     int coded=0;
2911                     for(i=0; i<6; i++)
2912                         coded |= s->block_last_index[i];
2913                     if(coded){
2914                         int mx,my;
2915                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2916                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2917                             mx=my=0; //FIXME find the one we actually used
2918                             ff_mpeg4_set_direct_mv(s, mx, my);
2919                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2920                             mx= s->mv[1][0][0];
2921                             my= s->mv[1][0][1];
2922                         }else{
2923                             mx= s->mv[0][0][0];
2924                             my= s->mv[0][0][1];
2925                         }
2926
2927                         s->mv_dir= best_s.mv_dir;
2928                         s->mv_type = best_s.mv_type;
2929                         s->mb_intra= 0;
2930 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2931                         s->mv[0][0][1] = best_s.mv[0][0][1];
2932                         s->mv[1][0][0] = best_s.mv[1][0][0];
2933                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2934                         backup_s.dquant= 0;
2935                         s->skipdct=1;
2936                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2937                                         &dmin, &next_block, mx, my);
2938                         s->skipdct=0;
2939                     }
2940                 }
2941
2942                 s->current_picture.qscale_table[xy] = best_s.qscale;
2943
2944                 copy_context_after_encode(s, &best_s, -1);
2945
2946                 pb_bits_count= put_bits_count(&s->pb);
2947                 flush_put_bits(&s->pb);
2948                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2949                 s->pb= backup_s.pb;
2950
2951                 if(s->data_partitioning){
2952                     pb2_bits_count= put_bits_count(&s->pb2);
2953                     flush_put_bits(&s->pb2);
2954                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2955                     s->pb2= backup_s.pb2;
2956
2957                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2958                     flush_put_bits(&s->tex_pb);
2959                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2960                     s->tex_pb= backup_s.tex_pb;
2961                 }
2962                 s->last_bits= put_bits_count(&s->pb);
2963
2964                 if (CONFIG_H263_ENCODER &&
2965                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2966                     ff_h263_update_motion_val(s);
2967
2968                 if(next_block==0){ //FIXME 16 vs linesize16
2969                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2970                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2971                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2972                 }
2973
2974                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2975                     ff_mpv_decode_mb(s, s->block);
2976             } else {
2977                 int motion_x = 0, motion_y = 0;
2978                 s->mv_type=MV_TYPE_16X16;
2979                 // only one MB-Type possible
2980
2981                 switch(mb_type){
2982                 case CANDIDATE_MB_TYPE_INTRA:
2983                     s->mv_dir = 0;
2984                     s->mb_intra= 1;
2985                     motion_x= s->mv[0][0][0] = 0;
2986                     motion_y= s->mv[0][0][1] = 0;
2987                     break;
2988                 case CANDIDATE_MB_TYPE_INTER:
2989                     s->mv_dir = MV_DIR_FORWARD;
2990                     s->mb_intra= 0;
2991                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2992                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2993                     break;
2994                 case CANDIDATE_MB_TYPE_INTER_I:
2995                     s->mv_dir = MV_DIR_FORWARD;
2996                     s->mv_type = MV_TYPE_FIELD;
2997                     s->mb_intra= 0;
2998                     for(i=0; i<2; i++){
2999                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3000                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3001                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3002                     }
3003                     break;
3004                 case CANDIDATE_MB_TYPE_INTER4V:
3005                     s->mv_dir = MV_DIR_FORWARD;
3006                     s->mv_type = MV_TYPE_8X8;
3007                     s->mb_intra= 0;
3008                     for(i=0; i<4; i++){
3009                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3010                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3011                     }
3012                     break;
3013                 case CANDIDATE_MB_TYPE_DIRECT:
3014                     if (CONFIG_MPEG4_ENCODER) {
3015                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3016                         s->mb_intra= 0;
3017                         motion_x=s->b_direct_mv_table[xy][0];
3018                         motion_y=s->b_direct_mv_table[xy][1];
3019                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3020                     }
3021                     break;
3022                 case CANDIDATE_MB_TYPE_DIRECT0:
3023                     if (CONFIG_MPEG4_ENCODER) {
3024                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3025                         s->mb_intra= 0;
3026                         ff_mpeg4_set_direct_mv(s, 0, 0);
3027                     }
3028                     break;
3029                 case CANDIDATE_MB_TYPE_BIDIR:
3030                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3031                     s->mb_intra= 0;
3032                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3033                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3034                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3035                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3036                     break;
3037                 case CANDIDATE_MB_TYPE_BACKWARD:
3038                     s->mv_dir = MV_DIR_BACKWARD;
3039                     s->mb_intra= 0;
3040                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3041                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3042                     break;
3043                 case CANDIDATE_MB_TYPE_FORWARD:
3044                     s->mv_dir = MV_DIR_FORWARD;
3045                     s->mb_intra= 0;
3046                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3047                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3048                     break;
3049                 case CANDIDATE_MB_TYPE_FORWARD_I:
3050                     s->mv_dir = MV_DIR_FORWARD;
3051                     s->mv_type = MV_TYPE_FIELD;
3052                     s->mb_intra= 0;
3053                     for(i=0; i<2; i++){
3054                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3055                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3056                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3057                     }
3058                     break;
3059                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3060                     s->mv_dir = MV_DIR_BACKWARD;
3061                     s->mv_type = MV_TYPE_FIELD;
3062                     s->mb_intra= 0;
3063                     for(i=0; i<2; i++){
3064                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3065                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3066                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3067                     }
3068                     break;
3069                 case CANDIDATE_MB_TYPE_BIDIR_I:
3070                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3071                     s->mv_type = MV_TYPE_FIELD;
3072                     s->mb_intra= 0;
3073                     for(dir=0; dir<2; dir++){
3074                         for(i=0; i<2; i++){
3075                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3076                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3077                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3078                         }
3079                     }
3080                     break;
3081                 default:
3082                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3083                 }
3084
3085                 encode_mb(s, motion_x, motion_y);
3086
3087                 // RAL: Update last macroblock type
3088                 s->last_mv_dir = s->mv_dir;
3089
3090                 if (CONFIG_H263_ENCODER &&
3091                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3092                     ff_h263_update_motion_val(s);
3093
3094                 ff_mpv_decode_mb(s, s->block);
3095             }
3096
3097             /* clean the MV table in IPS frames for direct mode in B frames */
3098             if(s->mb_intra /* && I,P,S_TYPE */){
3099                 s->p_mv_table[xy][0]=0;
3100                 s->p_mv_table[xy][1]=0;
3101             }
3102
3103             if(s->flags&CODEC_FLAG_PSNR){
3104                 int w= 16;
3105                 int h= 16;
3106
3107                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3108                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3109
3110                 s->current_picture.f->error[0] += sse(
3111                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3112                     s->dest[0], w, h, s->linesize);
3113                 s->current_picture.f->error[1] += sse(
3114                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3115                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3116                 s->current_picture.f->error[2] += sse(
3117                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3118                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3119             }
3120             if(s->loop_filter){
3121                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3122                     ff_h263_loop_filter(s);
3123             }
3124             av_dlog(s->avctx, "MB %d %d bits\n",
3125                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3126         }
3127     }
3128
3129     //not beautiful here but we must write it before flushing so it has to be here
3130     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3131         ff_msmpeg4_encode_ext_header(s);
3132
3133     write_slice_end(s);
3134
3135     /* Send the last GOB if RTP */
3136     if (s->avctx->rtp_callback) {
3137         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3138         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3139         /* Call the RTP callback to send the last GOB */
3140         emms_c();
3141         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3142     }
3143
3144     return 0;
3145 }
3146
3147 #define MERGE(field) dst->field += src->field; src->field=0
3148 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3149     MERGE(me.scene_change_score);
3150     MERGE(me.mc_mb_var_sum_temp);
3151     MERGE(me.mb_var_sum_temp);
3152 }
3153
3154 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3155     int i;
3156
3157     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3158     MERGE(dct_count[1]);
3159     MERGE(mv_bits);
3160     MERGE(i_tex_bits);
3161     MERGE(p_tex_bits);
3162     MERGE(i_count);
3163     MERGE(f_count);
3164     MERGE(b_count);
3165     MERGE(skip_count);
3166     MERGE(misc_bits);
3167     MERGE(er.error_count);
3168     MERGE(padding_bug_score);
3169     MERGE(current_picture.f->error[0]);
3170     MERGE(current_picture.f->error[1]);
3171     MERGE(current_picture.f->error[2]);
3172
3173     if(dst->avctx->noise_reduction){
3174         for(i=0; i<64; i++){
3175             MERGE(dct_error_sum[0][i]);
3176             MERGE(dct_error_sum[1][i]);
3177         }
3178     }
3179
3180     assert(put_bits_count(&src->pb) % 8 ==0);
3181     assert(put_bits_count(&dst->pb) % 8 ==0);
3182     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3183     flush_put_bits(&dst->pb);
3184 }
3185
3186 static int estimate_qp(MpegEncContext *s, int dry_run){
3187     if (s->next_lambda){
3188         s->current_picture_ptr->f->quality =
3189         s->current_picture.f->quality = s->next_lambda;
3190         if(!dry_run) s->next_lambda= 0;
3191     } else if (!s->fixed_qscale) {
3192         s->current_picture_ptr->f->quality =
3193         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3194         if (s->current_picture.f->quality < 0)
3195             return -1;
3196     }
3197
3198     if(s->adaptive_quant){
3199         switch(s->codec_id){
3200         case AV_CODEC_ID_MPEG4:
3201             if (CONFIG_MPEG4_ENCODER)
3202                 ff_clean_mpeg4_qscales(s);
3203             break;
3204         case AV_CODEC_ID_H263:
3205         case AV_CODEC_ID_H263P:
3206         case AV_CODEC_ID_FLV1:
3207             if (CONFIG_H263_ENCODER)
3208                 ff_clean_h263_qscales(s);
3209             break;
3210         default:
3211             ff_init_qscale_tab(s);
3212         }
3213
3214         s->lambda= s->lambda_table[0];
3215         //FIXME broken
3216     }else
3217         s->lambda = s->current_picture.f->quality;
3218     update_qscale(s);
3219     return 0;
3220 }
3221
3222 /* must be called before writing the header */
3223 static void set_frame_distances(MpegEncContext * s){
3224     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3225     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3226
3227     if(s->pict_type==AV_PICTURE_TYPE_B){
3228         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3229         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3230     }else{
3231         s->pp_time= s->time - s->last_non_b_time;
3232         s->last_non_b_time= s->time;
3233         assert(s->picture_number==0 || s->pp_time > 0);
3234     }
3235 }
3236
/**
 * Encode the current picture into s->pb.
 *
 * Visible steps, in order: set up timing and rounding state, pick an initial
 * lambda, run motion estimation (or, for I-frames, mark every macroblock as
 * intra and optionally measure spatial variance) on all slice contexts,
 * merge the per-context statistics, possibly re-type a P-frame as an I-frame
 * on a scene change, choose f_code/b_code and fix over-long vectors, run the
 * final rate control through estimate_qp(), write the format-specific
 * picture header, then run encode_thread on every slice context and merge
 * the results back into s.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3237 static int encode_picture(MpegEncContext *s, int picture_number)
3238 {
3239     int i, ret;
3240     int bits;
3241     int context_count = s->slice_context_count;
3242
3243     s->picture_number = picture_number;
3244
3245     /* Reset the average MB variance */
3246     s->me.mb_var_sum_temp    =
3247     s->me.mc_mb_var_sum_temp = 0;
3248
3249     /* we need to initialize some time vars before we can encode b-frames */
3250     // RAL: Condition added for MPEG1VIDEO
3251     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3252         set_frame_distances(s);
3253     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3254         ff_set_mpeg4_time(s);
3255
3256     s->me.scene_change_score=0;
3257
3258 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3259
         /* Rounding control: I-frames set no_rounding from the msmpeg4 version;
          * other non-B frames toggle it for the codecs/options listed below. */
3260     if(s->pict_type==AV_PICTURE_TYPE_I){
3261         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3262         else                        s->no_rounding=0;
3263     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3264         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3265             s->no_rounding ^= 1;
3266     }
3267
         /* Initial lambda for motion estimation: in two-pass mode from a dry
          * run of estimate_qp(); otherwise, unless CODEC_FLAG_QSCALE is set,
          * from the lambda last recorded for this kind of picture. */
3268     if(s->flags & CODEC_FLAG_PASS2){
3269         if (estimate_qp(s,1) < 0)
3270             return -1;
3271         ff_get_2pass_fcode(s);
3272     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3273         if(s->pict_type==AV_PICTURE_TYPE_B)
3274             s->lambda= s->last_lambda_for[s->pict_type];
3275         else
3276             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3277         update_qscale(s);
3278     }
3279
3280     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Refresh the secondary slice contexts from s; give up on failure. */
3281     for(i=1; i<context_count; i++){
3282         ret = ff_update_duplicate_context(s->thread_context[i], s);
3283         if (ret < 0)
3284             return ret;
3285     }
3286
3287     if(ff_init_me(s)<0)
3288         return -1;
3289
3290     /* Estimate motion for every MB */
3291     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3292         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3293         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3294         if (s->pict_type != AV_PICTURE_TYPE_B) {
3295             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3296                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3297             }
3298         }
3299
3300         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3301     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3302         /* I-Frame */
3303         for(i=0; i<s->mb_stride*s->mb_height; i++)
3304             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3305
3306         if(!s->fixed_qscale){
3307             /* finding spatial complexity for I-frame rate control */
3308             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3309         }
3310     }
         /* Sum the ME statistics of the secondary contexts into s.
          * NOTE(review): the loop starts at 1, presumably because
          * thread_context[0] is s itself -- confirm. */
3311     for(i=1; i<context_count; i++){
3312         merge_context_after_me(s, s->thread_context[i]);
3313     }
3314     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3315     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3316     emms_c();
3317
         /* A P-frame whose scene change score exceeds the threshold is re-typed
          * as an I-frame and every macroblock candidate is forced to intra. */
3318     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3319         s->pict_type= AV_PICTURE_TYPE_I;
3320         for(i=0; i<s->mb_stride*s->mb_height; i++)
3321             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3322         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3323                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3324     }
3325
         /* f_code/b_code selection and long-vector fixing; skipped entirely
          * when umvplus is set. */
3326     if(!s->umvplus){
3327         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3328             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3329
3330             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3331                 int a,b;
3332                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3333                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3334                 s->f_code= FFMAX3(s->f_code, a, b);
3335             }
3336
3337             ff_fix_long_p_mvs(s);
3338             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3339             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3340                 int j;
3341                 for(i=0; i<2; i++){
3342                     for(j=0; j<2; j++)
3343                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3344                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3345                 }
3346             }
3347         }
3348
3349         if(s->pict_type==AV_PICTURE_TYPE_B){
3350             int a, b;
3351
                 /* f_code covers the forward tables, b_code the backward ones */
3352             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3353             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3354             s->f_code = FFMAX(a, b);
3355
3356             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3357             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3358             s->b_code = FFMAX(a, b);
3359
3360             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3361             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3362             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3363             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3364             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3365                 int dir, j;
3366                 for(dir=0; dir<2; dir++){
3367                     for(i=0; i<2; i++){
3368                         for(j=0; j<2; j++){
3369                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3370                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3371                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3372                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3373                         }
3374                     }
3375                 }
3376             }
3377         }
3378     }
3379
         /* final (non-dry-run) rate control for this picture */
3380     if (estimate_qp(s, 0) < 0)
3381         return -1;
3382
3383     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3384         s->qscale= 3; //reduce clipping problems
3385
3386     if (s->out_format == FMT_MJPEG) {
3387         /* for mjpeg, we do include qscale in the matrix */
3388         for(i=1;i<64;i++){
3389             int j = s->idsp.idct_permutation[i];
3390
3391             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3392         }
3393         s->y_dc_scale_table=
3394         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3395         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3396         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3397                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so encode with 8 */
3398         s->qscale= 8;
3399     }
3400
3401     //FIXME var duplication
3402     s->current_picture_ptr->f->key_frame =
3403     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3404     s->current_picture_ptr->f->pict_type =
3405     s->current_picture.f->pict_type = s->pict_type;
3406
3407     if (s->current_picture.f->key_frame)
3408         s->picture_in_gop_number=0;
3409
         /* Write the picture header for the output format and record how many
          * bits it took in s->header_bits. */
3410     s->last_bits= put_bits_count(&s->pb);
3411     switch(s->out_format) {
3412     case FMT_MJPEG:
3413         if (CONFIG_MJPEG_ENCODER)
3414             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3415                                            s->intra_matrix);
3416         break;
3417     case FMT_H261:
3418         if (CONFIG_H261_ENCODER)
3419             ff_h261_encode_picture_header(s, picture_number);
3420         break;
3421     case FMT_H263:
3422         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3423             ff_wmv2_encode_picture_header(s, picture_number);
3424         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3425             ff_msmpeg4_encode_picture_header(s, picture_number);
3426         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3427             ff_mpeg4_encode_picture_header(s, picture_number);
3428         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3429             ff_rv10_encode_picture_header(s, picture_number);
3430         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3431             ff_rv20_encode_picture_header(s, picture_number);
3432         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3433             ff_flv_encode_picture_header(s, picture_number);
3434         else if (CONFIG_H263_ENCODER)
3435             ff_h263_encode_picture_header(s, picture_number);
3436         break;
3437     case FMT_MPEG1:
3438         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3439             ff_mpeg1_encode_picture_header(s, picture_number);
3440         break;
3441     default:
3442         assert(0);
3443     }
3444     bits= put_bits_count(&s->pb);
3445     s->header_bits= bits - s->last_bits;
3446
         /* Update the secondary contexts after ME, encode on all slice
          * contexts, then merge statistics and bitstreams back into s. */
3447     for(i=1; i<context_count; i++){
3448         update_duplicate_context_after_me(s->thread_context[i], s);
3449     }
3450     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3451     for(i=1; i<context_count; i++){
3452         merge_context_after_encode(s, s->thread_context[i]);
3453     }
3454     emms_c();
3455     return 0;
3456 }
3457
3458 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3459     const int intra= s->mb_intra;
3460     int i;
3461
3462     s->dct_count[intra]++;
3463
3464     for(i=0; i<64; i++){
3465         int level= block[i];
3466
3467         if(level){
3468             if(level>0){
3469                 s->dct_error_sum[intra][i] += level;
3470                 level -= s->dct_offset[intra][i];
3471                 if(level<0) level=0;
3472             }else{
3473                 s->dct_error_sum[intra][i] -= level;
3474                 level += s->dct_offset[intra][i];
3475                 if(level>0) level=0;
3476             }
3477             block[i]= level;
3478         }
3479     }
3480 }
3481
/**
 * Transform one 8x8 block with s->fdsp.fdct() and quantize it by searching,
 * among a small set of candidate levels per coefficient, for the sequence of
 * runs and levels with the lowest "distortion + lambda * bits" score.
 *
 * @param s        encoder context; supplies the quantization matrices, the
 *                 VLC length tables and lambda2, and receives coded_score[n]
 * @param block    in: data to transform; out: quantized levels, written at
 *                 the s->intra_scantable.permutated positions (for intra
 *                 blocks block[0] holds the quantized DC)
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale
 * @param qscale   quantizer, used to index the q_*_matrix tables
 * @param overflow set to non-zero if a quantized magnitude may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first searched position (0 for intra blocks, -1 for inter blocks)
 *         means nothing beyond the intra DC is coded
 */
3482 static int dct_quantize_trellis_c(MpegEncContext *s,
3483                                   int16_t *block, int n,
3484                                   int qscale, int *overflow){
3485     const int *qmat;
3486     const uint8_t *scantable= s->intra_scantable.scantable;
3487     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3488     int max=0;
3489     unsigned int threshold1, threshold2;
3490     int bias=0;
3491     int run_tab[65];
3492     int level_tab[65];
3493     int score_tab[65];
3494     int survivor[65];
3495     int survivor_count;
3496     int last_run=0;
3497     int last_level=0;
3498     int last_score= 0;
3499     int last_i;
3500     int coeff[2][64];
3501     int coeff_count[64];
3502     int qmul, qadd, start_i, last_non_zero, i, dc;
3503     const int esc_length= s->ac_esc_length;
3504     uint8_t * length;
3505     uint8_t * last_length;
3506     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3507
3508     s->fdsp.fdct(block);
3509
3510     if(s->dct_error_sum)
3511         s->denoise_dct(s, block);
3512     qmul= qscale*16;
3513     qadd= ((qscale-1)|1)*8;
3514
3515     if (s->mb_intra) {
3516         int q;
3517         if (!s->h263_aic) {
3518             if (n < 4)
3519                 q = s->y_dc_scale;
3520             else
3521                 q = s->c_dc_scale;
3522             q = q << 3;
3523         } else{
3524             /* For AIC we skip quant/dequant of INTRADC */
3525             q = 1 << 3;
3526             qadd=0;
3527         }
3528
3529         /* note: block[0] is assumed to be positive */
3530         block[0] = (block[0] + (q >> 1)) / q;
3531         start_i = 1;
3532         last_non_zero = 0;
3533         qmat = s->q_intra_matrix[qscale];
3534         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3535             bias= 1<<(QMAT_SHIFT-1);
3536         length     = s->intra_ac_vlc_length;
3537         last_length= s->intra_ac_vlc_last_length;
3538     } else {
3539         start_i = 0;
3540         last_non_zero = -1;
3541         qmat = s->q_inter_matrix[qscale];
3542         length     = s->inter_ac_vlc_length;
3543         last_length= s->inter_ac_vlc_last_length;
3544     }
3545     last_i= start_i;
3546
         /* A scaled coefficient quantizes to non-zero exactly when its
          * magnitude exceeds threshold1; the unsigned compare below tests
          * both signs at once. */
3547     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3548     threshold2= (threshold1<<1);
3549
         /* scan backwards for the last coefficient that does not quantize to zero */
3550     for(i=63; i>=start_i; i--) {
3551         const int j = scantable[i];
3552         int level = block[j] * qmat[j];
3553
3554         if(((unsigned)(level+threshold1))>threshold2){
3555             last_non_zero = i;
3556             break;
3557         }
3558     }
3559
         /* Build the candidates for each position: the rounded quantized level
          * and, when its magnitude is at least 2, the level one step closer
          * to zero. Coefficients that quantize to zero get the single
          * candidate +1 or -1 according to their sign. */
3560     for(i=start_i; i<=last_non_zero; i++) {
3561         const int j = scantable[i];
3562         int level = block[j] * qmat[j];
3563
3564 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3565 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3566         if(((unsigned)(level+threshold1))>threshold2){
3567             if(level>0){
3568                 level= (bias + level)>>QMAT_SHIFT;
3569                 coeff[0][i]= level;
3570                 coeff[1][i]= level-1;
3571 //                coeff[2][k]= level-2;
3572             }else{
3573                 level= (bias - level)>>QMAT_SHIFT;
3574                 coeff[0][i]= -level;
3575                 coeff[1][i]= -level+1;
3576 //                coeff[2][k]= -level+2;
3577             }
3578             coeff_count[i]= FFMIN(level, 2);
3579             assert(coeff_count[i]);
3580             max |=level;
3581         }else{
3582             coeff[0][i]= (level>>31)|1;
3583             coeff_count[i]= 1;
3584         }
3585     }
3586
3587     *overflow= s->max_qcoeff < max; //overflow might have happened
3588
         /* nothing to code: clear everything from start_i on and return */
3589     if(last_non_zero < start_i){
3590         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3591         return last_non_zero;
3592     }
3593
3594     score_tab[start_i]= 0;
3595     survivor[0]= start_i;
3596     survivor_count= 1;
3597
         /* Dynamic programming over scan positions: score_tab[i+1] receives the
          * best score found for coding a non-zero level at position i, and
          * survivor[] lists the positions a following run may start from. */
3598     for(i=start_i; i<=last_non_zero; i++){
3599         int level_index, j, zero_distortion;
3600         int dct_coeff= FFABS(block[ scantable[i] ]);
3601         int best_score=256*256*256*120;
3602
3603         if (s->fdsp.fdct == ff_fdct_ifast)
3604             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3605         zero_distortion= dct_coeff*dct_coeff;
3606
3607         for(level_index=0; level_index < coeff_count[i]; level_index++){
3608             int distortion;
3609             int level= coeff[level_index][i];
3610             const int alevel= FFABS(level);
3611             int unquant_coeff;
3612
3613             assert(level);
3614
                 /* reconstruct the value a decoder would obtain for this level */
3615             if(s->out_format == FMT_H263){
3616                 unquant_coeff= alevel*qmul + qadd;
3617             }else{ //MPEG1
3618                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3619                 if(s->mb_intra){
3620                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3621                         unquant_coeff =   (unquant_coeff - 1) | 1;
3622                 }else{
3623                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3624                         unquant_coeff =   (unquant_coeff - 1) | 1;
3625                 }
3626                 unquant_coeff<<= 3;
3627             }
3628
                 /* distortion relative to coding this coefficient as zero */
3629             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3630             level+=64;
                 /* levels in -64..63 use the VLC length tables, others the escape length */
3631             if((level&(~127)) == 0){
3632                 for(j=survivor_count-1; j>=0; j--){
3633                     int run= i - survivor[j];
3634                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3635                     score += score_tab[i-run];
3636
3637                     if(score < best_score){
3638                         best_score= score;
3639                         run_tab[i+1]= run;
3640                         level_tab[i+1]= level-64;
3641                     }
3642                 }
3643
                     /* H.263-style formats: also track the best choice for
                      * ending the block here, using the "last" length table */
3644                 if(s->out_format == FMT_H263){
3645                     for(j=survivor_count-1; j>=0; j--){
3646                         int run= i - survivor[j];
3647                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3648                         score += score_tab[i-run];
3649                         if(score < last_score){
3650                             last_score= score;
3651                             last_run= run;
3652                             last_level= level-64;
3653                             last_i= i+1;
3654                         }
3655                     }
3656                 }
3657             }else{
3658                 distortion += esc_length*lambda;
3659                 for(j=survivor_count-1; j>=0; j--){
3660                     int run= i - survivor[j];
3661                     int score= distortion + score_tab[i-run];
3662
3663                     if(score < best_score){
3664                         best_score= score;
3665                         run_tab[i+1]= run;
3666                         level_tab[i+1]= level-64;
3667                     }
3668                 }
3669
3670                 if(s->out_format == FMT_H263){
3671                   for(j=survivor_count-1; j>=0; j--){
3672                         int run= i - survivor[j];
3673                         int score= distortion + score_tab[i-run];
3674                         if(score < last_score){
3675                             last_score= score;
3676                             last_run= run;
3677                             last_level= level-64;
3678                             last_i= i+1;
3679                         }
3680                     }
3681                 }
3682             }
3683         }
3684
3685         score_tab[i+1]= best_score;
3686
             /* drop survivors whose score is worse than the new best one */
3687         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3688         if(last_non_zero <= 27){
3689             for(; survivor_count; survivor_count--){
3690                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3691                     break;
3692             }
3693         }else{
3694             for(; survivor_count; survivor_count--){
3695                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3696                     break;
3697             }
3698         }
3699
3700         survivor[ survivor_count++ ]= i+1;
3701     }
3702
         /* Formats other than H.263 pick the end position afterwards: the
          * lowest score_tab entry, plus 2*lambda for any non-zero position. */
3703     if(s->out_format != FMT_H263){
3704         last_score= 256*256*256*120;
3705         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3706             int score= score_tab[i];
3707             if(i) score += lambda*2; //FIXME exacter?
3708
3709             if(score < last_score){
3710                 last_score= score;
3711                 last_i= i;
3712                 last_level= level_tab[i];
3713                 last_run= run_tab[i];
3714             }
3715         }
3716     }
3717
3718     s->coded_score[n] = last_score;
3719
         /* remember |block[0]| before the coefficients are cleared */
3720     dc= FFABS(block[0]);
3721     last_non_zero= last_i - 1;
3722     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3723
3724     if(last_non_zero < start_i)
3725         return last_non_zero;
3726
         /* Special case, inter blocks only (start_i == 0): the single coded
          * coefficient sits at scan position 0. Re-decide between its
          * candidates and coding nothing at all (return -1). */
3727     if(last_non_zero == 0 && start_i == 0){
3728         int best_level= 0;
3729         int best_score= dc * dc;
3730
3731         for(i=0; i<coeff_count[0]; i++){
3732             int level= coeff[i][0];
3733             int alevel= FFABS(level);
3734             int unquant_coeff, score, distortion;
3735
3736             if(s->out_format == FMT_H263){
3737                     unquant_coeff= (alevel*qmul + qadd)>>3;
3738             }else{ //MPEG1
3739                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3740                     unquant_coeff =   (unquant_coeff - 1) | 1;
3741             }
3742             unquant_coeff = (unquant_coeff + 4) >> 3;
3743             unquant_coeff<<= 3 + 3;
3744
3745             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3746             level+=64;
3747             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3748             else                    score= distortion + esc_length*lambda;
3749
3750             if(score < best_score){
3751                 best_score= score;
3752                 best_level= level - 64;
3753             }
3754         }
3755         block[0]= best_level;
3756         s->coded_score[n] = best_score - dc*dc;
3757         if(best_level == 0) return -1;
3758         else                return last_non_zero;
3759     }
3760
         /* Backtrack from the chosen end position through run_tab/level_tab,
          * writing the selected levels into block[] at permuted positions. */
3761     i= last_i;
3762     assert(last_level);
3763
3764     block[ perm_scantable[last_non_zero] ]= last_level;
3765     i -= last_run + 1;
3766
3767     for(; i>start_i; i -= run_tab[i] + 1){
3768         block[ perm_scantable[i-1] ]= level_tab[i];
3769     }
3770
3771     return last_non_zero;
3772 }
3773
3774 //#define REFINE_STATS 1
3775 static int16_t basis[64][64];
3776
3777 static void build_basis(uint8_t *perm){
3778     int i, j, x, y;
3779     emms_c();
3780     for(i=0; i<8; i++){
3781         for(j=0; j<8; j++){
3782             for(y=0; y<8; y++){
3783                 for(x=0; x<8; x++){
3784                     double s= 0.25*(1<<BASIS_SHIFT);
3785                     int index= 8*i + j;
3786                     int perm_index= perm[index];
3787                     if(i==0) s*= sqrt(0.5);
3788                     if(j==0) s*= sqrt(0.5);
3789                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3790                 }
3791             }
3792         }
3793     }
3794 }
3795
3796 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3797                         int16_t *block, int16_t *weight, int16_t *orig,
3798                         int n, int qscale){
3799     int16_t rem[64];
3800     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3801     const uint8_t *scantable= s->intra_scantable.scantable;
3802     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3803 //    unsigned int threshold1, threshold2;
3804 //    int bias=0;
3805     int run_tab[65];
3806     int prev_run=0;
3807     int prev_level=0;
3808     int qmul, qadd, start_i, last_non_zero, i, dc;
3809     uint8_t * length;
3810     uint8_t * last_length;
3811     int lambda;
3812     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3813 #ifdef REFINE_STATS
3814 static int count=0;
3815 static int after_last=0;
3816 static int to_zero=0;
3817 static int from_zero=0;
3818 static int raise=0;
3819 static int lower=0;
3820 static int messed_sign=0;
3821 #endif
3822
3823     if(basis[0][0] == 0)
3824         build_basis(s->idsp.idct_permutation);
3825
3826     qmul= qscale*2;
3827     qadd= (qscale-1)|1;
3828     if (s->mb_intra) {
3829         if (!s->h263_aic) {
3830             if (n < 4)
3831                 q = s->y_dc_scale;
3832             else
3833                 q = s->c_dc_scale;
3834         } else{
3835             /* For AIC we skip quant/dequant of INTRADC */
3836             q = 1;
3837             qadd=0;
3838         }
3839         q <<= RECON_SHIFT-3;
3840         /* note: block[0] is assumed to be positive */
3841         dc= block[0]*q;
3842 //        block[0] = (block[0] + (q >> 1)) / q;
3843         start_i = 1;
3844 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3845 //            bias= 1<<(QMAT_SHIFT-1);
3846         length     = s->intra_ac_vlc_length;
3847         last_length= s->intra_ac_vlc_last_length;
3848     } else {
3849         dc= 0;
3850         start_i = 0;
3851         length     = s->inter_ac_vlc_length;
3852         last_length= s->inter_ac_vlc_last_length;
3853     }
3854     last_non_zero = s->block_last_index[n];
3855
3856 #ifdef REFINE_STATS
3857 {START_TIMER
3858 #endif
3859     dc += (1<<(RECON_SHIFT-1));
3860     for(i=0; i<64; i++){
3861         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3862     }
3863 #ifdef REFINE_STATS
3864 STOP_TIMER("memset rem[]")}
3865 #endif
3866     sum=0;
3867     for(i=0; i<64; i++){
3868         int one= 36;
3869         int qns=4;
3870         int w;
3871
3872         w= FFABS(weight[i]) + qns*one;
3873         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3874
3875         weight[i] = w;
3876 //        w=weight[i] = (63*qns + (w/2)) / w;
3877
3878         assert(w>0);
3879         assert(w<(1<<6));
3880         sum += w*w;
3881     }
3882     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3883 #ifdef REFINE_STATS
3884 {START_TIMER
3885 #endif
3886     run=0;
3887     rle_index=0;
3888     for(i=start_i; i<=last_non_zero; i++){
3889         int j= perm_scantable[i];
3890         const int level= block[j];
3891         int coeff;
3892
3893         if(level){
3894             if(level<0) coeff= qmul*level - qadd;
3895             else        coeff= qmul*level + qadd;
3896             run_tab[rle_index++]=run;
3897             run=0;
3898
3899             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3900         }else{
3901             run++;
3902         }
3903     }
3904 #ifdef REFINE_STATS
3905 if(last_non_zero>0){
3906 STOP_TIMER("init rem[]")
3907 }
3908 }
3909
3910 {START_TIMER
3911 #endif
3912     for(;;){
3913         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3914         int best_coeff=0;
3915         int best_change=0;
3916         int run2, best_unquant_change=0, analyze_gradient;
3917 #ifdef REFINE_STATS
3918 {START_TIMER
3919 #endif
3920         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3921
3922         if(analyze_gradient){
3923 #ifdef REFINE_STATS
3924 {START_TIMER
3925 #endif
3926             for(i=0; i<64; i++){
3927                 int w= weight[i];
3928
3929                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3930             }
3931 #ifdef REFINE_STATS
3932 STOP_TIMER("rem*w*w")}
3933 {START_TIMER
3934 #endif
3935             s->fdsp.fdct(d1);
3936 #ifdef REFINE_STATS
3937 STOP_TIMER("dct")}
3938 #endif
3939         }
3940
3941         if(start_i){
3942             const int level= block[0];
3943             int change, old_coeff;
3944
3945             assert(s->mb_intra);
3946
3947             old_coeff= q*level;
3948
3949             for(change=-1; change<=1; change+=2){
3950                 int new_level= level + change;
3951                 int score, new_coeff;
3952
3953                 new_coeff= q*new_level;
3954                 if(new_coeff >= 2048 || new_coeff < 0)
3955                     continue;
3956
3957                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3958                                                   new_coeff - old_coeff);
3959                 if(score<best_score){
3960                     best_score= score;
3961                     best_coeff= 0;
3962                     best_change= change;
3963                     best_unquant_change= new_coeff - old_coeff;
3964                 }
3965             }
3966         }
3967
3968         run=0;
3969         rle_index=0;
3970         run2= run_tab[rle_index++];
3971         prev_level=0;
3972         prev_run=0;
3973
3974         for(i=start_i; i<64; i++){
3975             int j= perm_scantable[i];
3976             const int level= block[j];
3977             int change, old_coeff;
3978
3979             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3980                 break;
3981
3982             if(level){
3983                 if(level<0) old_coeff= qmul*level - qadd;
3984                 else        old_coeff= qmul*level + qadd;
3985                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3986             }else{
3987                 old_coeff=0;
3988                 run2--;
3989                 assert(run2>=0 || i >= last_non_zero );
3990             }
3991
3992             for(change=-1; change<=1; change+=2){
3993                 int new_level= level + change;
3994                 int score, new_coeff, unquant_change;
3995
3996                 score=0;
3997                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3998                    continue;
3999
4000                 if(new_level){
4001                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4002                     else            new_coeff= qmul*new_level + qadd;
4003                     if(new_coeff >= 2048 || new_coeff <= -2048)
4004                         continue;
4005                     //FIXME check for overflow
4006
4007                     if(level){
4008                         if(level < 63 && level > -63){
4009                             if(i < last_non_zero)
4010                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4011                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4012                             else
4013                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4014                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4015                         }
4016                     }else{
4017                         assert(FFABS(new_level)==1);
4018
4019                         if(analyze_gradient){
4020                             int g= d1[ scantable[i] ];
4021                             if(g && (g^new_level) >= 0)
4022                                 continue;
4023                         }
4024
4025                         if(i < last_non_zero){
4026                             int next_i= i + run2 + 1;
4027                             int next_level= block[ perm_scantable[next_i] ] + 64;
4028
4029                             if(next_level&(~127))
4030                                 next_level= 0;
4031
4032                             if(next_i < last_non_zero)
4033                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4034                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4035                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4036                             else
4037                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4038                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4039                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4040                         }else{
4041                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4042                             if(prev_level){
4043                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4044                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4045                             }
4046                         }
4047                     }
4048                 }else{
4049                     new_coeff=0;
4050                     assert(FFABS(level)==1);
4051
4052                     if(i < last_non_zero){
4053                         int next_i= i + run2 + 1;
4054                         int next_level= block[ perm_scantable[next_i] ] + 64;
4055
4056                         if(next_level&(~127))
4057                             next_level= 0;
4058
4059                         if(next_i < last_non_zero)
4060                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4061                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4062                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4063                         else
4064                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4065                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4066                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4067                     }else{
4068                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4069                         if(prev_level){
4070                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4071                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4072                         }
4073                     }
4074                 }
4075
4076                 score *= lambda;
4077
4078                 unquant_change= new_coeff - old_coeff;
4079                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4080
4081                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4082                                                    unquant_change);
4083                 if(score<best_score){
4084                     best_score= score;
4085                     best_coeff= i;
4086                     best_change= change;
4087                     best_unquant_change= unquant_change;
4088                 }
4089             }
4090             if(level){
4091                 prev_level= level + 64;
4092                 if(prev_level&(~127))
4093                     prev_level= 0;
4094                 prev_run= run;
4095                 run=0;
4096             }else{
4097                 run++;
4098             }
4099         }
4100 #ifdef REFINE_STATS
4101 STOP_TIMER("iterative step")}
4102 #endif
4103
4104         if(best_change){
4105             int j= perm_scantable[ best_coeff ];
4106
4107             block[j] += best_change;
4108
4109             if(best_coeff > last_non_zero){
4110                 last_non_zero= best_coeff;
4111                 assert(block[j]);
4112 #ifdef REFINE_STATS
4113 after_last++;
4114 #endif
4115             }else{
4116 #ifdef REFINE_STATS
4117 if(block[j]){
4118     if(block[j] - best_change){
4119         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4120             raise++;
4121         }else{
4122             lower++;
4123         }
4124     }else{
4125         from_zero++;
4126     }
4127 }else{
4128     to_zero++;
4129 }
4130 #endif
4131                 for(; last_non_zero>=start_i; last_non_zero--){
4132                     if(block[perm_scantable[last_non_zero]])
4133                         break;
4134                 }
4135             }
4136 #ifdef REFINE_STATS
4137 count++;
4138 if(256*256*256*64 % count == 0){
4139     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4140 }
4141 #endif
4142             run=0;
4143             rle_index=0;
4144             for(i=start_i; i<=last_non_zero; i++){
4145                 int j= perm_scantable[i];
4146                 const int level= block[j];
4147
4148                  if(level){
4149                      run_tab[rle_index++]=run;
4150                      run=0;
4151                  }else{
4152                      run++;
4153                  }
4154             }
4155
4156             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4157         }else{
4158             break;
4159         }
4160     }
4161 #ifdef REFINE_STATS
4162 if(last_non_zero>0){
4163 STOP_TIMER("iterative search")
4164 }
4165 }
4166 #endif
4167
4168     return last_non_zero;
4169 }
4170
4171 int ff_dct_quantize_c(MpegEncContext *s,
4172                         int16_t *block, int n,
4173                         int qscale, int *overflow)
4174 {
4175     int i, j, level, last_non_zero, q, start_i;
4176     const int *qmat;
4177     const uint8_t *scantable= s->intra_scantable.scantable;
4178     int bias;
4179     int max=0;
4180     unsigned int threshold1, threshold2;
4181
4182     s->fdsp.fdct(block);
4183
4184     if(s->dct_error_sum)
4185         s->denoise_dct(s, block);
4186
4187     if (s->mb_intra) {
4188         if (!s->h263_aic) {
4189             if (n < 4)
4190                 q = s->y_dc_scale;
4191             else
4192                 q = s->c_dc_scale;
4193             q = q << 3;
4194         } else
4195             /* For AIC we skip quant/dequant of INTRADC */
4196             q = 1 << 3;
4197
4198         /* note: block[0] is assumed to be positive */
4199         block[0] = (block[0] + (q >> 1)) / q;
4200         start_i = 1;
4201         last_non_zero = 0;
4202         qmat = s->q_intra_matrix[qscale];
4203         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4204     } else {
4205         start_i = 0;
4206         last_non_zero = -1;
4207         qmat = s->q_inter_matrix[qscale];
4208         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4209     }
4210     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4211     threshold2= (threshold1<<1);
4212     for(i=63;i>=start_i;i--) {
4213         j = scantable[i];
4214         level = block[j] * qmat[j];
4215
4216         if(((unsigned)(level+threshold1))>threshold2){
4217             last_non_zero = i;
4218             break;
4219         }else{
4220             block[j]=0;
4221         }
4222     }
4223     for(i=start_i; i<=last_non_zero; i++) {
4224         j = scantable[i];
4225         level = block[j] * qmat[j];
4226
4227 //        if(   bias+level >= (1<<QMAT_SHIFT)
4228 //           || bias-level >= (1<<QMAT_SHIFT)){
4229         if(((unsigned)(level+threshold1))>threshold2){
4230             if(level>0){
4231                 level= (bias + level)>>QMAT_SHIFT;
4232                 block[j]= level;
4233             }else{
4234                 level= (bias - level)>>QMAT_SHIFT;
4235                 block[j]= -level;
4236             }
4237             max |=level;
4238         }else{
4239             block[j]=0;
4240         }
4241     }
4242     *overflow= s->max_qcoeff < max; //overflow might have happened
4243
4244     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4245     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4246         ff_block_permute(block, s->idsp.idct_permutation,
4247                          scantable, last_non_zero);
4248
4249     return last_non_zero;
4250 }
4251
/* Byte offset of a field inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag set shared by the option tables that use it; parenthesized so the
 * macro stays a single operand wherever it is expanded. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4254 static const AVOption h263_options[] = {
4255     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4256     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4257     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4258     FF_MPV_COMMON_OPTS
4259     { NULL },
4260 };
4261
4262 static const AVClass h263_class = {
4263     .class_name = "H.263 encoder",
4264     .item_name  = av_default_item_name,
4265     .option     = h263_options,
4266     .version    = LIBAVUTIL_VERSION_INT,
4267 };
4268
4269 AVCodec ff_h263_encoder = {
4270     .name           = "h263",
4271     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4272     .type           = AVMEDIA_TYPE_VIDEO,
4273     .id             = AV_CODEC_ID_H263,
4274     .priv_data_size = sizeof(MpegEncContext),
4275     .init           = ff_mpv_encode_init,
4276     .encode2        = ff_mpv_encode_picture,
4277     .close          = ff_mpv_encode_end,
4278     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4279     .priv_class     = &h263_class,
4280 };
4281
4282 static const AVOption h263p_options[] = {
4283     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4284     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4285     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4286     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4287     FF_MPV_COMMON_OPTS
4288     { NULL },
4289 };
4290 static const AVClass h263p_class = {
4291     .class_name = "H.263p encoder",
4292     .item_name  = av_default_item_name,
4293     .option     = h263p_options,
4294     .version    = LIBAVUTIL_VERSION_INT,
4295 };
4296
4297 AVCodec ff_h263p_encoder = {
4298     .name           = "h263p",
4299     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4300     .type           = AVMEDIA_TYPE_VIDEO,
4301     .id             = AV_CODEC_ID_H263P,
4302     .priv_data_size = sizeof(MpegEncContext),
4303     .init           = ff_mpv_encode_init,
4304     .encode2        = ff_mpv_encode_picture,
4305     .close          = ff_mpv_encode_end,
4306     .capabilities   = CODEC_CAP_SLICE_THREADS,
4307     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4308     .priv_class     = &h263p_class,
4309 };
4310
4311 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4312
4313 AVCodec ff_msmpeg4v2_encoder = {
4314     .name           = "msmpeg4v2",
4315     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4316     .type           = AVMEDIA_TYPE_VIDEO,
4317     .id             = AV_CODEC_ID_MSMPEG4V2,
4318     .priv_data_size = sizeof(MpegEncContext),
4319     .init           = ff_mpv_encode_init,
4320     .encode2        = ff_mpv_encode_picture,
4321     .close          = ff_mpv_encode_end,
4322     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4323     .priv_class     = &msmpeg4v2_class,
4324 };
4325
4326 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4327
4328 AVCodec ff_msmpeg4v3_encoder = {
4329     .name           = "msmpeg4",
4330     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4331     .type           = AVMEDIA_TYPE_VIDEO,
4332     .id             = AV_CODEC_ID_MSMPEG4V3,
4333     .priv_data_size = sizeof(MpegEncContext),
4334     .init           = ff_mpv_encode_init,
4335     .encode2        = ff_mpv_encode_picture,
4336     .close          = ff_mpv_encode_end,
4337     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4338     .priv_class     = &msmpeg4v3_class,
4339 };
4340
4341 FF_MPV_GENERIC_CLASS(wmv1)
4342
4343 AVCodec ff_wmv1_encoder = {
4344     .name           = "wmv1",
4345     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4346     .type           = AVMEDIA_TYPE_VIDEO,
4347     .id             = AV_CODEC_ID_WMV1,
4348     .priv_data_size = sizeof(MpegEncContext),
4349     .init           = ff_mpv_encode_init,
4350     .encode2        = ff_mpv_encode_picture,
4351     .close          = ff_mpv_encode_end,
4352     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4353     .priv_class     = &wmv1_class,
4354 };