]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
lavc: make lmax/lmin into private options of mpegvideo encoders
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
61 #define QUANT_BIAS_SHIFT 8
62
63 #define QMAT_SHIFT_MMX 16
64 #define QMAT_SHIFT 22
65
66 static int encode_picture(MpegEncContext *s, int picture_number);
67 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
68 static int sse_mb(MpegEncContext *s);
69 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
70 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
71
72 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
73 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
74
75 const AVOption ff_mpv_generic_options[] = {
76     FF_MPV_COMMON_OPTS
77     { NULL },
78 };
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
105                                         (qscale * quant_matrix[j]));
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 /* 16 <= qscale * quant_matrix[i] <= 7905
111                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
112                  *             19952 <=              x  <= 249205026
113                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
114                  *           3444240 >= (1 << 36) / (x) >= 275 */
115
116                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
117                                         (ff_aanscales[i] * qscale *
118                                          quant_matrix[j]));
119             }
120         } else {
121             for (i = 0; i < 64; i++) {
122                 const int j = s->idsp.idct_permutation[i];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
129                                         (qscale * quant_matrix[j]));
130                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
131                 //                    (qscale * quant_matrix[i]);
132                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
133                                        (qscale * quant_matrix[j]);
134
135                 if (qmat16[qscale][0][i] == 0 ||
136                     qmat16[qscale][0][i] == 128 * 256)
137                     qmat16[qscale][0][i] = 128 * 256 - 1;
138                 qmat16[qscale][1][i] =
139                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
140                                 qmat16[qscale][0][i]);
141             }
142         }
143
144         for (i = intra; i < 64; i++) {
145             int64_t max = 8191;
146             if (fdsp->fdct == ff_fdct_ifast) {
147                 max = (8191LL * ff_aanscales[i]) >> 14;
148             }
149             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
150                 shift++;
151             }
152         }
153     }
154     if (shift) {
155         av_log(NULL, AV_LOG_INFO,
156                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
157                QMAT_SHIFT - shift);
158     }
159 }
160
161 static inline void update_qscale(MpegEncContext *s)
162 {
163     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
164                 (FF_LAMBDA_SHIFT + 7);
165     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
166
167     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
168                  FF_LAMBDA_SHIFT;
169 }
170
171 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
172 {
173     int i;
174
175     if (matrix) {
176         put_bits(pb, 1, 1);
177         for (i = 0; i < 64; i++) {
178             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
179         }
180     } else
181         put_bits(pb, 1, 0);
182 }
183
184 /**
185  * init s->current_picture.qscale_table from s->lambda_table
186  */
187 void ff_init_qscale_tab(MpegEncContext *s)
188 {
189     int8_t * const qscale_table = s->current_picture.qscale_table;
190     int i;
191
192     for (i = 0; i < s->mb_num; i++) {
193         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
194         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
195         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
196                                                   s->avctx->qmax);
197     }
198 }
199
200 static void update_duplicate_context_after_me(MpegEncContext *dst,
201                                               MpegEncContext *src)
202 {
203 #define COPY(a) dst->a= src->a
204     COPY(pict_type);
205     COPY(current_picture);
206     COPY(f_code);
207     COPY(b_code);
208     COPY(qscale);
209     COPY(lambda);
210     COPY(lambda2);
211     COPY(picture_in_gop_number);
212     COPY(gop_picture_number);
213     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
214     COPY(progressive_frame);    // FIXME don't set in encode_header
215     COPY(partitioned_frame);    // FIXME don't set in encode_header
216 #undef COPY
217 }
218
219 /**
220  * Set the given MpegEncContext to defaults for encoding.
221  * the changed fields will not depend upon the prior state of the MpegEncContext.
222  */
223 static void mpv_encode_defaults(MpegEncContext *s)
224 {
225     int i;
226     ff_mpv_common_defaults(s);
227
228     for (i = -16; i < 16; i++) {
229         default_fcode_tab[i + MAX_MV] = 1;
230     }
231     s->me.mv_penalty = default_mv_penalty;
232     s->fcode_tab     = default_fcode_tab;
233
234     s->input_picture_number  = 0;
235     s->picture_in_gop_number = 0;
236 }
237
238 /* init video encoder */
239 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
240 {
241     MpegEncContext *s = avctx->priv_data;
242     int i, ret, format_supported;
243
244     mpv_encode_defaults(s);
245
246     switch (avctx->codec_id) {
247     case AV_CODEC_ID_MPEG2VIDEO:
248         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
249             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
250             av_log(avctx, AV_LOG_ERROR,
251                    "only YUV420 and YUV422 are supported\n");
252             return -1;
253         }
254         break;
255     case AV_CODEC_ID_MJPEG:
256         format_supported = 0;
257         /* JPEG color space */
258         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
259             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
260             (avctx->color_range == AVCOL_RANGE_JPEG &&
261              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
262               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
263             format_supported = 1;
264         /* MPEG color space */
265         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
266                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
267                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
268             format_supported = 1;
269
270         if (!format_supported) {
271             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
272             return -1;
273         }
274         break;
275     default:
276         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
277             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
278             return -1;
279         }
280     }
281
282     switch (avctx->pix_fmt) {
283     case AV_PIX_FMT_YUVJ422P:
284     case AV_PIX_FMT_YUV422P:
285         s->chroma_format = CHROMA_422;
286         break;
287     case AV_PIX_FMT_YUVJ420P:
288     case AV_PIX_FMT_YUV420P:
289     default:
290         s->chroma_format = CHROMA_420;
291         break;
292     }
293
294     s->bit_rate = avctx->bit_rate;
295     s->width    = avctx->width;
296     s->height   = avctx->height;
297     if (avctx->gop_size > 600 &&
298         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
299         av_log(avctx, AV_LOG_ERROR,
300                "Warning keyframe interval too large! reducing it ...\n");
301         avctx->gop_size = 600;
302     }
303     s->gop_size     = avctx->gop_size;
304     s->avctx        = avctx;
305     s->flags        = avctx->flags;
306     s->flags2       = avctx->flags2;
307     if (avctx->max_b_frames > MAX_B_FRAMES) {
308         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
309                "is %d.\n", MAX_B_FRAMES);
310     }
311     s->max_b_frames = avctx->max_b_frames;
312     s->codec_id     = avctx->codec->id;
313     s->strict_std_compliance = avctx->strict_std_compliance;
314     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
315     s->mpeg_quant         = avctx->mpeg_quant;
316     s->rtp_mode           = !!avctx->rtp_payload_size;
317     s->intra_dc_precision = avctx->intra_dc_precision;
318     s->user_specified_pts = AV_NOPTS_VALUE;
319
320     if (s->gop_size <= 1) {
321         s->intra_only = 1;
322         s->gop_size   = 12;
323     } else {
324         s->intra_only = 0;
325     }
326
327     s->me_method = avctx->me_method;
328
329     /* Fixed QSCALE */
330     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
331
332 #if FF_API_MPV_OPT
333     FF_DISABLE_DEPRECATION_WARNINGS
334     if (avctx->border_masking != 0.0)
335         s->border_masking = avctx->border_masking;
336     FF_ENABLE_DEPRECATION_WARNINGS
337 #endif
338
339     s->adaptive_quant = (s->avctx->lumi_masking ||
340                          s->avctx->dark_masking ||
341                          s->avctx->temporal_cplx_masking ||
342                          s->avctx->spatial_cplx_masking  ||
343                          s->avctx->p_masking      ||
344                          s->border_masking ||
345                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
346                         !s->fixed_qscale;
347
348     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
349
350     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
351         av_log(avctx, AV_LOG_ERROR,
352                "a vbv buffer size is needed, "
353                "for encoding with a maximum bitrate\n");
354         return -1;
355     }
356
357     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
358         av_log(avctx, AV_LOG_INFO,
359                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
360     }
361
362     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
363         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
364         return -1;
365     }
366
367     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
368         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
369         return -1;
370     }
371
372     if (avctx->rc_max_rate &&
373         avctx->rc_max_rate == avctx->bit_rate &&
374         avctx->rc_max_rate != avctx->rc_min_rate) {
375         av_log(avctx, AV_LOG_INFO,
376                "impossible bitrate constraints, this will fail\n");
377     }
378
379     if (avctx->rc_buffer_size &&
380         avctx->bit_rate * (int64_t)avctx->time_base.num >
381             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
382         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
383         return -1;
384     }
385
386     if (!s->fixed_qscale &&
387         avctx->bit_rate * av_q2d(avctx->time_base) >
388             avctx->bit_rate_tolerance) {
389         av_log(avctx, AV_LOG_ERROR,
390                "bitrate tolerance too small for bitrate\n");
391         return -1;
392     }
393
394     if (s->avctx->rc_max_rate &&
395         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
396         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
397          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
398         90000LL * (avctx->rc_buffer_size - 1) >
399             s->avctx->rc_max_rate * 0xFFFFLL) {
400         av_log(avctx, AV_LOG_INFO,
401                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
402                "specified vbv buffer is too large for the given bitrate!\n");
403     }
404
405     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
406         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
407         s->codec_id != AV_CODEC_ID_FLV1) {
408         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
409         return -1;
410     }
411
412     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
413         av_log(avctx, AV_LOG_ERROR,
414                "OBMC is only supported with simple mb decision\n");
415         return -1;
416     }
417
418     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
419         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
420         return -1;
421     }
422
423     if (s->max_b_frames                    &&
424         s->codec_id != AV_CODEC_ID_MPEG4      &&
425         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
426         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
427         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
428         return -1;
429     }
430
431     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
432          s->codec_id == AV_CODEC_ID_H263  ||
433          s->codec_id == AV_CODEC_ID_H263P) &&
434         (avctx->sample_aspect_ratio.num > 255 ||
435          avctx->sample_aspect_ratio.den > 255)) {
436         av_log(avctx, AV_LOG_ERROR,
437                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
438                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
439         return -1;
440     }
441
442     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
443         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
444         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
445         return -1;
446     }
447
448     // FIXME mpeg2 uses that too
449     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
450         av_log(avctx, AV_LOG_ERROR,
451                "mpeg2 style quantization not supported by codec\n");
452         return -1;
453     }
454
455     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
456         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
457         return -1;
458     }
459
460     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
461         s->avctx->mb_decision != FF_MB_DECISION_RD) {
462         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
463         return -1;
464     }
465
466     if (s->avctx->scenechange_threshold < 1000000000 &&
467         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
468         av_log(avctx, AV_LOG_ERROR,
469                "closed gop with scene change detection are not supported yet, "
470                "set threshold to 1000000000\n");
471         return -1;
472     }
473
474     if (s->flags & CODEC_FLAG_LOW_DELAY) {
475         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
476             av_log(avctx, AV_LOG_ERROR,
477                   "low delay forcing is only available for mpeg2\n");
478             return -1;
479         }
480         if (s->max_b_frames != 0) {
481             av_log(avctx, AV_LOG_ERROR,
482                    "b frames cannot be used with low delay\n");
483             return -1;
484         }
485     }
486
487     if (s->q_scale_type == 1) {
488         if (avctx->qmax > 12) {
489             av_log(avctx, AV_LOG_ERROR,
490                    "non linear quant only supports qmax <= 12 currently\n");
491             return -1;
492         }
493     }
494
495     if (s->avctx->thread_count > 1         &&
496         s->codec_id != AV_CODEC_ID_MPEG4      &&
497         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
498         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
499         (s->codec_id != AV_CODEC_ID_H263P)) {
500         av_log(avctx, AV_LOG_ERROR,
501                "multi threaded encoding not supported by codec\n");
502         return -1;
503     }
504
505     if (s->avctx->thread_count < 1) {
506         av_log(avctx, AV_LOG_ERROR,
507                "automatic thread number detection not supported by codec,"
508                "patch welcome\n");
509         return -1;
510     }
511
512     if (s->avctx->thread_count > 1)
513         s->rtp_mode = 1;
514
515     if (!avctx->time_base.den || !avctx->time_base.num) {
516         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
517         return -1;
518     }
519
520     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
521         av_log(avctx, AV_LOG_INFO,
522                "notice: b_frame_strategy only affects the first pass\n");
523         avctx->b_frame_strategy = 0;
524     }
525
526     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
527     if (i > 1) {
528         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
529         avctx->time_base.den /= i;
530         avctx->time_base.num /= i;
531         //return -1;
532     }
533
534     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
535         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
536         // (a + x * 3 / 8) / x
537         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
538         s->inter_quant_bias = 0;
539     } else {
540         s->intra_quant_bias = 0;
541         // (a - x / 4) / x
542         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
543     }
544
545     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->intra_quant_bias = avctx->intra_quant_bias;
547     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
548         s->inter_quant_bias = avctx->inter_quant_bias;
549
550     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
551         s->avctx->time_base.den > (1 << 16) - 1) {
552         av_log(avctx, AV_LOG_ERROR,
553                "timebase %d/%d not supported by MPEG 4 standard, "
554                "the maximum admitted value for the timebase denominator "
555                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
556                (1 << 16) - 1);
557         return -1;
558     }
559     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
560
561     switch (avctx->codec->id) {
562     case AV_CODEC_ID_MPEG1VIDEO:
563         s->out_format = FMT_MPEG1;
564         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
565         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
566         break;
567     case AV_CODEC_ID_MPEG2VIDEO:
568         s->out_format = FMT_MPEG1;
569         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
570         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
571         s->rtp_mode   = 1;
572         break;
573     case AV_CODEC_ID_MJPEG:
574         s->out_format = FMT_MJPEG;
575         s->intra_only = 1; /* force intra only for jpeg */
576         if (!CONFIG_MJPEG_ENCODER ||
577             ff_mjpeg_encode_init(s) < 0)
578             return -1;
579         avctx->delay = 0;
580         s->low_delay = 1;
581         break;
582     case AV_CODEC_ID_H261:
583         if (!CONFIG_H261_ENCODER)
584             return -1;
585         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
586             av_log(avctx, AV_LOG_ERROR,
587                    "The specified picture size of %dx%d is not valid for the "
588                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
589                     s->width, s->height);
590             return -1;
591         }
592         s->out_format = FMT_H261;
593         avctx->delay  = 0;
594         s->low_delay  = 1;
595         break;
596     case AV_CODEC_ID_H263:
597         if (!CONFIG_H263_ENCODER)
598         return -1;
599         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
600                              s->width, s->height) == 8) {
601             av_log(avctx, AV_LOG_INFO,
602                    "The specified picture size of %dx%d is not valid for "
603                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
604                    "352x288, 704x576, and 1408x1152."
605                    "Try H.263+.\n", s->width, s->height);
606             return -1;
607         }
608         s->out_format = FMT_H263;
609         avctx->delay  = 0;
610         s->low_delay  = 1;
611         break;
612     case AV_CODEC_ID_H263P:
613         s->out_format = FMT_H263;
614         s->h263_plus  = 1;
615         /* Fx */
616         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
617         s->modified_quant  = s->h263_aic;
618         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
619         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
620
621         /* /Fx */
622         /* These are just to be sure */
623         avctx->delay = 0;
624         s->low_delay = 1;
625         break;
626     case AV_CODEC_ID_FLV1:
627         s->out_format      = FMT_H263;
628         s->h263_flv        = 2; /* format = 1; 11-bit codes */
629         s->unrestricted_mv = 1;
630         s->rtp_mode  = 0; /* don't allow GOB */
631         avctx->delay = 0;
632         s->low_delay = 1;
633         break;
634     case AV_CODEC_ID_RV10:
635         s->out_format = FMT_H263;
636         avctx->delay  = 0;
637         s->low_delay  = 1;
638         break;
639     case AV_CODEC_ID_RV20:
640         s->out_format      = FMT_H263;
641         avctx->delay       = 0;
642         s->low_delay       = 1;
643         s->modified_quant  = 1;
644         s->h263_aic        = 1;
645         s->h263_plus       = 1;
646         s->loop_filter     = 1;
647         s->unrestricted_mv = 0;
648         break;
649     case AV_CODEC_ID_MPEG4:
650         s->out_format      = FMT_H263;
651         s->h263_pred       = 1;
652         s->unrestricted_mv = 1;
653         s->low_delay       = s->max_b_frames ? 0 : 1;
654         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
655         break;
656     case AV_CODEC_ID_MSMPEG4V2:
657         s->out_format      = FMT_H263;
658         s->h263_pred       = 1;
659         s->unrestricted_mv = 1;
660         s->msmpeg4_version = 2;
661         avctx->delay       = 0;
662         s->low_delay       = 1;
663         break;
664     case AV_CODEC_ID_MSMPEG4V3:
665         s->out_format        = FMT_H263;
666         s->h263_pred         = 1;
667         s->unrestricted_mv   = 1;
668         s->msmpeg4_version   = 3;
669         s->flipflop_rounding = 1;
670         avctx->delay         = 0;
671         s->low_delay         = 1;
672         break;
673     case AV_CODEC_ID_WMV1:
674         s->out_format        = FMT_H263;
675         s->h263_pred         = 1;
676         s->unrestricted_mv   = 1;
677         s->msmpeg4_version   = 4;
678         s->flipflop_rounding = 1;
679         avctx->delay         = 0;
680         s->low_delay         = 1;
681         break;
682     case AV_CODEC_ID_WMV2:
683         s->out_format        = FMT_H263;
684         s->h263_pred         = 1;
685         s->unrestricted_mv   = 1;
686         s->msmpeg4_version   = 5;
687         s->flipflop_rounding = 1;
688         avctx->delay         = 0;
689         s->low_delay         = 1;
690         break;
691     default:
692         return -1;
693     }
694
695     avctx->has_b_frames = !s->low_delay;
696
697     s->encoding = 1;
698
699     s->progressive_frame    =
700     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
701                                                 CODEC_FLAG_INTERLACED_ME) ||
702                                 s->alternate_scan);
703
704     /* init */
705     ff_mpv_idct_init(s);
706     if (ff_mpv_common_init(s) < 0)
707         return -1;
708
709     if (ARCH_X86)
710         ff_mpv_encode_init_x86(s);
711
712     ff_fdctdsp_init(&s->fdsp, avctx);
713     ff_me_cmp_init(&s->mecc, avctx);
714     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
715     ff_pixblockdsp_init(&s->pdsp, avctx);
716     ff_qpeldsp_init(&s->qdsp);
717
718     s->avctx->coded_frame = s->current_picture.f;
719
720     if (s->msmpeg4_version) {
721         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
722                           2 * 2 * (MAX_LEVEL + 1) *
723                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
724     }
725     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
726
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
731     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
732                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
733     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
734                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
735
736     if (s->avctx->noise_reduction) {
737         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
738                           2 * 64 * sizeof(uint16_t), fail);
739     }
740
741     if (CONFIG_H263_ENCODER)
742         ff_h263dsp_init(&s->h263dsp);
743     if (!s->dct_quantize)
744         s->dct_quantize = ff_dct_quantize_c;
745     if (!s->denoise_dct)
746         s->denoise_dct  = denoise_dct_c;
747     s->fast_dct_quantize = s->dct_quantize;
748     if (avctx->trellis)
749         s->dct_quantize  = dct_quantize_trellis_c;
750
751     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
752         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
753
754     s->quant_precision = 5;
755
756     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
757     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
758
759     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
760         ff_h261_encode_init(s);
761     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
762         ff_h263_encode_init(s);
763     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
764         ff_msmpeg4_encode_init(s);
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->idsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811 #if FF_API_NORMALIZE_AQP
812     FF_DISABLE_DEPRECATION_WARNINGS
813     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
814         s->mpv_flags |= FF_MPV_FLAG_NAQ;
815     FF_ENABLE_DEPRECATION_WARNINGS;
816 #endif
817
818 #if FF_API_MV0
819     FF_DISABLE_DEPRECATION_WARNINGS
820     if (avctx->flags & CODEC_FLAG_MV0)
821         s->mpv_flags |= FF_MPV_FLAG_MV0;
822     FF_ENABLE_DEPRECATION_WARNINGS
823 #endif
824
825 #if FF_API_MPV_OPT
826     FF_DISABLE_DEPRECATION_WARNINGS
827     if (avctx->rc_qsquish != 0.0)
828         s->rc_qsquish = avctx->rc_qsquish;
829     if (avctx->rc_qmod_amp != 0.0)
830         s->rc_qmod_amp = avctx->rc_qmod_amp;
831     if (avctx->rc_qmod_freq)
832         s->rc_qmod_freq = avctx->rc_qmod_freq;
833     if (avctx->rc_buffer_aggressivity != 1.0)
834         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
835     if (avctx->rc_initial_cplx != 0.0)
836         s->rc_initial_cplx = avctx->rc_initial_cplx;
837     if (avctx->lmin)
838         s->lmin = avctx->lmin;
839     if (avctx->lmax)
840         s->lmax = avctx->lmax;
841
842     if (avctx->rc_eq) {
843         av_freep(&s->rc_eq);
844         s->rc_eq = av_strdup(avctx->rc_eq);
845         if (!s->rc_eq)
846             return AVERROR(ENOMEM);
847     }
848     FF_ENABLE_DEPRECATION_WARNINGS
849 #endif
850
851     if (avctx->b_frame_strategy == 2) {
852         for (i = 0; i < s->max_b_frames + 2; i++) {
853             s->tmp_frames[i] = av_frame_alloc();
854             if (!s->tmp_frames[i])
855                 return AVERROR(ENOMEM);
856
857             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
858             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
859             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
860
861             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
862             if (ret < 0)
863                 return ret;
864         }
865     }
866
867     return 0;
868 fail:
869     ff_mpv_encode_end(avctx);
870     return AVERROR_UNKNOWN;
871 }
872
873 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
874 {
875     MpegEncContext *s = avctx->priv_data;
876     int i;
877
878     ff_rate_control_uninit(s);
879
880     ff_mpv_common_end(s);
881     if (CONFIG_MJPEG_ENCODER &&
882         s->out_format == FMT_MJPEG)
883         ff_mjpeg_encode_close(s);
884
885     av_freep(&avctx->extradata);
886
887     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
888         av_frame_free(&s->tmp_frames[i]);
889
890     ff_free_picture_tables(&s->new_picture);
891     ff_mpeg_unref_picture(s, &s->new_picture);
892
893     av_freep(&s->avctx->stats_out);
894     av_freep(&s->ac_stats);
895
896     av_freep(&s->q_intra_matrix);
897     av_freep(&s->q_inter_matrix);
898     av_freep(&s->q_intra_matrix16);
899     av_freep(&s->q_inter_matrix16);
900     av_freep(&s->input_picture);
901     av_freep(&s->reordered_input_picture);
902     av_freep(&s->dct_offset);
903
904     return 0;
905 }
906
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance, in samples, between the starts of consecutive rows
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++) {
            int diff = src[col + row * stride] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
920
921 static int get_intra_count(MpegEncContext *s, uint8_t *src,
922                            uint8_t *ref, int stride)
923 {
924     int x, y, w, h;
925     int acc = 0;
926
927     w = s->width  & ~15;
928     h = s->height & ~15;
929
930     for (y = 0; y < h; y += 16) {
931         for (x = 0; x < w; x += 16) {
932             int offset = x + y * stride;
933             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
934                                       stride, 16);
935             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
936             int sae  = get_sae(src + offset, mean, stride);
937
938             acc += sae + 500 < sad;
939         }
940     }
941     return acc;
942 }
943
944
945 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
946 {
947     Picture *pic = NULL;
948     int64_t pts;
949     int i, display_picture_number = 0, ret;
950     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
951                                                  (s->low_delay ? 0 : 1);
952     int direct = 1;
953
954     if (pic_arg) {
955         pts = pic_arg->pts;
956         display_picture_number = s->input_picture_number++;
957
958         if (pts != AV_NOPTS_VALUE) {
959             if (s->user_specified_pts != AV_NOPTS_VALUE) {
960                 int64_t time = pts;
961                 int64_t last = s->user_specified_pts;
962
963                 if (time <= last) {
964                     av_log(s->avctx, AV_LOG_ERROR,
965                            "Error, Invalid timestamp=%"PRId64", "
966                            "last=%"PRId64"\n", pts, s->user_specified_pts);
967                     return -1;
968                 }
969
970                 if (!s->low_delay && display_picture_number == 1)
971                     s->dts_delta = time - last;
972             }
973             s->user_specified_pts = pts;
974         } else {
975             if (s->user_specified_pts != AV_NOPTS_VALUE) {
976                 s->user_specified_pts =
977                 pts = s->user_specified_pts + 1;
978                 av_log(s->avctx, AV_LOG_INFO,
979                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
980                        pts);
981             } else {
982                 pts = display_picture_number;
983             }
984         }
985     }
986
987     if (pic_arg) {
988         if (!pic_arg->buf[0]);
989             direct = 0;
990         if (pic_arg->linesize[0] != s->linesize)
991             direct = 0;
992         if (pic_arg->linesize[1] != s->uvlinesize)
993             direct = 0;
994         if (pic_arg->linesize[2] != s->uvlinesize)
995             direct = 0;
996
997         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
998                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
999
1000         if (direct) {
1001             i = ff_find_unused_picture(s, 1);
1002             if (i < 0)
1003                 return i;
1004
1005             pic = &s->picture[i];
1006             pic->reference = 3;
1007
1008             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1009                 return ret;
1010             if (ff_alloc_picture(s, pic, 1) < 0) {
1011                 return -1;
1012             }
1013         } else {
1014             i = ff_find_unused_picture(s, 0);
1015             if (i < 0)
1016                 return i;
1017
1018             pic = &s->picture[i];
1019             pic->reference = 3;
1020
1021             if (ff_alloc_picture(s, pic, 0) < 0) {
1022                 return -1;
1023             }
1024
1025             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1026                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1027                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1028                 // empty
1029             } else {
1030                 int h_chroma_shift, v_chroma_shift;
1031                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1032                                                  &h_chroma_shift,
1033                                                  &v_chroma_shift);
1034
1035                 for (i = 0; i < 3; i++) {
1036                     int src_stride = pic_arg->linesize[i];
1037                     int dst_stride = i ? s->uvlinesize : s->linesize;
1038                     int h_shift = i ? h_chroma_shift : 0;
1039                     int v_shift = i ? v_chroma_shift : 0;
1040                     int w = s->width  >> h_shift;
1041                     int h = s->height >> v_shift;
1042                     uint8_t *src = pic_arg->data[i];
1043                     uint8_t *dst = pic->f->data[i];
1044
1045                     if (!s->avctx->rc_buffer_size)
1046                         dst += INPLACE_OFFSET;
1047
1048                     if (src_stride == dst_stride)
1049                         memcpy(dst, src, src_stride * h);
1050                     else {
1051                         while (h--) {
1052                             memcpy(dst, src, w);
1053                             dst += dst_stride;
1054                             src += src_stride;
1055                         }
1056                     }
1057                 }
1058             }
1059         }
1060         ret = av_frame_copy_props(pic->f, pic_arg);
1061         if (ret < 0)
1062             return ret;
1063
1064         pic->f->display_picture_number = display_picture_number;
1065         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1066     }
1067
1068     /* shift buffer entries */
1069     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1070         s->input_picture[i - 1] = s->input_picture[i];
1071
1072     s->input_picture[encoding_delay] = (Picture*) pic;
1073
1074     return 0;
1075 }
1076
1077 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1078 {
1079     int x, y, plane;
1080     int score = 0;
1081     int64_t score64 = 0;
1082
1083     for (plane = 0; plane < 3; plane++) {
1084         const int stride = p->f->linesize[plane];
1085         const int bw = plane ? 1 : 2;
1086         for (y = 0; y < s->mb_height * bw; y++) {
1087             for (x = 0; x < s->mb_width * bw; x++) {
1088                 int off = p->shared ? 0 : 16;
1089                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1090                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1091                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1092
1093                 switch (s->avctx->frame_skip_exp) {
1094                 case 0: score    =  FFMAX(score, v);          break;
1095                 case 1: score   += FFABS(v);                  break;
1096                 case 2: score   += v * v;                     break;
1097                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1098                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1099                 }
1100             }
1101         }
1102     }
1103
1104     if (score)
1105         score64 = score;
1106
1107     if (score64 < s->avctx->frame_skip_threshold)
1108         return 1;
1109     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1110         return 1;
1111     return 0;
1112 }
1113
1114 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1115 {
1116     AVPacket pkt = { 0 };
1117     int ret, got_output;
1118
1119     av_init_packet(&pkt);
1120     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1121     if (ret < 0)
1122         return ret;
1123
1124     ret = pkt.size;
1125     av_free_packet(&pkt);
1126     return ret;
1127 }
1128
1129 static int estimate_best_b_count(MpegEncContext *s)
1130 {
1131     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1132     AVCodecContext *c = avcodec_alloc_context3(NULL);
1133     const int scale = s->avctx->brd_scale;
1134     int i, j, out_size, p_lambda, b_lambda, lambda2;
1135     int64_t best_rd  = INT64_MAX;
1136     int best_b_count = -1;
1137
1138     assert(scale >= 0 && scale <= 3);
1139
1140     //emms_c();
1141     //s->next_picture_ptr->quality;
1142     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1143     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1144     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1145     if (!b_lambda) // FIXME we should do this somewhere else
1146         b_lambda = p_lambda;
1147     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1148                FF_LAMBDA_SHIFT;
1149
1150     c->width        = s->width  >> scale;
1151     c->height       = s->height >> scale;
1152     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1153     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1154     c->mb_decision  = s->avctx->mb_decision;
1155     c->me_cmp       = s->avctx->me_cmp;
1156     c->mb_cmp       = s->avctx->mb_cmp;
1157     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1158     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1159     c->time_base    = s->avctx->time_base;
1160     c->max_b_frames = s->max_b_frames;
1161
1162     if (avcodec_open2(c, codec, NULL) < 0)
1163         return -1;
1164
1165     for (i = 0; i < s->max_b_frames + 2; i++) {
1166         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1167                                                 s->next_picture_ptr;
1168
1169         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1170             pre_input = *pre_input_ptr;
1171
1172             if (!pre_input.shared && i) {
1173                 pre_input.f->data[0] += INPLACE_OFFSET;
1174                 pre_input.f->data[1] += INPLACE_OFFSET;
1175                 pre_input.f->data[2] += INPLACE_OFFSET;
1176             }
1177
1178             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1179                                        s->tmp_frames[i]->linesize[0],
1180                                        pre_input.f->data[0],
1181                                        pre_input.f->linesize[0],
1182                                        c->width, c->height);
1183             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1184                                        s->tmp_frames[i]->linesize[1],
1185                                        pre_input.f->data[1],
1186                                        pre_input.f->linesize[1],
1187                                        c->width >> 1, c->height >> 1);
1188             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1189                                        s->tmp_frames[i]->linesize[2],
1190                                        pre_input.f->data[2],
1191                                        pre_input.f->linesize[2],
1192                                        c->width >> 1, c->height >> 1);
1193         }
1194     }
1195
1196     for (j = 0; j < s->max_b_frames + 1; j++) {
1197         int64_t rd = 0;
1198
1199         if (!s->input_picture[j])
1200             break;
1201
1202         c->error[0] = c->error[1] = c->error[2] = 0;
1203
1204         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1205         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1206
1207         out_size = encode_frame(c, s->tmp_frames[0]);
1208
1209         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1210
1211         for (i = 0; i < s->max_b_frames + 1; i++) {
1212             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1213
1214             s->tmp_frames[i + 1]->pict_type = is_p ?
1215                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1216             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1217
1218             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1219
1220             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1221         }
1222
1223         /* get the delayed frames */
1224         while (out_size) {
1225             out_size = encode_frame(c, NULL);
1226             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1227         }
1228
1229         rd += c->error[0] + c->error[1] + c->error[2];
1230
1231         if (rd < best_rd) {
1232             best_rd = rd;
1233             best_b_count = j;
1234         }
1235     }
1236
1237     avcodec_close(c);
1238     av_freep(&c);
1239
1240     return best_b_count;
1241 }
1242
1243 static int select_input_picture(MpegEncContext *s)
1244 {
1245     int i, ret;
1246
1247     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1248         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1249     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1250
1251     /* set next picture type & ordering */
1252     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1253         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1254             !s->next_picture_ptr || s->intra_only) {
1255             s->reordered_input_picture[0] = s->input_picture[0];
1256             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1257             s->reordered_input_picture[0]->f->coded_picture_number =
1258                 s->coded_picture_number++;
1259         } else {
1260             int b_frames;
1261
1262             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1263                 if (s->picture_in_gop_number < s->gop_size &&
1264                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1265                     // FIXME check that te gop check above is +-1 correct
1266                     av_frame_unref(s->input_picture[0]->f);
1267
1268                     emms_c();
1269                     ff_vbv_update(s, 0);
1270
1271                     goto no_output_pic;
1272                 }
1273             }
1274
1275             if (s->flags & CODEC_FLAG_PASS2) {
1276                 for (i = 0; i < s->max_b_frames + 1; i++) {
1277                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1278
1279                     if (pict_num >= s->rc_context.num_entries)
1280                         break;
1281                     if (!s->input_picture[i]) {
1282                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1283                         break;
1284                     }
1285
1286                     s->input_picture[i]->f->pict_type =
1287                         s->rc_context.entry[pict_num].new_pict_type;
1288                 }
1289             }
1290
1291             if (s->avctx->b_frame_strategy == 0) {
1292                 b_frames = s->max_b_frames;
1293                 while (b_frames && !s->input_picture[b_frames])
1294                     b_frames--;
1295             } else if (s->avctx->b_frame_strategy == 1) {
1296                 for (i = 1; i < s->max_b_frames + 1; i++) {
1297                     if (s->input_picture[i] &&
1298                         s->input_picture[i]->b_frame_score == 0) {
1299                         s->input_picture[i]->b_frame_score =
1300                             get_intra_count(s,
1301                                             s->input_picture[i    ]->f->data[0],
1302                                             s->input_picture[i - 1]->f->data[0],
1303                                             s->linesize) + 1;
1304                     }
1305                 }
1306                 for (i = 0; i < s->max_b_frames + 1; i++) {
1307                     if (!s->input_picture[i] ||
1308                         s->input_picture[i]->b_frame_score - 1 >
1309                             s->mb_num / s->avctx->b_sensitivity)
1310                         break;
1311                 }
1312
1313                 b_frames = FFMAX(0, i - 1);
1314
1315                 /* reset scores */
1316                 for (i = 0; i < b_frames + 1; i++) {
1317                     s->input_picture[i]->b_frame_score = 0;
1318                 }
1319             } else if (s->avctx->b_frame_strategy == 2) {
1320                 b_frames = estimate_best_b_count(s);
1321             } else {
1322                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1323                 b_frames = 0;
1324             }
1325
1326             emms_c();
1327
1328             for (i = b_frames - 1; i >= 0; i--) {
1329                 int type = s->input_picture[i]->f->pict_type;
1330                 if (type && type != AV_PICTURE_TYPE_B)
1331                     b_frames = i;
1332             }
1333             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1334                 b_frames == s->max_b_frames) {
1335                 av_log(s->avctx, AV_LOG_ERROR,
1336                        "warning, too many b frames in a row\n");
1337             }
1338
1339             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1340                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1341                     s->gop_size > s->picture_in_gop_number) {
1342                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1343                 } else {
1344                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1345                         b_frames = 0;
1346                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1347                 }
1348             }
1349
1350             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1351                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1352                 b_frames--;
1353
1354             s->reordered_input_picture[0] = s->input_picture[b_frames];
1355             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1356                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1357             s->reordered_input_picture[0]->f->coded_picture_number =
1358                 s->coded_picture_number++;
1359             for (i = 0; i < b_frames; i++) {
1360                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1361                 s->reordered_input_picture[i + 1]->f->pict_type =
1362                     AV_PICTURE_TYPE_B;
1363                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1364                     s->coded_picture_number++;
1365             }
1366         }
1367     }
1368 no_output_pic:
1369     if (s->reordered_input_picture[0]) {
1370         s->reordered_input_picture[0]->reference =
1371            s->reordered_input_picture[0]->f->pict_type !=
1372                AV_PICTURE_TYPE_B ? 3 : 0;
1373
1374         ff_mpeg_unref_picture(s, &s->new_picture);
1375         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1376             return ret;
1377
1378         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1379             // input is a shared pix, so we can't modifiy it -> alloc a new
1380             // one & ensure that the shared one is reuseable
1381
1382             Picture *pic;
1383             int i = ff_find_unused_picture(s, 0);
1384             if (i < 0)
1385                 return i;
1386             pic = &s->picture[i];
1387
1388             pic->reference = s->reordered_input_picture[0]->reference;
1389             if (ff_alloc_picture(s, pic, 0) < 0) {
1390                 return -1;
1391             }
1392
1393             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1394             if (ret < 0)
1395                 return ret;
1396
1397             /* mark us unused / free shared pic */
1398             av_frame_unref(s->reordered_input_picture[0]->f);
1399             s->reordered_input_picture[0]->shared = 0;
1400
1401             s->current_picture_ptr = pic;
1402         } else {
1403             // input is not a shared pix -> reuse buffer for current_pix
1404             s->current_picture_ptr = s->reordered_input_picture[0];
1405             for (i = 0; i < 4; i++) {
1406                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1407             }
1408         }
1409         ff_mpeg_unref_picture(s, &s->current_picture);
1410         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1411                                        s->current_picture_ptr)) < 0)
1412             return ret;
1413
1414         s->picture_number = s->new_picture.f->display_picture_number;
1415     } else {
1416         ff_mpeg_unref_picture(s, &s->new_picture);
1417     }
1418     return 0;
1419 }
1420
1421 static void frame_end(MpegEncContext *s)
1422 {
1423     int i;
1424
1425     if (s->unrestricted_mv &&
1426         s->current_picture.reference &&
1427         !s->intra_only) {
1428         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1429         int hshift = desc->log2_chroma_w;
1430         int vshift = desc->log2_chroma_h;
1431         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1432                                 s->h_edge_pos, s->v_edge_pos,
1433                                 EDGE_WIDTH, EDGE_WIDTH,
1434                                 EDGE_TOP | EDGE_BOTTOM);
1435         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1436                                 s->h_edge_pos >> hshift,
1437                                 s->v_edge_pos >> vshift,
1438                                 EDGE_WIDTH >> hshift,
1439                                 EDGE_WIDTH >> vshift,
1440                                 EDGE_TOP | EDGE_BOTTOM);
1441         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1442                                 s->h_edge_pos >> hshift,
1443                                 s->v_edge_pos >> vshift,
1444                                 EDGE_WIDTH >> hshift,
1445                                 EDGE_WIDTH >> vshift,
1446                                 EDGE_TOP | EDGE_BOTTOM);
1447     }
1448
1449     emms_c();
1450
1451     s->last_pict_type                 = s->pict_type;
1452     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1453     if (s->pict_type!= AV_PICTURE_TYPE_B)
1454         s->last_non_b_pict_type = s->pict_type;
1455
1456     if (s->encoding) {
1457         /* release non-reference frames */
1458         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1459             if (!s->picture[i].reference)
1460                 ff_mpeg_unref_picture(s, &s->picture[i]);
1461         }
1462     }
1463
1464     s->avctx->coded_frame = s->current_picture_ptr->f;
1465
1466 }
1467
1468 static void update_noise_reduction(MpegEncContext *s)
1469 {
1470     int intra, i;
1471
1472     for (intra = 0; intra < 2; intra++) {
1473         if (s->dct_count[intra] > (1 << 16)) {
1474             for (i = 0; i < 64; i++) {
1475                 s->dct_error_sum[intra][i] >>= 1;
1476             }
1477             s->dct_count[intra] >>= 1;
1478         }
1479
1480         for (i = 0; i < 64; i++) {
1481             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1482                                        s->dct_count[intra] +
1483                                        s->dct_error_sum[intra][i] / 2) /
1484                                       (s->dct_error_sum[intra][i] + 1);
1485         }
1486     }
1487 }
1488
1489 static int frame_start(MpegEncContext *s)
1490 {
1491     int ret;
1492
1493     /* mark & release old frames */
1494     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1495         s->last_picture_ptr != s->next_picture_ptr &&
1496         s->last_picture_ptr->f->buf[0]) {
1497         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1498     }
1499
1500     s->current_picture_ptr->f->pict_type = s->pict_type;
1501     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1502
1503     ff_mpeg_unref_picture(s, &s->current_picture);
1504     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1505                                    s->current_picture_ptr)) < 0)
1506         return ret;
1507
1508     if (s->pict_type != AV_PICTURE_TYPE_B) {
1509         s->last_picture_ptr = s->next_picture_ptr;
1510         if (!s->droppable)
1511             s->next_picture_ptr = s->current_picture_ptr;
1512     }
1513
1514     if (s->last_picture_ptr) {
1515         ff_mpeg_unref_picture(s, &s->last_picture);
1516         if (s->last_picture_ptr->f->buf[0] &&
1517             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1518                                        s->last_picture_ptr)) < 0)
1519             return ret;
1520     }
1521     if (s->next_picture_ptr) {
1522         ff_mpeg_unref_picture(s, &s->next_picture);
1523         if (s->next_picture_ptr->f->buf[0] &&
1524             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1525                                        s->next_picture_ptr)) < 0)
1526             return ret;
1527     }
1528
1529     if (s->picture_structure!= PICT_FRAME) {
1530         int i;
1531         for (i = 0; i < 4; i++) {
1532             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1533                 s->current_picture.f->data[i] +=
1534                     s->current_picture.f->linesize[i];
1535             }
1536             s->current_picture.f->linesize[i] *= 2;
1537             s->last_picture.f->linesize[i]    *= 2;
1538             s->next_picture.f->linesize[i]    *= 2;
1539         }
1540     }
1541
1542     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1543         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1544         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1545     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1546         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1547         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1548     } else {
1549         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1550         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1551     }
1552
1553     if (s->dct_error_sum) {
1554         assert(s->avctx->noise_reduction && s->encoding);
1555         update_noise_reduction(s);
1556     }
1557
1558     return 0;
1559 }
1560
1561 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1562                           const AVFrame *pic_arg, int *got_packet)
1563 {
1564     MpegEncContext *s = avctx->priv_data;
1565     int i, stuffing_count, ret;
1566     int context_count = s->slice_context_count;
1567
1568     s->picture_in_gop_number++;
1569
1570     if (load_input_picture(s, pic_arg) < 0)
1571         return -1;
1572
1573     if (select_input_picture(s) < 0) {
1574         return -1;
1575     }
1576
1577     /* output? */
1578     if (s->new_picture.f->data[0]) {
1579         if (!pkt->data &&
1580             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1581             return ret;
1582         if (s->mb_info) {
1583             s->mb_info_ptr = av_packet_new_side_data(pkt,
1584                                  AV_PKT_DATA_H263_MB_INFO,
1585                                  s->mb_width*s->mb_height*12);
1586             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1587         }
1588
1589         for (i = 0; i < context_count; i++) {
1590             int start_y = s->thread_context[i]->start_mb_y;
1591             int   end_y = s->thread_context[i]->  end_mb_y;
1592             int h       = s->mb_height;
1593             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1594             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1595
1596             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1597         }
1598
1599         s->pict_type = s->new_picture.f->pict_type;
1600         //emms_c();
1601         ret = frame_start(s);
1602         if (ret < 0)
1603             return ret;
1604 vbv_retry:
1605         if (encode_picture(s, s->picture_number) < 0)
1606             return -1;
1607
1608         avctx->header_bits = s->header_bits;
1609         avctx->mv_bits     = s->mv_bits;
1610         avctx->misc_bits   = s->misc_bits;
1611         avctx->i_tex_bits  = s->i_tex_bits;
1612         avctx->p_tex_bits  = s->p_tex_bits;
1613         avctx->i_count     = s->i_count;
1614         // FIXME f/b_count in avctx
1615         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1616         avctx->skip_count  = s->skip_count;
1617
1618         frame_end(s);
1619
1620         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1621             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1622
1623         if (avctx->rc_buffer_size) {
1624             RateControlContext *rcc = &s->rc_context;
1625             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1626
1627             if (put_bits_count(&s->pb) > max_size &&
1628                 s->lambda < s->lmax) {
1629                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1630                                        (s->qscale + 1) / s->qscale);
1631                 if (s->adaptive_quant) {
1632                     int i;
1633                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1634                         s->lambda_table[i] =
1635                             FFMAX(s->lambda_table[i] + 1,
1636                                   s->lambda_table[i] * (s->qscale + 1) /
1637                                   s->qscale);
1638                 }
1639                 s->mb_skipped = 0;        // done in frame_start()
1640                 // done in encode_picture() so we must undo it
1641                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1642                     if (s->flipflop_rounding          ||
1643                         s->codec_id == AV_CODEC_ID_H263P ||
1644                         s->codec_id == AV_CODEC_ID_MPEG4)
1645                         s->no_rounding ^= 1;
1646                 }
1647                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1648                     s->time_base       = s->last_time_base;
1649                     s->last_non_b_time = s->time - s->pp_time;
1650                 }
1651                 for (i = 0; i < context_count; i++) {
1652                     PutBitContext *pb = &s->thread_context[i]->pb;
1653                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1654                 }
1655                 goto vbv_retry;
1656             }
1657
1658             assert(s->avctx->rc_max_rate);
1659         }
1660
1661         if (s->flags & CODEC_FLAG_PASS1)
1662             ff_write_pass1_stats(s);
1663
1664         for (i = 0; i < 4; i++) {
1665             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1666             avctx->error[i] += s->current_picture_ptr->f->error[i];
1667         }
1668
1669         if (s->flags & CODEC_FLAG_PASS1)
1670             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1671                    avctx->i_tex_bits + avctx->p_tex_bits ==
1672                        put_bits_count(&s->pb));
1673         flush_put_bits(&s->pb);
1674         s->frame_bits  = put_bits_count(&s->pb);
1675
1676         stuffing_count = ff_vbv_update(s, s->frame_bits);
1677         if (stuffing_count) {
1678             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1679                     stuffing_count + 50) {
1680                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1681                 return -1;
1682             }
1683
1684             switch (s->codec_id) {
1685             case AV_CODEC_ID_MPEG1VIDEO:
1686             case AV_CODEC_ID_MPEG2VIDEO:
1687                 while (stuffing_count--) {
1688                     put_bits(&s->pb, 8, 0);
1689                 }
1690             break;
1691             case AV_CODEC_ID_MPEG4:
1692                 put_bits(&s->pb, 16, 0);
1693                 put_bits(&s->pb, 16, 0x1C3);
1694                 stuffing_count -= 4;
1695                 while (stuffing_count--) {
1696                     put_bits(&s->pb, 8, 0xFF);
1697                 }
1698             break;
1699             default:
1700                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1701             }
1702             flush_put_bits(&s->pb);
1703             s->frame_bits  = put_bits_count(&s->pb);
1704         }
1705
1706         /* update mpeg1/2 vbv_delay for CBR */
1707         if (s->avctx->rc_max_rate                          &&
1708             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1709             s->out_format == FMT_MPEG1                     &&
1710             90000LL * (avctx->rc_buffer_size - 1) <=
1711                 s->avctx->rc_max_rate * 0xFFFFLL) {
1712             int vbv_delay, min_delay;
1713             double inbits  = s->avctx->rc_max_rate *
1714                              av_q2d(s->avctx->time_base);
1715             int    minbits = s->frame_bits - 8 *
1716                              (s->vbv_delay_ptr - s->pb.buf - 1);
1717             double bits    = s->rc_context.buffer_index + minbits - inbits;
1718
1719             if (bits < 0)
1720                 av_log(s->avctx, AV_LOG_ERROR,
1721                        "Internal error, negative bits\n");
1722
1723             assert(s->repeat_first_field == 0);
1724
1725             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1726             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1727                         s->avctx->rc_max_rate;
1728
1729             vbv_delay = FFMAX(vbv_delay, min_delay);
1730
1731             assert(vbv_delay < 0xFFFF);
1732
1733             s->vbv_delay_ptr[0] &= 0xF8;
1734             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1735             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1736             s->vbv_delay_ptr[2] &= 0x07;
1737             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1738             avctx->vbv_delay     = vbv_delay * 300;
1739         }
1740         s->total_bits     += s->frame_bits;
1741         avctx->frame_bits  = s->frame_bits;
1742
1743         pkt->pts = s->current_picture.f->pts;
1744         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1745             if (!s->current_picture.f->coded_picture_number)
1746                 pkt->dts = pkt->pts - s->dts_delta;
1747             else
1748                 pkt->dts = s->reordered_pts;
1749             s->reordered_pts = pkt->pts;
1750         } else
1751             pkt->dts = pkt->pts;
1752         if (s->current_picture.f->key_frame)
1753             pkt->flags |= AV_PKT_FLAG_KEY;
1754         if (s->mb_info)
1755             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1756     } else {
1757         s->frame_bits = 0;
1758     }
1759     assert((s->frame_bits & 7) == 0);
1760
1761     pkt->size = s->frame_bits / 8;
1762     *got_packet = !!pkt->size;
1763     return 0;
1764 }
1765
1766 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1767                                                 int n, int threshold)
1768 {
1769     static const char tab[64] = {
1770         3, 2, 2, 1, 1, 1, 1, 1,
1771         1, 1, 1, 1, 1, 1, 1, 1,
1772         1, 1, 1, 1, 1, 1, 1, 1,
1773         0, 0, 0, 0, 0, 0, 0, 0,
1774         0, 0, 0, 0, 0, 0, 0, 0,
1775         0, 0, 0, 0, 0, 0, 0, 0,
1776         0, 0, 0, 0, 0, 0, 0, 0,
1777         0, 0, 0, 0, 0, 0, 0, 0
1778     };
1779     int score = 0;
1780     int run = 0;
1781     int i;
1782     int16_t *block = s->block[n];
1783     const int last_index = s->block_last_index[n];
1784     int skip_dc;
1785
1786     if (threshold < 0) {
1787         skip_dc = 0;
1788         threshold = -threshold;
1789     } else
1790         skip_dc = 1;
1791
1792     /* Are all we could set to zero already zero? */
1793     if (last_index <= skip_dc - 1)
1794         return;
1795
1796     for (i = 0; i <= last_index; i++) {
1797         const int j = s->intra_scantable.permutated[i];
1798         const int level = FFABS(block[j]);
1799         if (level == 1) {
1800             if (skip_dc && i == 0)
1801                 continue;
1802             score += tab[run];
1803             run = 0;
1804         } else if (level > 1) {
1805             return;
1806         } else {
1807             run++;
1808         }
1809     }
1810     if (score >= threshold)
1811         return;
1812     for (i = skip_dc; i <= last_index; i++) {
1813         const int j = s->intra_scantable.permutated[i];
1814         block[j] = 0;
1815     }
1816     if (block[0])
1817         s->block_last_index[n] = 0;
1818     else
1819         s->block_last_index[n] = -1;
1820 }
1821
1822 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1823                                int last_index)
1824 {
1825     int i;
1826     const int maxlevel = s->max_qcoeff;
1827     const int minlevel = s->min_qcoeff;
1828     int overflow = 0;
1829
1830     if (s->mb_intra) {
1831         i = 1; // skip clipping of intra dc
1832     } else
1833         i = 0;
1834
1835     for (; i <= last_index; i++) {
1836         const int j = s->intra_scantable.permutated[i];
1837         int level = block[j];
1838
1839         if (level > maxlevel) {
1840             level = maxlevel;
1841             overflow++;
1842         } else if (level < minlevel) {
1843             level = minlevel;
1844             overflow++;
1845         }
1846
1847         block[j] = level;
1848     }
1849
1850     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1851         av_log(s->avctx, AV_LOG_INFO,
1852                "warning, clipping %d dct coefficients to %d..%d\n",
1853                overflow, minlevel, maxlevel);
1854 }
1855
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the sum and the sum of squares of its 3x3 neighbourhood
 * (clipped to the 8x8 block) are gathered; the stored weight is
 * 36 * ff_sqrt(count * sqr - sum * sum) / count, where count is the number
 * of neighbourhood pixels actually inside the block.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two pixel rows
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        /* vertical extent of the neighbourhood, bottom is exclusive */
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            /* horizontal extent of the neighbourhood, right is exclusive */
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
1879
1880 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1881                                                 int motion_x, int motion_y,
1882                                                 int mb_block_height,
1883                                                 int mb_block_count)
1884 {
1885     int16_t weight[8][64];
1886     int16_t orig[8][64];
1887     const int mb_x = s->mb_x;
1888     const int mb_y = s->mb_y;
1889     int i;
1890     int skip_dct[8];
1891     int dct_offset = s->linesize * 8; // default for progressive frames
1892     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1893     ptrdiff_t wrap_y, wrap_c;
1894
1895     for (i = 0; i < mb_block_count; i++)
1896         skip_dct[i] = s->skipdct;
1897
1898     if (s->adaptive_quant) {
1899         const int last_qp = s->qscale;
1900         const int mb_xy = mb_x + mb_y * s->mb_stride;
1901
1902         s->lambda = s->lambda_table[mb_xy];
1903         update_qscale(s);
1904
1905         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1906             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1907             s->dquant = s->qscale - last_qp;
1908
1909             if (s->out_format == FMT_H263) {
1910                 s->dquant = av_clip(s->dquant, -2, 2);
1911
1912                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1913                     if (!s->mb_intra) {
1914                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1915                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1916                                 s->dquant = 0;
1917                         }
1918                         if (s->mv_type == MV_TYPE_8X8)
1919                             s->dquant = 0;
1920                     }
1921                 }
1922             }
1923         }
1924         ff_set_qscale(s, last_qp + s->dquant);
1925     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1926         ff_set_qscale(s, s->qscale + s->dquant);
1927
1928     wrap_y = s->linesize;
1929     wrap_c = s->uvlinesize;
1930     ptr_y  = s->new_picture.f->data[0] +
1931              (mb_y * 16 * wrap_y)              + mb_x * 16;
1932     ptr_cb = s->new_picture.f->data[1] +
1933              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1934     ptr_cr = s->new_picture.f->data[2] +
1935              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1936
1937     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1938         uint8_t *ebuf = s->edge_emu_buffer + 32;
1939         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1940                                  wrap_y, wrap_y,
1941                                  16, 16, mb_x * 16, mb_y * 16,
1942                                  s->width, s->height);
1943         ptr_y = ebuf;
1944         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1945                                  wrap_c, wrap_c,
1946                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1947                                  s->width >> 1, s->height >> 1);
1948         ptr_cb = ebuf + 18 * wrap_y;
1949         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1950                                  wrap_c, wrap_c,
1951                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1952                                  s->width >> 1, s->height >> 1);
1953         ptr_cr = ebuf + 18 * wrap_y + 8;
1954     }
1955
1956     if (s->mb_intra) {
1957         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1958             int progressive_score, interlaced_score;
1959
1960             s->interlaced_dct = 0;
1961             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1962                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1963                                                      NULL, wrap_y, 8) - 400;
1964
1965             if (progressive_score > 0) {
1966                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1967                                                         NULL, wrap_y * 2, 8) +
1968                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1969                                                         NULL, wrap_y * 2, 8);
1970                 if (progressive_score > interlaced_score) {
1971                     s->interlaced_dct = 1;
1972
1973                     dct_offset = wrap_y;
1974                     wrap_y <<= 1;
1975                     if (s->chroma_format == CHROMA_422)
1976                         wrap_c <<= 1;
1977                 }
1978             }
1979         }
1980
1981         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1982         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1983         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1984         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1985
1986         if (s->flags & CODEC_FLAG_GRAY) {
1987             skip_dct[4] = 1;
1988             skip_dct[5] = 1;
1989         } else {
1990             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1991             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1992             if (!s->chroma_y_shift) { /* 422 */
1993                 s->pdsp.get_pixels(s->block[6],
1994                                    ptr_cb + (dct_offset >> 1), wrap_c);
1995                 s->pdsp.get_pixels(s->block[7],
1996                                    ptr_cr + (dct_offset >> 1), wrap_c);
1997             }
1998         }
1999     } else {
2000         op_pixels_func (*op_pix)[4];
2001         qpel_mc_func (*op_qpix)[16];
2002         uint8_t *dest_y, *dest_cb, *dest_cr;
2003
2004         dest_y  = s->dest[0];
2005         dest_cb = s->dest[1];
2006         dest_cr = s->dest[2];
2007
2008         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2009             op_pix  = s->hdsp.put_pixels_tab;
2010             op_qpix = s->qdsp.put_qpel_pixels_tab;
2011         } else {
2012             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2013             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2014         }
2015
2016         if (s->mv_dir & MV_DIR_FORWARD) {
2017             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2018                           s->last_picture.f->data,
2019                           op_pix, op_qpix);
2020             op_pix  = s->hdsp.avg_pixels_tab;
2021             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2022         }
2023         if (s->mv_dir & MV_DIR_BACKWARD) {
2024             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2025                           s->next_picture.f->data,
2026                           op_pix, op_qpix);
2027         }
2028
2029         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2030             int progressive_score, interlaced_score;
2031
2032             s->interlaced_dct = 0;
2033             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2034                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2035                                                      ptr_y + wrap_y * 8,
2036                                                      wrap_y, 8) - 400;
2037
2038             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2039                 progressive_score -= 400;
2040
2041             if (progressive_score > 0) {
2042                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2043                                                         wrap_y * 2, 8) +
2044                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2045                                                         ptr_y + wrap_y,
2046                                                         wrap_y * 2, 8);
2047
2048                 if (progressive_score > interlaced_score) {
2049                     s->interlaced_dct = 1;
2050
2051                     dct_offset = wrap_y;
2052                     wrap_y <<= 1;
2053                     if (s->chroma_format == CHROMA_422)
2054                         wrap_c <<= 1;
2055                 }
2056             }
2057         }
2058
2059         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2060         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2061         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2062                             dest_y + dct_offset, wrap_y);
2063         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2064                             dest_y + dct_offset + 8, wrap_y);
2065
2066         if (s->flags & CODEC_FLAG_GRAY) {
2067             skip_dct[4] = 1;
2068             skip_dct[5] = 1;
2069         } else {
2070             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2071             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2072             if (!s->chroma_y_shift) { /* 422 */
2073                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2074                                     dest_cb + (dct_offset >> 1), wrap_c);
2075                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2076                                     dest_cr + (dct_offset >> 1), wrap_c);
2077             }
2078         }
2079         /* pre quantization */
2080         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2081                 2 * s->qscale * s->qscale) {
2082             // FIXME optimize
2083             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2084                 skip_dct[0] = 1;
2085             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2086                 skip_dct[1] = 1;
2087             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2088                                wrap_y, 8) < 20 * s->qscale)
2089                 skip_dct[2] = 1;
2090             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2091                                wrap_y, 8) < 20 * s->qscale)
2092                 skip_dct[3] = 1;
2093             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2094                 skip_dct[4] = 1;
2095             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2096                 skip_dct[5] = 1;
2097             if (!s->chroma_y_shift) { /* 422 */
2098                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2099                                    dest_cb + (dct_offset >> 1),
2100                                    wrap_c, 8) < 20 * s->qscale)
2101                     skip_dct[6] = 1;
2102                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2103                                    dest_cr + (dct_offset >> 1),
2104                                    wrap_c, 8) < 20 * s->qscale)
2105                     skip_dct[7] = 1;
2106             }
2107         }
2108     }
2109
2110     if (s->quantizer_noise_shaping) {
2111         if (!skip_dct[0])
2112             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2113         if (!skip_dct[1])
2114             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2115         if (!skip_dct[2])
2116             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2117         if (!skip_dct[3])
2118             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2119         if (!skip_dct[4])
2120             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2121         if (!skip_dct[5])
2122             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2123         if (!s->chroma_y_shift) { /* 422 */
2124             if (!skip_dct[6])
2125                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2126                                   wrap_c);
2127             if (!skip_dct[7])
2128                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2129                                   wrap_c);
2130         }
2131         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2132     }
2133
2134     /* DCT & quantize */
2135     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2136     {
2137         for (i = 0; i < mb_block_count; i++) {
2138             if (!skip_dct[i]) {
2139                 int overflow;
2140                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2141                 // FIXME we could decide to change to quantizer instead of
2142                 // clipping
2143                 // JS: I don't think that would be a good idea it could lower
2144                 //     quality instead of improve it. Just INTRADC clipping
2145                 //     deserves changes in quantizer
2146                 if (overflow)
2147                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2148             } else
2149                 s->block_last_index[i] = -1;
2150         }
2151         if (s->quantizer_noise_shaping) {
2152             for (i = 0; i < mb_block_count; i++) {
2153                 if (!skip_dct[i]) {
2154                     s->block_last_index[i] =
2155                         dct_quantize_refine(s, s->block[i], weight[i],
2156                                             orig[i], i, s->qscale);
2157                 }
2158             }
2159         }
2160
2161         if (s->luma_elim_threshold && !s->mb_intra)
2162             for (i = 0; i < 4; i++)
2163                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2164         if (s->chroma_elim_threshold && !s->mb_intra)
2165             for (i = 4; i < mb_block_count; i++)
2166                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2167
2168         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2169             for (i = 0; i < mb_block_count; i++) {
2170                 if (s->block_last_index[i] == -1)
2171                     s->coded_score[i] = INT_MAX / 256;
2172             }
2173         }
2174     }
2175
2176     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2177         s->block_last_index[4] =
2178         s->block_last_index[5] = 0;
2179         s->block[4][0] =
2180         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2181     }
2182
2183     // non c quantize code returns incorrect block_last_index FIXME
2184     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2185         for (i = 0; i < mb_block_count; i++) {
2186             int j;
2187             if (s->block_last_index[i] > 0) {
2188                 for (j = 63; j > 0; j--) {
2189                     if (s->block[i][s->intra_scantable.permutated[j]])
2190                         break;
2191                 }
2192                 s->block_last_index[i] = j;
2193             }
2194         }
2195     }
2196
2197     /* huffman encode */
2198     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2199     case AV_CODEC_ID_MPEG1VIDEO:
2200     case AV_CODEC_ID_MPEG2VIDEO:
2201         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2202             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2203         break;
2204     case AV_CODEC_ID_MPEG4:
2205         if (CONFIG_MPEG4_ENCODER)
2206             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2207         break;
2208     case AV_CODEC_ID_MSMPEG4V2:
2209     case AV_CODEC_ID_MSMPEG4V3:
2210     case AV_CODEC_ID_WMV1:
2211         if (CONFIG_MSMPEG4_ENCODER)
2212             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2213         break;
2214     case AV_CODEC_ID_WMV2:
2215         if (CONFIG_WMV2_ENCODER)
2216             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2217         break;
2218     case AV_CODEC_ID_H261:
2219         if (CONFIG_H261_ENCODER)
2220             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2221         break;
2222     case AV_CODEC_ID_H263:
2223     case AV_CODEC_ID_H263P:
2224     case AV_CODEC_ID_FLV1:
2225     case AV_CODEC_ID_RV10:
2226     case AV_CODEC_ID_RV20:
2227         if (CONFIG_H263_ENCODER)
2228             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2229         break;
2230     case AV_CODEC_ID_MJPEG:
2231         if (CONFIG_MJPEG_ENCODER)
2232             ff_mjpeg_encode_mb(s, s->block);
2233         break;
2234     default:
2235         assert(0);
2236     }
2237 }
2238
2239 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2240 {
2241     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2242     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2243 }
2244
2245 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2246     int i;
2247
2248     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2249
2250     /* mpeg1 */
2251     d->mb_skip_run= s->mb_skip_run;
2252     for(i=0; i<3; i++)
2253         d->last_dc[i] = s->last_dc[i];
2254
2255     /* statistics */
2256     d->mv_bits= s->mv_bits;
2257     d->i_tex_bits= s->i_tex_bits;
2258     d->p_tex_bits= s->p_tex_bits;
2259     d->i_count= s->i_count;
2260     d->f_count= s->f_count;
2261     d->b_count= s->b_count;
2262     d->skip_count= s->skip_count;
2263     d->misc_bits= s->misc_bits;
2264     d->last_bits= 0;
2265
2266     d->mb_skipped= 0;
2267     d->qscale= s->qscale;
2268     d->dquant= s->dquant;
2269
2270     d->esc3_level_length= s->esc3_level_length;
2271 }
2272
2273 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2274     int i;
2275
2276     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2277     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2278
2279     /* mpeg1 */
2280     d->mb_skip_run= s->mb_skip_run;
2281     for(i=0; i<3; i++)
2282         d->last_dc[i] = s->last_dc[i];
2283
2284     /* statistics */
2285     d->mv_bits= s->mv_bits;
2286     d->i_tex_bits= s->i_tex_bits;
2287     d->p_tex_bits= s->p_tex_bits;
2288     d->i_count= s->i_count;
2289     d->f_count= s->f_count;
2290     d->b_count= s->b_count;
2291     d->skip_count= s->skip_count;
2292     d->misc_bits= s->misc_bits;
2293
2294     d->mb_intra= s->mb_intra;
2295     d->mb_skipped= s->mb_skipped;
2296     d->mv_type= s->mv_type;
2297     d->mv_dir= s->mv_dir;
2298     d->pb= s->pb;
2299     if(s->data_partitioning){
2300         d->pb2= s->pb2;
2301         d->tex_pb= s->tex_pb;
2302     }
2303     d->block= s->block;
2304     for(i=0; i<8; i++)
2305         d->block_last_index[i]= s->block_last_index[i];
2306     d->interlaced_dct= s->interlaced_dct;
2307     d->qscale= s->qscale;
2308
2309     d->esc3_level_length= s->esc3_level_length;
2310 }
2311
2312 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2313                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2314                            int *dmin, int *next_block, int motion_x, int motion_y)
2315 {
2316     int score;
2317     uint8_t *dest_backup[3];
2318
2319     copy_context_before_encode(s, backup, type);
2320
2321     s->block= s->blocks[*next_block];
2322     s->pb= pb[*next_block];
2323     if(s->data_partitioning){
2324         s->pb2   = pb2   [*next_block];
2325         s->tex_pb= tex_pb[*next_block];
2326     }
2327
2328     if(*next_block){
2329         memcpy(dest_backup, s->dest, sizeof(s->dest));
2330         s->dest[0] = s->rd_scratchpad;
2331         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2332         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2333         assert(s->linesize >= 32); //FIXME
2334     }
2335
2336     encode_mb(s, motion_x, motion_y);
2337
2338     score= put_bits_count(&s->pb);
2339     if(s->data_partitioning){
2340         score+= put_bits_count(&s->pb2);
2341         score+= put_bits_count(&s->tex_pb);
2342     }
2343
2344     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2345         ff_mpv_decode_mb(s, s->block);
2346
2347         score *= s->lambda2;
2348         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2349     }
2350
2351     if(*next_block){
2352         memcpy(s->dest, dest_backup, sizeof(s->dest));
2353     }
2354
2355     if(score<*dmin){
2356         *dmin= score;
2357         *next_block^=1;
2358
2359         copy_context_after_encode(best, s, type);
2360     }
2361 }
2362
2363 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2364     uint32_t *sq = ff_square_tab + 256;
2365     int acc=0;
2366     int x,y;
2367
2368     if(w==16 && h==16)
2369         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2370     else if(w==8 && h==8)
2371         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2372
2373     for(y=0; y<h; y++){
2374         for(x=0; x<w; x++){
2375             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2376         }
2377     }
2378
2379     assert(acc>=0);
2380
2381     return acc;
2382 }
2383
2384 static int sse_mb(MpegEncContext *s){
2385     int w= 16;
2386     int h= 16;
2387
2388     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2389     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2390
2391     if(w==16 && h==16)
2392       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2393         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2394                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2395                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2396       }else{
2397         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2398                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2399                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2400       }
2401     else
2402         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2403                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2404                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2405 }
2406
2407 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2408     MpegEncContext *s= *(void**)arg;
2409
2410
2411     s->me.pre_pass=1;
2412     s->me.dia_size= s->avctx->pre_dia_size;
2413     s->first_slice_line=1;
2414     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2415         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2416             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2417         }
2418         s->first_slice_line=0;
2419     }
2420
2421     s->me.pre_pass=0;
2422
2423     return 0;
2424 }
2425
2426 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2427     MpegEncContext *s= *(void**)arg;
2428
2429     s->me.dia_size= s->avctx->dia_size;
2430     s->first_slice_line=1;
2431     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2432         s->mb_x=0; //for block init below
2433         ff_init_block_index(s);
2434         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2435             s->block_index[0]+=2;
2436             s->block_index[1]+=2;
2437             s->block_index[2]+=2;
2438             s->block_index[3]+=2;
2439
2440             /* compute motion vector & mb_type and store in context */
2441             if(s->pict_type==AV_PICTURE_TYPE_B)
2442                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2443             else
2444                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2445         }
2446         s->first_slice_line=0;
2447     }
2448     return 0;
2449 }
2450
2451 static int mb_var_thread(AVCodecContext *c, void *arg){
2452     MpegEncContext *s= *(void**)arg;
2453     int mb_x, mb_y;
2454
2455     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2456         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2457             int xx = mb_x * 16;
2458             int yy = mb_y * 16;
2459             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2460             int varc;
2461             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2462
2463             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2464                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2465
2466             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2467             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2468             s->me.mb_var_sum_temp    += varc;
2469         }
2470     }
2471     return 0;
2472 }
2473
2474 static void write_slice_end(MpegEncContext *s){
2475     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2476         if(s->partitioned_frame){
2477             ff_mpeg4_merge_partitions(s);
2478         }
2479
2480         ff_mpeg4_stuffing(&s->pb);
2481     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2482         ff_mjpeg_encode_stuffing(&s->pb);
2483     }
2484
2485     avpriv_align_put_bits(&s->pb);
2486     flush_put_bits(&s->pb);
2487
2488     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2489         s->misc_bits+= get_bits_diff(s);
2490 }
2491
2492 static void write_mb_info(MpegEncContext *s)
2493 {
2494     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2495     int offset = put_bits_count(&s->pb);
2496     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2497     int gobn = s->mb_y / s->gob_index;
2498     int pred_x, pred_y;
2499     if (CONFIG_H263_ENCODER)
2500         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2501     bytestream_put_le32(&ptr, offset);
2502     bytestream_put_byte(&ptr, s->qscale);
2503     bytestream_put_byte(&ptr, gobn);
2504     bytestream_put_le16(&ptr, mba);
2505     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2506     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2507     /* 4MV not implemented */
2508     bytestream_put_byte(&ptr, 0); /* hmv2 */
2509     bytestream_put_byte(&ptr, 0); /* vmv2 */
2510 }
2511
2512 static void update_mb_info(MpegEncContext *s, int startcode)
2513 {
2514     if (!s->mb_info)
2515         return;
2516     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2517         s->mb_info_size += 12;
2518         s->prev_mb_info = s->last_mb_info;
2519     }
2520     if (startcode) {
2521         s->prev_mb_info = put_bits_count(&s->pb)/8;
2522         /* This might have incremented mb_info_size above, and we return without
2523          * actually writing any info into that slot yet. But in that case,
2524          * this will be called again at the start of the after writing the
2525          * start code, actually writing the mb info. */
2526         return;
2527     }
2528
2529     s->last_mb_info = put_bits_count(&s->pb)/8;
2530     if (!s->mb_info_size)
2531         s->mb_info_size += 12;
2532     write_mb_info(s);
2533 }
2534
2535 static int encode_thread(AVCodecContext *c, void *arg){
2536     MpegEncContext *s= *(void**)arg;
2537     int mb_x, mb_y, pdif = 0;
2538     int chr_h= 16>>s->chroma_y_shift;
2539     int i, j;
2540     MpegEncContext best_s, backup_s;
2541     uint8_t bit_buf[2][MAX_MB_BYTES];
2542     uint8_t bit_buf2[2][MAX_MB_BYTES];
2543     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2544     PutBitContext pb[2], pb2[2], tex_pb[2];
2545
2546     for(i=0; i<2; i++){
2547         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2548         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2549         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2550     }
2551
2552     s->last_bits= put_bits_count(&s->pb);
2553     s->mv_bits=0;
2554     s->misc_bits=0;
2555     s->i_tex_bits=0;
2556     s->p_tex_bits=0;
2557     s->i_count=0;
2558     s->f_count=0;
2559     s->b_count=0;
2560     s->skip_count=0;
2561
2562     for(i=0; i<3; i++){
2563         /* init last dc values */
2564         /* note: quant matrix value (8) is implied here */
2565         s->last_dc[i] = 128 << s->intra_dc_precision;
2566
2567         s->current_picture.f->error[i] = 0;
2568     }
2569     s->mb_skip_run = 0;
2570     memset(s->last_mv, 0, sizeof(s->last_mv));
2571
2572     s->last_mv_dir = 0;
2573
2574     switch(s->codec_id){
2575     case AV_CODEC_ID_H263:
2576     case AV_CODEC_ID_H263P:
2577     case AV_CODEC_ID_FLV1:
2578         if (CONFIG_H263_ENCODER)
2579             s->gob_index = ff_h263_get_gob_height(s);
2580         break;
2581     case AV_CODEC_ID_MPEG4:
2582         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2583             ff_mpeg4_init_partitions(s);
2584         break;
2585     }
2586
2587     s->resync_mb_x=0;
2588     s->resync_mb_y=0;
2589     s->first_slice_line = 1;
2590     s->ptr_lastgob = s->pb.buf;
2591     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2592         s->mb_x=0;
2593         s->mb_y= mb_y;
2594
2595         ff_set_qscale(s, s->qscale);
2596         ff_init_block_index(s);
2597
2598         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2599             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2600             int mb_type= s->mb_type[xy];
2601 //            int d;
2602             int dmin= INT_MAX;
2603             int dir;
2604
2605             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2606                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2607                 return -1;
2608             }
2609             if(s->data_partitioning){
2610                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2611                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2612                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2613                     return -1;
2614                 }
2615             }
2616
2617             s->mb_x = mb_x;
2618             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2619             ff_update_block_index(s);
2620
2621             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2622                 ff_h261_reorder_mb_index(s);
2623                 xy= s->mb_y*s->mb_stride + s->mb_x;
2624                 mb_type= s->mb_type[xy];
2625             }
2626
2627             /* write gob / video packet header  */
2628             if(s->rtp_mode){
2629                 int current_packet_size, is_gob_start;
2630
2631                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2632
2633                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2634
2635                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2636
2637                 switch(s->codec_id){
2638                 case AV_CODEC_ID_H263:
2639                 case AV_CODEC_ID_H263P:
2640                     if(!s->h263_slice_structured)
2641                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2642                     break;
2643                 case AV_CODEC_ID_MPEG2VIDEO:
2644                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2645                 case AV_CODEC_ID_MPEG1VIDEO:
2646                     if(s->mb_skip_run) is_gob_start=0;
2647                     break;
2648                 }
2649
2650                 if(is_gob_start){
2651                     if(s->start_mb_y != mb_y || mb_x!=0){
2652                         write_slice_end(s);
2653
2654                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2655                             ff_mpeg4_init_partitions(s);
2656                         }
2657                     }
2658
2659                     assert((put_bits_count(&s->pb)&7) == 0);
2660                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2661
2662                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2663                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2664                         int d = 100 / s->error_rate;
2665                         if(r % d == 0){
2666                             current_packet_size=0;
2667                             s->pb.buf_ptr= s->ptr_lastgob;
2668                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2669                         }
2670                     }
2671
2672                     if (s->avctx->rtp_callback){
2673                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2674                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2675                     }
2676                     update_mb_info(s, 1);
2677
2678                     switch(s->codec_id){
2679                     case AV_CODEC_ID_MPEG4:
2680                         if (CONFIG_MPEG4_ENCODER) {
2681                             ff_mpeg4_encode_video_packet_header(s);
2682                             ff_mpeg4_clean_buffers(s);
2683                         }
2684                     break;
2685                     case AV_CODEC_ID_MPEG1VIDEO:
2686                     case AV_CODEC_ID_MPEG2VIDEO:
2687                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2688                             ff_mpeg1_encode_slice_header(s);
2689                             ff_mpeg1_clean_buffers(s);
2690                         }
2691                     break;
2692                     case AV_CODEC_ID_H263:
2693                     case AV_CODEC_ID_H263P:
2694                         if (CONFIG_H263_ENCODER)
2695                             ff_h263_encode_gob_header(s, mb_y);
2696                     break;
2697                     }
2698
2699                     if(s->flags&CODEC_FLAG_PASS1){
2700                         int bits= put_bits_count(&s->pb);
2701                         s->misc_bits+= bits - s->last_bits;
2702                         s->last_bits= bits;
2703                     }
2704
2705                     s->ptr_lastgob += current_packet_size;
2706                     s->first_slice_line=1;
2707                     s->resync_mb_x=mb_x;
2708                     s->resync_mb_y=mb_y;
2709                 }
2710             }
2711
2712             if(  (s->resync_mb_x   == s->mb_x)
2713                && s->resync_mb_y+1 == s->mb_y){
2714                 s->first_slice_line=0;
2715             }
2716
2717             s->mb_skipped=0;
2718             s->dquant=0; //only for QP_RD
2719
2720             update_mb_info(s, 0);
2721
2722             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2723                 int next_block=0;
2724                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2725
2726                 copy_context_before_encode(&backup_s, s, -1);
2727                 backup_s.pb= s->pb;
2728                 best_s.data_partitioning= s->data_partitioning;
2729                 best_s.partitioned_frame= s->partitioned_frame;
2730                 if(s->data_partitioning){
2731                     backup_s.pb2= s->pb2;
2732                     backup_s.tex_pb= s->tex_pb;
2733                 }
2734
2735                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2736                     s->mv_dir = MV_DIR_FORWARD;
2737                     s->mv_type = MV_TYPE_16X16;
2738                     s->mb_intra= 0;
2739                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2740                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2741                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2742                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2743                 }
2744                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2745                     s->mv_dir = MV_DIR_FORWARD;
2746                     s->mv_type = MV_TYPE_FIELD;
2747                     s->mb_intra= 0;
2748                     for(i=0; i<2; i++){
2749                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2750                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2751                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2752                     }
2753                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2754                                  &dmin, &next_block, 0, 0);
2755                 }
2756                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2757                     s->mv_dir = MV_DIR_FORWARD;
2758                     s->mv_type = MV_TYPE_16X16;
2759                     s->mb_intra= 0;
2760                     s->mv[0][0][0] = 0;
2761                     s->mv[0][0][1] = 0;
2762                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2763                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2764                 }
2765                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2766                     s->mv_dir = MV_DIR_FORWARD;
2767                     s->mv_type = MV_TYPE_8X8;
2768                     s->mb_intra= 0;
2769                     for(i=0; i<4; i++){
2770                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2771                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2772                     }
2773                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2774                                  &dmin, &next_block, 0, 0);
2775                 }
2776                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2777                     s->mv_dir = MV_DIR_FORWARD;
2778                     s->mv_type = MV_TYPE_16X16;
2779                     s->mb_intra= 0;
2780                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2781                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2782                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2783                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2784                 }
2785                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2786                     s->mv_dir = MV_DIR_BACKWARD;
2787                     s->mv_type = MV_TYPE_16X16;
2788                     s->mb_intra= 0;
2789                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2790                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2791                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2792                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2793                 }
2794                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2795                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2796                     s->mv_type = MV_TYPE_16X16;
2797                     s->mb_intra= 0;
2798                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2799                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2800                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2801                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2802                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2803                                  &dmin, &next_block, 0, 0);
2804                 }
2805                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2806                     s->mv_dir = MV_DIR_FORWARD;
2807                     s->mv_type = MV_TYPE_FIELD;
2808                     s->mb_intra= 0;
2809                     for(i=0; i<2; i++){
2810                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2811                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2812                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2813                     }
2814                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2815                                  &dmin, &next_block, 0, 0);
2816                 }
2817                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2818                     s->mv_dir = MV_DIR_BACKWARD;
2819                     s->mv_type = MV_TYPE_FIELD;
2820                     s->mb_intra= 0;
2821                     for(i=0; i<2; i++){
2822                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2823                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2824                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2825                     }
2826                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2827                                  &dmin, &next_block, 0, 0);
2828                 }
2829                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2830                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2831                     s->mv_type = MV_TYPE_FIELD;
2832                     s->mb_intra= 0;
2833                     for(dir=0; dir<2; dir++){
2834                         for(i=0; i<2; i++){
2835                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2836                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2837                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2838                         }
2839                     }
2840                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2841                                  &dmin, &next_block, 0, 0);
2842                 }
2843                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2844                     s->mv_dir = 0;
2845                     s->mv_type = MV_TYPE_16X16;
2846                     s->mb_intra= 1;
2847                     s->mv[0][0][0] = 0;
2848                     s->mv[0][0][1] = 0;
2849                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2850                                  &dmin, &next_block, 0, 0);
2851                     if(s->h263_pred || s->h263_aic){
2852                         if(best_s.mb_intra)
2853                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2854                         else
2855                             ff_clean_intra_table_entries(s); //old mode?
2856                     }
2857                 }
2858
2859                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2860                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2861                         const int last_qp= backup_s.qscale;
2862                         int qpi, qp, dc[6];
2863                         int16_t ac[6][16];
2864                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2865                         static const int dquant_tab[4]={-1,1,-2,2};
2866
2867                         assert(backup_s.dquant == 0);
2868
2869                         //FIXME intra
2870                         s->mv_dir= best_s.mv_dir;
2871                         s->mv_type = MV_TYPE_16X16;
2872                         s->mb_intra= best_s.mb_intra;
2873                         s->mv[0][0][0] = best_s.mv[0][0][0];
2874                         s->mv[0][0][1] = best_s.mv[0][0][1];
2875                         s->mv[1][0][0] = best_s.mv[1][0][0];
2876                         s->mv[1][0][1] = best_s.mv[1][0][1];
2877
2878                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2879                         for(; qpi<4; qpi++){
2880                             int dquant= dquant_tab[qpi];
2881                             qp= last_qp + dquant;
2882                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2883                                 continue;
2884                             backup_s.dquant= dquant;
2885                             if(s->mb_intra && s->dc_val[0]){
2886                                 for(i=0; i<6; i++){
2887                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2888                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2889                                 }
2890                             }
2891
2892                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2893                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2894                             if(best_s.qscale != qp){
2895                                 if(s->mb_intra && s->dc_val[0]){
2896                                     for(i=0; i<6; i++){
2897                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2898                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2899                                     }
2900                                 }
2901                             }
2902                         }
2903                     }
2904                 }
2905                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2906                     int mx= s->b_direct_mv_table[xy][0];
2907                     int my= s->b_direct_mv_table[xy][1];
2908
2909                     backup_s.dquant = 0;
2910                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2911                     s->mb_intra= 0;
2912                     ff_mpeg4_set_direct_mv(s, mx, my);
2913                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2914                                  &dmin, &next_block, mx, my);
2915                 }
2916                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2917                     backup_s.dquant = 0;
2918                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2919                     s->mb_intra= 0;
2920                     ff_mpeg4_set_direct_mv(s, 0, 0);
2921                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2922                                  &dmin, &next_block, 0, 0);
2923                 }
2924                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2925                     int coded=0;
2926                     for(i=0; i<6; i++)
2927                         coded |= s->block_last_index[i];
2928                     if(coded){
2929                         int mx,my;
2930                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2931                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2932                             mx=my=0; //FIXME find the one we actually used
2933                             ff_mpeg4_set_direct_mv(s, mx, my);
2934                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2935                             mx= s->mv[1][0][0];
2936                             my= s->mv[1][0][1];
2937                         }else{
2938                             mx= s->mv[0][0][0];
2939                             my= s->mv[0][0][1];
2940                         }
2941
2942                         s->mv_dir= best_s.mv_dir;
2943                         s->mv_type = best_s.mv_type;
2944                         s->mb_intra= 0;
2945 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2946                         s->mv[0][0][1] = best_s.mv[0][0][1];
2947                         s->mv[1][0][0] = best_s.mv[1][0][0];
2948                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2949                         backup_s.dquant= 0;
2950                         s->skipdct=1;
2951                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2952                                         &dmin, &next_block, mx, my);
2953                         s->skipdct=0;
2954                     }
2955                 }
2956
2957                 s->current_picture.qscale_table[xy] = best_s.qscale;
2958
2959                 copy_context_after_encode(s, &best_s, -1);
2960
2961                 pb_bits_count= put_bits_count(&s->pb);
2962                 flush_put_bits(&s->pb);
2963                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2964                 s->pb= backup_s.pb;
2965
2966                 if(s->data_partitioning){
2967                     pb2_bits_count= put_bits_count(&s->pb2);
2968                     flush_put_bits(&s->pb2);
2969                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2970                     s->pb2= backup_s.pb2;
2971
2972                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2973                     flush_put_bits(&s->tex_pb);
2974                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2975                     s->tex_pb= backup_s.tex_pb;
2976                 }
2977                 s->last_bits= put_bits_count(&s->pb);
2978
2979                 if (CONFIG_H263_ENCODER &&
2980                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2981                     ff_h263_update_motion_val(s);
2982
2983                 if(next_block==0){ //FIXME 16 vs linesize16
2984                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2985                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2986                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2987                 }
2988
2989                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2990                     ff_mpv_decode_mb(s, s->block);
2991             } else {
2992                 int motion_x = 0, motion_y = 0;
2993                 s->mv_type=MV_TYPE_16X16;
2994                 // only one MB-Type possible
2995
2996                 switch(mb_type){
2997                 case CANDIDATE_MB_TYPE_INTRA:
2998                     s->mv_dir = 0;
2999                     s->mb_intra= 1;
3000                     motion_x= s->mv[0][0][0] = 0;
3001                     motion_y= s->mv[0][0][1] = 0;
3002                     break;
3003                 case CANDIDATE_MB_TYPE_INTER:
3004                     s->mv_dir = MV_DIR_FORWARD;
3005                     s->mb_intra= 0;
3006                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3007                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3008                     break;
3009                 case CANDIDATE_MB_TYPE_INTER_I:
3010                     s->mv_dir = MV_DIR_FORWARD;
3011                     s->mv_type = MV_TYPE_FIELD;
3012                     s->mb_intra= 0;
3013                     for(i=0; i<2; i++){
3014                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3015                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3016                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3017                     }
3018                     break;
3019                 case CANDIDATE_MB_TYPE_INTER4V:
3020                     s->mv_dir = MV_DIR_FORWARD;
3021                     s->mv_type = MV_TYPE_8X8;
3022                     s->mb_intra= 0;
3023                     for(i=0; i<4; i++){
3024                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3025                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3026                     }
3027                     break;
3028                 case CANDIDATE_MB_TYPE_DIRECT:
3029                     if (CONFIG_MPEG4_ENCODER) {
3030                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3031                         s->mb_intra= 0;
3032                         motion_x=s->b_direct_mv_table[xy][0];
3033                         motion_y=s->b_direct_mv_table[xy][1];
3034                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3035                     }
3036                     break;
3037                 case CANDIDATE_MB_TYPE_DIRECT0:
3038                     if (CONFIG_MPEG4_ENCODER) {
3039                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3040                         s->mb_intra= 0;
3041                         ff_mpeg4_set_direct_mv(s, 0, 0);
3042                     }
3043                     break;
3044                 case CANDIDATE_MB_TYPE_BIDIR:
3045                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3046                     s->mb_intra= 0;
3047                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3048                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3049                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3050                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3051                     break;
3052                 case CANDIDATE_MB_TYPE_BACKWARD:
3053                     s->mv_dir = MV_DIR_BACKWARD;
3054                     s->mb_intra= 0;
3055                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3056                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3057                     break;
3058                 case CANDIDATE_MB_TYPE_FORWARD:
3059                     s->mv_dir = MV_DIR_FORWARD;
3060                     s->mb_intra= 0;
3061                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3062                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3063                     break;
3064                 case CANDIDATE_MB_TYPE_FORWARD_I:
3065                     s->mv_dir = MV_DIR_FORWARD;
3066                     s->mv_type = MV_TYPE_FIELD;
3067                     s->mb_intra= 0;
3068                     for(i=0; i<2; i++){
3069                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3070                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3071                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3072                     }
3073                     break;
3074                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3075                     s->mv_dir = MV_DIR_BACKWARD;
3076                     s->mv_type = MV_TYPE_FIELD;
3077                     s->mb_intra= 0;
3078                     for(i=0; i<2; i++){
3079                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3080                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3081                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3082                     }
3083                     break;
3084                 case CANDIDATE_MB_TYPE_BIDIR_I:
3085                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3086                     s->mv_type = MV_TYPE_FIELD;
3087                     s->mb_intra= 0;
3088                     for(dir=0; dir<2; dir++){
3089                         for(i=0; i<2; i++){
3090                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3091                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3092                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3093                         }
3094                     }
3095                     break;
3096                 default:
3097                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3098                 }
3099
3100                 encode_mb(s, motion_x, motion_y);
3101
3102                 // RAL: Update last macroblock type
3103                 s->last_mv_dir = s->mv_dir;
3104
3105                 if (CONFIG_H263_ENCODER &&
3106                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3107                     ff_h263_update_motion_val(s);
3108
3109                 ff_mpv_decode_mb(s, s->block);
3110             }
3111
3112             /* clean the MV table in IPS frames for direct mode in B frames */
3113             if(s->mb_intra /* && I,P,S_TYPE */){
3114                 s->p_mv_table[xy][0]=0;
3115                 s->p_mv_table[xy][1]=0;
3116             }
3117
3118             if(s->flags&CODEC_FLAG_PSNR){
3119                 int w= 16;
3120                 int h= 16;
3121
3122                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3123                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3124
3125                 s->current_picture.f->error[0] += sse(
3126                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3127                     s->dest[0], w, h, s->linesize);
3128                 s->current_picture.f->error[1] += sse(
3129                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3130                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3131                 s->current_picture.f->error[2] += sse(
3132                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3133                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3134             }
3135             if(s->loop_filter){
3136                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3137                     ff_h263_loop_filter(s);
3138             }
3139             av_dlog(s->avctx, "MB %d %d bits\n",
3140                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3141         }
3142     }
3143
3144     //not beautiful here but we must write it before flushing so it has to be here
3145     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3146         ff_msmpeg4_encode_ext_header(s);
3147
3148     write_slice_end(s);
3149
3150     /* Send the last GOB if RTP */
3151     if (s->avctx->rtp_callback) {
3152         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3153         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3154         /* Call the RTP callback to send the last GOB */
3155         emms_c();
3156         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3157     }
3158
3159     return 0;
3160 }
3161
/*
 * Add src->field into dst->field and reset src->field to 0.
 *
 * Relies on pointers named `dst` and `src` being in scope where it is used.
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement; without that, using it as the unbraced body of an if/else/for
 * would leave the "src->field = 0" part outside the controlled statement.
 */
#define MERGE(field)                \
    do {                            \
        dst->field += src->field;   \
        src->field  = 0;            \
    } while (0)
3163 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3164     MERGE(me.scene_change_score);
3165     MERGE(me.mc_mb_var_sum_temp);
3166     MERGE(me.mb_var_sum_temp);
3167 }
3168
3169 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3170     int i;
3171
3172     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3173     MERGE(dct_count[1]);
3174     MERGE(mv_bits);
3175     MERGE(i_tex_bits);
3176     MERGE(p_tex_bits);
3177     MERGE(i_count);
3178     MERGE(f_count);
3179     MERGE(b_count);
3180     MERGE(skip_count);
3181     MERGE(misc_bits);
3182     MERGE(er.error_count);
3183     MERGE(padding_bug_score);
3184     MERGE(current_picture.f->error[0]);
3185     MERGE(current_picture.f->error[1]);
3186     MERGE(current_picture.f->error[2]);
3187
3188     if(dst->avctx->noise_reduction){
3189         for(i=0; i<64; i++){
3190             MERGE(dct_error_sum[0][i]);
3191             MERGE(dct_error_sum[1][i]);
3192         }
3193     }
3194
3195     assert(put_bits_count(&src->pb) % 8 ==0);
3196     assert(put_bits_count(&dst->pb) % 8 ==0);
3197     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3198     flush_put_bits(&dst->pb);
3199 }
3200
3201 static int estimate_qp(MpegEncContext *s, int dry_run){
3202     if (s->next_lambda){
3203         s->current_picture_ptr->f->quality =
3204         s->current_picture.f->quality = s->next_lambda;
3205         if(!dry_run) s->next_lambda= 0;
3206     } else if (!s->fixed_qscale) {
3207         s->current_picture_ptr->f->quality =
3208         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3209         if (s->current_picture.f->quality < 0)
3210             return -1;
3211     }
3212
3213     if(s->adaptive_quant){
3214         switch(s->codec_id){
3215         case AV_CODEC_ID_MPEG4:
3216             if (CONFIG_MPEG4_ENCODER)
3217                 ff_clean_mpeg4_qscales(s);
3218             break;
3219         case AV_CODEC_ID_H263:
3220         case AV_CODEC_ID_H263P:
3221         case AV_CODEC_ID_FLV1:
3222             if (CONFIG_H263_ENCODER)
3223                 ff_clean_h263_qscales(s);
3224             break;
3225         default:
3226             ff_init_qscale_tab(s);
3227         }
3228
3229         s->lambda= s->lambda_table[0];
3230         //FIXME broken
3231     }else
3232         s->lambda = s->current_picture.f->quality;
3233     update_qscale(s);
3234     return 0;
3235 }
3236
3237 /* must be called before writing the header */
3238 static void set_frame_distances(MpegEncContext * s){
3239     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3240     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3241
3242     if(s->pict_type==AV_PICTURE_TYPE_B){
3243         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3244         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3245     }else{
3246         s->pp_time= s->time - s->last_non_b_time;
3247         s->last_non_b_time= s->time;
3248         assert(s->picture_number==0 || s->pp_time > 0);
3249     }
3250 }
3251
/**
 * Encode the current picture into s->pb.
 *
 * Steps, in order:
 *  - set up frame timing and the rounding mode,
 *  - pick a preliminary lambda (two-pass dry run or last used lambda),
 *  - run motion estimation (non-I pictures) or, for I pictures without a
 *    fixed qscale, the spatial variance pass, on all slice contexts through
 *    avctx->execute(), then merge the per-context statistics,
 *  - possibly turn a P picture into an I picture on scene change,
 *  - select f_code / b_code and fix up too long motion vectors,
 *  - run estimate_qp() for the final quality,
 *  - write the codec specific picture header,
 *  - run encode_thread on all slice contexts and merge their output.
 *
 * @param s              encoder context (main context; the additional
 *                       contexts are s->thread_context[1..])
 * @param picture_number stored in s->picture_number and forwarded to the
 *                       picture header writers
 * @return 0 on success, a negative value on failure
 */
3252 static int encode_picture(MpegEncContext *s, int picture_number)
3253 {
3254     int i, ret;
3255     int bits;
3256     int context_count = s->slice_context_count;
3257
3258     s->picture_number = picture_number;
3259
3260     /* Reset the average MB variance */
3261     s->me.mb_var_sum_temp    =
3262     s->me.mc_mb_var_sum_temp = 0;
3263
3264     /* we need to initialize some time vars before we can encode b-frames */
3265     // RAL: Condition added for MPEG1VIDEO
3266     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3267         set_frame_distances(s);
3268     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3269         ff_set_mpeg4_time(s);
3270
3271     s->me.scene_change_score=0;
3272
3273 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3274
         /* rounding mode: fixed for I pictures, toggled on every other non-B
          * picture for the codecs / option listed below, untouched for B */
3275     if(s->pict_type==AV_PICTURE_TYPE_I){
3276         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3277         else                        s->no_rounding=0;
3278     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3279         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3280             s->no_rounding ^= 1;
3281     }
3282
         /* preliminary lambda used during motion estimation: a dry-run rate
          * estimate in the second pass, otherwise (unless qscale is fixed by
          * the flag) the lambda remembered in last_lambda_for[] */
3283     if(s->flags & CODEC_FLAG_PASS2){
3284         if (estimate_qp(s,1) < 0)
3285             return -1;
3286         ff_get_2pass_fcode(s);
3287     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3288         if(s->pict_type==AV_PICTURE_TYPE_B)
3289             s->lambda= s->last_lambda_for[s->pict_type];
3290         else
3291             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3292         update_qscale(s);
3293     }
3294
3295     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring the additional slice contexts in sync with the main one */
3296     for(i=1; i<context_count; i++){
3297         ret = ff_update_duplicate_context(s->thread_context[i], s);
3298         if (ret < 0)
3299             return ret;
3300     }
3301
3302     if(ff_init_me(s)<0)
3303         return -1;
3304
3305     /* Estimate motion for every MB */
3306     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda / lambda2 by me_penalty_compensation/256 (rounded) */
3307         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3308         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3309         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* optional pre-pass: pre_me==2 always, any other non-zero
                  * pre_me only when the previous non-B picture was an I picture */
3310             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3311                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3312             }
3313         }
3314
3315         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3316     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3317         /* I-Frame */
3318         for(i=0; i<s->mb_stride*s->mb_height; i++)
3319             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3320
3321         if(!s->fixed_qscale){
3322             /* finding spatial complexity for I-frame rate control */
3323             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3324         }
3325     }
         /* collect the statistics gathered by the additional contexts */
3326     for(i=1; i<context_count; i++){
3327         merge_context_after_me(s, s->thread_context[i]);
3328     }
3329     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3330     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3331     emms_c();
3332
         /* scene change: re-type a P picture as I and mark every MB intra */
3333     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3334         s->pict_type= AV_PICTURE_TYPE_I;
3335         for(i=0; i<s->mb_stride*s->mb_height; i++)
3336             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3337         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3338                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3339     }
3340
         /* f_code / b_code selection and fixing of over-long motion vectors;
          * skipped entirely when umvplus is set */
3341     if(!s->umvplus){
3342         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3343             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3344
3345             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3346                 int a,b;
3347                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3348                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3349                 s->f_code= FFMAX3(s->f_code, a, b);
3350             }
3351
3352             ff_fix_long_p_mvs(s);
3353             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3354             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3355                 int j;
3356                 for(i=0; i<2; i++){
3357                     for(j=0; j<2; j++)
3358                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3359                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3360                 }
3361             }
3362         }
3363
3364         if(s->pict_type==AV_PICTURE_TYPE_B){
3365             int a, b;
3366
                 /* f_code must cover both the forward and the bidir-forward table */
3367             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3368             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3369             s->f_code = FFMAX(a, b);
3370
                 /* likewise b_code for the backward and the bidir-backward table */
3371             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3372             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3373             s->b_code = FFMAX(a, b);
3374
3375             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3376             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3377             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3378             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3379             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3380                 int dir, j;
3381                 for(dir=0; dir<2; dir++){
3382                     for(i=0; i<2; i++){
3383                         for(j=0; j<2; j++){
3384                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3385                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3386                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3387                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3388                         }
3389                     }
3390                 }
3391             }
3392         }
3393     }
3394
         /* final (non dry-run) quality decision for this picture */
3395     if (estimate_qp(s, 0) < 0)
3396         return -1;
3397
3398     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3399         s->qscale= 3; //reduce clipping problems
3400
3401     if (s->out_format == FMT_MJPEG) {
3402         /* for mjpeg, we do include qscale in the matrix */
3403         for(i=1;i<64;i++){
3404             int j = s->idsp.idct_permutation[i];
3405
3406             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3407         }
3408         s->y_dc_scale_table=
3409         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3410         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3411         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3412                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into intra_matrix above, so a fixed
              * value is used from here on */
3413         s->qscale= 8;
3414     }
3415
3416     //FIXME var duplication
3417     s->current_picture_ptr->f->key_frame =
3418     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3419     s->current_picture_ptr->f->pict_type =
3420     s->current_picture.f->pict_type = s->pict_type;
3421
3422     if (s->current_picture.f->key_frame)
3423         s->picture_in_gop_number=0;
3424
         /* write the picture header; the bit position before and after gives
          * header_bits */
3425     s->last_bits= put_bits_count(&s->pb);
3426     switch(s->out_format) {
3427     case FMT_MJPEG:
3428         if (CONFIG_MJPEG_ENCODER)
3429             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3430                                            s->intra_matrix);
3431         break;
3432     case FMT_H261:
3433         if (CONFIG_H261_ENCODER)
3434             ff_h261_encode_picture_header(s, picture_number);
3435         break;
3436     case FMT_H263:
3437         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3438             ff_wmv2_encode_picture_header(s, picture_number);
3439         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3440             ff_msmpeg4_encode_picture_header(s, picture_number);
3441         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3442             ff_mpeg4_encode_picture_header(s, picture_number);
3443         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3444             ff_rv10_encode_picture_header(s, picture_number);
3445         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3446             ff_rv20_encode_picture_header(s, picture_number);
3447         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3448             ff_flv_encode_picture_header(s, picture_number);
3449         else if (CONFIG_H263_ENCODER)
3450             ff_h263_encode_picture_header(s, picture_number);
3451         break;
3452     case FMT_MPEG1:
3453         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3454             ff_mpeg1_encode_picture_header(s, picture_number);
3455         break;
3456     default:
3457         assert(0);
3458     }
3459     bits= put_bits_count(&s->pb);
3460     s->header_bits= bits - s->last_bits;
3461
         /* propagate the decisions made above to the other contexts, encode
          * all slices, then gather statistics and bitstreams back into s */
3462     for(i=1; i<context_count; i++){
3463         update_duplicate_context_after_me(s->thread_context[i], s);
3464     }
3465     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3466     for(i=1; i<context_count; i++){
3467         merge_context_after_encode(s, s->thread_context[i]);
3468     }
3469     emms_c();
3470     return 0;
3471 }
3472
3473 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3474     const int intra= s->mb_intra;
3475     int i;
3476
3477     s->dct_count[intra]++;
3478
3479     for(i=0; i<64; i++){
3480         int level= block[i];
3481
3482         if(level){
3483             if(level>0){
3484                 s->dct_error_sum[intra][i] += level;
3485                 level -= s->dct_offset[intra][i];
3486                 if(level<0) level=0;
3487             }else{
3488                 s->dct_error_sum[intra][i] -= level;
3489                 level += s->dct_offset[intra][i];
3490                 if(level>0) level=0;
3491             }
3492             block[i]= level;
3493         }
3494     }
3495 }
3496
/**
 * Forward DCT followed by rate-distortion optimised ("trellis") quantization
 * of one block.
 *
 * For each coefficient up to the last one that plain quantization would keep,
 * up to two candidate levels are considered. A dynamic program over
 * (run, level) pairs then picks the sequence that minimises
 * distortion + lambda * bits, using the VLC length tables of the context.
 *
 * @param s        encoder context
 * @param block    in: the data handed to s->fdsp.fdct();
 *                 out: the chosen quantized levels (for intra blocks block[0]
 *                 holds the separately quantized DC)
 * @param n        block number; n < 4 selects y_dc_scale, otherwise
 *                 c_dc_scale, and it indexes s->coded_score[]
 * @param qscale   quantizer scale; selects the row of q_intra_matrix /
 *                 q_inter_matrix
 * @param overflow out: set to non-zero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan position of the last coefficient kept; a value below the first
 *         coded position (1 for intra, 0 for inter) means none was kept
 */
3497 static int dct_quantize_trellis_c(MpegEncContext *s,
3498                                   int16_t *block, int n,
3499                                   int qscale, int *overflow){
3500     const int *qmat;
3501     const uint8_t *scantable= s->intra_scantable.scantable;
3502     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3503     int max=0;
3504     unsigned int threshold1, threshold2;
3505     int bias=0;
         /* the DP tables below are indexed by scan position + 1 */
3506     int run_tab[65];
3507     int level_tab[65];
3508     int score_tab[65];
3509     int survivor[65];
3510     int survivor_count;
3511     int last_run=0;
3512     int last_level=0;
3513     int last_score= 0;
3514     int last_i;
         /* coeff[c][i]: candidate level c for scan position i,
          * coeff_count[i]: number of candidates (1 or 2) */
3515     int coeff[2][64];
3516     int coeff_count[64];
3517     int qmul, qadd, start_i, last_non_zero, i, dc;
3518     const int esc_length= s->ac_esc_length;
3519     uint8_t * length;
3520     uint8_t * last_length;
3521     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3522
3523     s->fdsp.fdct(block);
3524
3525     if(s->dct_error_sum)
3526         s->denoise_dct(s, block);
         /* constants for the FMT_H263 reconstruction "alevel*qmul + qadd"
          * used in the distortion estimate further down */
3527     qmul= qscale*16;
3528     qadd= ((qscale-1)|1)*8;
3529
3530     if (s->mb_intra) {
3531         int q;
3532         if (!s->h263_aic) {
3533             if (n < 4)
3534                 q = s->y_dc_scale;
3535             else
3536                 q = s->c_dc_scale;
3537             q = q << 3;
3538         } else{
3539             /* For AIC we skip quant/dequant of INTRADC */
3540             q = 1 << 3;
3541             qadd=0;
3542         }
3543
3544         /* note: block[0] is assumed to be positive */
3545         block[0] = (block[0] + (q >> 1)) / q;
             /* DC is done; the trellis only covers positions 1..63 */
3546         start_i = 1;
3547         last_non_zero = 0;
3548         qmat = s->q_intra_matrix[qscale];
3549         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3550             bias= 1<<(QMAT_SHIFT-1);
3551         length     = s->intra_ac_vlc_length;
3552         last_length= s->intra_ac_vlc_last_length;
3553     } else {
3554         start_i = 0;
3555         last_non_zero = -1;
3556         qmat = s->q_inter_matrix[qscale];
3557         length     = s->inter_ac_vlc_length;
3558         last_length= s->inter_ac_vlc_last_length;
3559     }
3560     last_i= start_i;
3561
         /* (unsigned)(level + threshold1) > threshold2 is a single-compare
          * test for level being outside [-threshold1, threshold1], i.e. for
          * the coefficient quantizing to a non-zero value */
3562     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3563     threshold2= (threshold1<<1);
3564
         /* scan backwards for the last coefficient that quantizes to non-zero */
3565     for(i=63; i>=start_i; i--) {
3566         const int j = scantable[i];
3567         int level = block[j] * qmat[j];
3568
3569         if(((unsigned)(level+threshold1))>threshold2){
3570             last_non_zero = i;
3571             break;
3572         }
3573     }
3574
         /* build the candidate levels for every position up to last_non_zero */
3575     for(i=start_i; i<=last_non_zero; i++) {
3576         const int j = scantable[i];
3577         int level = block[j] * qmat[j];
3578
3579 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3580 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3581         if(((unsigned)(level+threshold1))>threshold2){
                 /* candidates: the quantized level and the next one towards
                  * zero; only the first when the magnitude is 1 */
3582             if(level>0){
3583                 level= (bias + level)>>QMAT_SHIFT;
3584                 coeff[0][i]= level;
3585                 coeff[1][i]= level-1;
3586 //                coeff[2][k]= level-2;
3587             }else{
3588                 level= (bias - level)>>QMAT_SHIFT;
3589                 coeff[0][i]= -level;
3590                 coeff[1][i]= -level+1;
3591 //                coeff[2][k]= -level+2;
3592             }
3593             coeff_count[i]= FFMIN(level, 2);
3594             assert(coeff_count[i]);
3595             max |=level;
3596         }else{
                 /* would quantize to zero: single candidate +1 or -1 carrying
                  * the sign of the coefficient */
3597             coeff[0][i]= (level>>31)|1;
3598             coeff_count[i]= 1;
3599         }
3600     }
3601
3602     *overflow= s->max_qcoeff < max; //overflow might have happened
3603
         /* nothing survives plain quantization: clear the block and stop */
3604     if(last_non_zero < start_i){
3605         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3606         return last_non_zero;
3607     }
3608
         /* score_tab[p]: best score found for coding everything before scan
          * position p with a coded coefficient at p-1;
          * survivor[]: positions still worth considering as predecessor */
3609     score_tab[start_i]= 0;
3610     survivor[0]= start_i;
3611     survivor_count= 1;
3612
3613     for(i=start_i; i<=last_non_zero; i++){
3614         int level_index, j, zero_distortion;
3615         int dct_coeff= FFABS(block[ scantable[i] ]);
3616         int best_score=256*256*256*120;
3617
             /* undo the per-coefficient scaling of the ifast fdct output */
3618         if (s->fdsp.fdct == ff_fdct_ifast)
3619             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3620         zero_distortion= dct_coeff*dct_coeff;
3621
3622         for(level_index=0; level_index < coeff_count[i]; level_index++){
3623             int distortion;
3624             int level= coeff[level_index][i];
3625             const int alevel= FFABS(level);
3626             int unquant_coeff;
3627
3628             assert(level);
3629
                 /* reconstruct the magnitude the way the dequantizer would */
3630             if(s->out_format == FMT_H263){
3631                 unquant_coeff= alevel*qmul + qadd;
3632             }else{ //MPEG1
3633                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3634                 if(s->mb_intra){
3635                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3636                         unquant_coeff =   (unquant_coeff - 1) | 1;
3637                 }else{
3638                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3639                         unquant_coeff =   (unquant_coeff - 1) | 1;
3640                 }
3641                 unquant_coeff<<= 3;
3642             }
3643
                 /* distortion relative to coding this coefficient as zero */
3644             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* offset so that levels -64..63 map to 0..127, the range
                  * accepted by the test below before the length tables are
                  * used; anything else is costed as an escape */
3645             level+=64;
3646             if((level&(~127)) == 0){
3647                 for(j=survivor_count-1; j>=0; j--){
3648                     int run= i - survivor[j];
3649                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3650                     score += score_tab[i-run];
3651
3652                     if(score < best_score){
3653                         best_score= score;
3654                         run_tab[i+1]= run;
3655                         level_tab[i+1]= level-64;
3656                     }
3657                 }
3658
                     /* FMT_H263: a separate length table is used for the last
                      * coefficient, so the best end point is tracked here */
3659                 if(s->out_format == FMT_H263){
3660                     for(j=survivor_count-1; j>=0; j--){
3661                         int run= i - survivor[j];
3662                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3663                         score += score_tab[i-run];
3664                         if(score < last_score){
3665                             last_score= score;
3666                             last_run= run;
3667                             last_level= level-64;
3668                             last_i= i+1;
3669                         }
3670                     }
3671                 }
3672             }else{
                     /* escape coded: fixed cost independent of the run */
3673                 distortion += esc_length*lambda;
3674                 for(j=survivor_count-1; j>=0; j--){
3675                     int run= i - survivor[j];
3676                     int score= distortion + score_tab[i-run];
3677
3678                     if(score < best_score){
3679                         best_score= score;
3680                         run_tab[i+1]= run;
3681                         level_tab[i+1]= level-64;
3682                     }
3683                 }
3684
3685                 if(s->out_format == FMT_H263){
3686                   for(j=survivor_count-1; j>=0; j--){
3687                         int run= i - survivor[j];
3688                         int score= distortion + score_tab[i-run];
3689                         if(score < last_score){
3690                             last_score= score;
3691                             last_run= run;
3692                             last_level= level-64;
3693                             last_i= i+1;
3694                         }
3695                     }
3696                 }
3697             }
3698         }
3699
3700         score_tab[i+1]= best_score;
3701
             /* drop survivors that score worse than the new state; for longer
              * blocks a margin of lambda is allowed, see the note below */
3702         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3703         if(last_non_zero <= 27){
3704             for(; survivor_count; survivor_count--){
3705                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3706                     break;
3707             }
3708         }else{
3709             for(; survivor_count; survivor_count--){
3710                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3711                     break;
3712             }
3713         }
3714
3715         survivor[ survivor_count++ ]= i+1;
3716     }
3717
         /* other formats: choose the end point now from score_tab[]; the
          * lambda*2 term is presumably a rough end-of-block cost - confirm */
3718     if(s->out_format != FMT_H263){
3719         last_score= 256*256*256*120;
3720         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3721             int score= score_tab[i];
3722             if(i) score += lambda*2; //FIXME exacter?
3723
3724             if(score < last_score){
3725                 last_score= score;
3726                 last_i= i;
3727                 last_level= level_tab[i];
3728                 last_run= run_tab[i];
3729             }
3730         }
3731     }
3732
3733     s->coded_score[n] = last_score;
3734
         /* remember |block[0]| before the coefficients are wiped */
3735     dc= FFABS(block[0]);
3736     last_non_zero= last_i - 1;
3737     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3738
3739     if(last_non_zero < start_i)
3740         return last_non_zero;
3741
         /* inter block with only position 0 left: re-decide between the
          * candidate levels and dropping the coefficient altogether */
3742     if(last_non_zero == 0 && start_i == 0){
3743         int best_level= 0;
3744         int best_score= dc * dc;
3745
3746         for(i=0; i<coeff_count[0]; i++){
3747             int level= coeff[i][0];
3748             int alevel= FFABS(level);
3749             int unquant_coeff, score, distortion;
3750
3751             if(s->out_format == FMT_H263){
3752                     unquant_coeff= (alevel*qmul + qadd)>>3;
3753             }else{ //MPEG1
3754                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3755                     unquant_coeff =   (unquant_coeff - 1) | 1;
3756             }
3757             unquant_coeff = (unquant_coeff + 4) >> 3;
3758             unquant_coeff<<= 3 + 3;
3759
3760             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3761             level+=64;
3762             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3763             else                    score= distortion + esc_length*lambda;
3764
3765             if(score < best_score){
3766                 best_score= score;
3767                 best_level= level - 64;
3768             }
3769         }
3770         block[0]= best_level;
3771         s->coded_score[n] = best_score - dc*dc;
3772         if(best_level == 0) return -1;
3773         else                return last_non_zero;
3774     }
3775
         /* backtrack through run_tab[] / level_tab[] from the chosen end
          * point and write the selected levels into the block */
3776     i= last_i;
3777     assert(last_level);
3778
3779     block[ perm_scantable[last_non_zero] ]= last_level;
3780     i -= last_run + 1;
3781
3782     for(; i>start_i; i -= run_tab[i] + 1){
3783         block[ perm_scantable[i-1] ]= level_tab[i];
3784     }
3785
3786     return last_non_zero;
3787 }
3788
3789 //#define REFINE_STATS 1
3790 static int16_t basis[64][64];
3791
3792 static void build_basis(uint8_t *perm){
3793     int i, j, x, y;
3794     emms_c();
3795     for(i=0; i<8; i++){
3796         for(j=0; j<8; j++){
3797             for(y=0; y<8; y++){
3798                 for(x=0; x<8; x++){
3799                     double s= 0.25*(1<<BASIS_SHIFT);
3800                     int index= 8*i + j;
3801                     int perm_index= perm[index];
3802                     if(i==0) s*= sqrt(0.5);
3803                     if(j==0) s*= sqrt(0.5);
3804                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3805                 }
3806             }
3807         }
3808     }
3809 }
3810
3811 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3812                         int16_t *block, int16_t *weight, int16_t *orig,
3813                         int n, int qscale){
3814     int16_t rem[64];
3815     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3816     const uint8_t *scantable= s->intra_scantable.scantable;
3817     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3818 //    unsigned int threshold1, threshold2;
3819 //    int bias=0;
3820     int run_tab[65];
3821     int prev_run=0;
3822     int prev_level=0;
3823     int qmul, qadd, start_i, last_non_zero, i, dc;
3824     uint8_t * length;
3825     uint8_t * last_length;
3826     int lambda;
3827     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3828 #ifdef REFINE_STATS
3829 static int count=0;
3830 static int after_last=0;
3831 static int to_zero=0;
3832 static int from_zero=0;
3833 static int raise=0;
3834 static int lower=0;
3835 static int messed_sign=0;
3836 #endif
3837
3838     if(basis[0][0] == 0)
3839         build_basis(s->idsp.idct_permutation);
3840
3841     qmul= qscale*2;
3842     qadd= (qscale-1)|1;
3843     if (s->mb_intra) {
3844         if (!s->h263_aic) {
3845             if (n < 4)
3846                 q = s->y_dc_scale;
3847             else
3848                 q = s->c_dc_scale;
3849         } else{
3850             /* For AIC we skip quant/dequant of INTRADC */
3851             q = 1;
3852             qadd=0;
3853         }
3854         q <<= RECON_SHIFT-3;
3855         /* note: block[0] is assumed to be positive */
3856         dc= block[0]*q;
3857 //        block[0] = (block[0] + (q >> 1)) / q;
3858         start_i = 1;
3859 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3860 //            bias= 1<<(QMAT_SHIFT-1);
3861         length     = s->intra_ac_vlc_length;
3862         last_length= s->intra_ac_vlc_last_length;
3863     } else {
3864         dc= 0;
3865         start_i = 0;
3866         length     = s->inter_ac_vlc_length;
3867         last_length= s->inter_ac_vlc_last_length;
3868     }
3869     last_non_zero = s->block_last_index[n];
3870
3871 #ifdef REFINE_STATS
3872 {START_TIMER
3873 #endif
3874     dc += (1<<(RECON_SHIFT-1));
3875     for(i=0; i<64; i++){
3876         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3877     }
3878 #ifdef REFINE_STATS
3879 STOP_TIMER("memset rem[]")}
3880 #endif
3881     sum=0;
3882     for(i=0; i<64; i++){
3883         int one= 36;
3884         int qns=4;
3885         int w;
3886
3887         w= FFABS(weight[i]) + qns*one;
3888         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3889
3890         weight[i] = w;
3891 //        w=weight[i] = (63*qns + (w/2)) / w;
3892
3893         assert(w>0);
3894         assert(w<(1<<6));
3895         sum += w*w;
3896     }
3897     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3898 #ifdef REFINE_STATS
3899 {START_TIMER
3900 #endif
3901     run=0;
3902     rle_index=0;
3903     for(i=start_i; i<=last_non_zero; i++){
3904         int j= perm_scantable[i];
3905         const int level= block[j];
3906         int coeff;
3907
3908         if(level){
3909             if(level<0) coeff= qmul*level - qadd;
3910             else        coeff= qmul*level + qadd;
3911             run_tab[rle_index++]=run;
3912             run=0;
3913
3914             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3915         }else{
3916             run++;
3917         }
3918     }
3919 #ifdef REFINE_STATS
3920 if(last_non_zero>0){
3921 STOP_TIMER("init rem[]")
3922 }
3923 }
3924
3925 {START_TIMER
3926 #endif
3927     for(;;){
3928         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3929         int best_coeff=0;
3930         int best_change=0;
3931         int run2, best_unquant_change=0, analyze_gradient;
3932 #ifdef REFINE_STATS
3933 {START_TIMER
3934 #endif
3935         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3936
3937         if(analyze_gradient){
3938 #ifdef REFINE_STATS
3939 {START_TIMER
3940 #endif
3941             for(i=0; i<64; i++){
3942                 int w= weight[i];
3943
3944                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3945             }
3946 #ifdef REFINE_STATS
3947 STOP_TIMER("rem*w*w")}
3948 {START_TIMER
3949 #endif
3950             s->fdsp.fdct(d1);
3951 #ifdef REFINE_STATS
3952 STOP_TIMER("dct")}
3953 #endif
3954         }
3955
3956         if(start_i){
3957             const int level= block[0];
3958             int change, old_coeff;
3959
3960             assert(s->mb_intra);
3961
3962             old_coeff= q*level;
3963
3964             for(change=-1; change<=1; change+=2){
3965                 int new_level= level + change;
3966                 int score, new_coeff;
3967
3968                 new_coeff= q*new_level;
3969                 if(new_coeff >= 2048 || new_coeff < 0)
3970                     continue;
3971
3972                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3973                                                   new_coeff - old_coeff);
3974                 if(score<best_score){
3975                     best_score= score;
3976                     best_coeff= 0;
3977                     best_change= change;
3978                     best_unquant_change= new_coeff - old_coeff;
3979                 }
3980             }
3981         }
3982
3983         run=0;
3984         rle_index=0;
3985         run2= run_tab[rle_index++];
3986         prev_level=0;
3987         prev_run=0;
3988
3989         for(i=start_i; i<64; i++){
3990             int j= perm_scantable[i];
3991             const int level= block[j];
3992             int change, old_coeff;
3993
3994             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3995                 break;
3996
3997             if(level){
3998                 if(level<0) old_coeff= qmul*level - qadd;
3999                 else        old_coeff= qmul*level + qadd;
4000                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4001             }else{
4002                 old_coeff=0;
4003                 run2--;
4004                 assert(run2>=0 || i >= last_non_zero );
4005             }
4006
4007             for(change=-1; change<=1; change+=2){
4008                 int new_level= level + change;
4009                 int score, new_coeff, unquant_change;
4010
4011                 score=0;
4012                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4013                    continue;
4014
4015                 if(new_level){
4016                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4017                     else            new_coeff= qmul*new_level + qadd;
4018                     if(new_coeff >= 2048 || new_coeff <= -2048)
4019                         continue;
4020                     //FIXME check for overflow
4021
4022                     if(level){
4023                         if(level < 63 && level > -63){
4024                             if(i < last_non_zero)
4025                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4026                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4027                             else
4028                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4029                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4030                         }
4031                     }else{
4032                         assert(FFABS(new_level)==1);
4033
4034                         if(analyze_gradient){
4035                             int g= d1[ scantable[i] ];
4036                             if(g && (g^new_level) >= 0)
4037                                 continue;
4038                         }
4039
4040                         if(i < last_non_zero){
4041                             int next_i= i + run2 + 1;
4042                             int next_level= block[ perm_scantable[next_i] ] + 64;
4043
4044                             if(next_level&(~127))
4045                                 next_level= 0;
4046
4047                             if(next_i < last_non_zero)
4048                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4049                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4050                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4051                             else
4052                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4053                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4054                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4055                         }else{
4056                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4057                             if(prev_level){
4058                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4059                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4060                             }
4061                         }
4062                     }
4063                 }else{
4064                     new_coeff=0;
4065                     assert(FFABS(level)==1);
4066
4067                     if(i < last_non_zero){
4068                         int next_i= i + run2 + 1;
4069                         int next_level= block[ perm_scantable[next_i] ] + 64;
4070
4071                         if(next_level&(~127))
4072                             next_level= 0;
4073
4074                         if(next_i < last_non_zero)
4075                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4076                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4077                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4078                         else
4079                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4080                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4081                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4082                     }else{
4083                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4084                         if(prev_level){
4085                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4086                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4087                         }
4088                     }
4089                 }
4090
4091                 score *= lambda;
4092
4093                 unquant_change= new_coeff - old_coeff;
4094                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4095
4096                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4097                                                    unquant_change);
4098                 if(score<best_score){
4099                     best_score= score;
4100                     best_coeff= i;
4101                     best_change= change;
4102                     best_unquant_change= unquant_change;
4103                 }
4104             }
4105             if(level){
4106                 prev_level= level + 64;
4107                 if(prev_level&(~127))
4108                     prev_level= 0;
4109                 prev_run= run;
4110                 run=0;
4111             }else{
4112                 run++;
4113             }
4114         }
4115 #ifdef REFINE_STATS
4116 STOP_TIMER("iterative step")}
4117 #endif
4118
4119         if(best_change){
4120             int j= perm_scantable[ best_coeff ];
4121
4122             block[j] += best_change;
4123
4124             if(best_coeff > last_non_zero){
4125                 last_non_zero= best_coeff;
4126                 assert(block[j]);
4127 #ifdef REFINE_STATS
4128 after_last++;
4129 #endif
4130             }else{
4131 #ifdef REFINE_STATS
4132 if(block[j]){
4133     if(block[j] - best_change){
4134         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4135             raise++;
4136         }else{
4137             lower++;
4138         }
4139     }else{
4140         from_zero++;
4141     }
4142 }else{
4143     to_zero++;
4144 }
4145 #endif
4146                 for(; last_non_zero>=start_i; last_non_zero--){
4147                     if(block[perm_scantable[last_non_zero]])
4148                         break;
4149                 }
4150             }
4151 #ifdef REFINE_STATS
4152 count++;
4153 if(256*256*256*64 % count == 0){
4154     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4155 }
4156 #endif
4157             run=0;
4158             rle_index=0;
4159             for(i=start_i; i<=last_non_zero; i++){
4160                 int j= perm_scantable[i];
4161                 const int level= block[j];
4162
4163                  if(level){
4164                      run_tab[rle_index++]=run;
4165                      run=0;
4166                  }else{
4167                      run++;
4168                  }
4169             }
4170
4171             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4172         }else{
4173             break;
4174         }
4175     }
4176 #ifdef REFINE_STATS
4177 if(last_non_zero>0){
4178 STOP_TIMER("iterative search")
4179 }
4180 }
4181 #endif
4182
4183     return last_non_zero;
4184 }
4185
4186 int ff_dct_quantize_c(MpegEncContext *s,
4187                         int16_t *block, int n,
4188                         int qscale, int *overflow)
4189 {
4190     int i, j, level, last_non_zero, q, start_i;
4191     const int *qmat;
4192     const uint8_t *scantable= s->intra_scantable.scantable;
4193     int bias;
4194     int max=0;
4195     unsigned int threshold1, threshold2;
4196
4197     s->fdsp.fdct(block);
4198
4199     if(s->dct_error_sum)
4200         s->denoise_dct(s, block);
4201
4202     if (s->mb_intra) {
4203         if (!s->h263_aic) {
4204             if (n < 4)
4205                 q = s->y_dc_scale;
4206             else
4207                 q = s->c_dc_scale;
4208             q = q << 3;
4209         } else
4210             /* For AIC we skip quant/dequant of INTRADC */
4211             q = 1 << 3;
4212
4213         /* note: block[0] is assumed to be positive */
4214         block[0] = (block[0] + (q >> 1)) / q;
4215         start_i = 1;
4216         last_non_zero = 0;
4217         qmat = s->q_intra_matrix[qscale];
4218         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4219     } else {
4220         start_i = 0;
4221         last_non_zero = -1;
4222         qmat = s->q_inter_matrix[qscale];
4223         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4224     }
4225     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4226     threshold2= (threshold1<<1);
4227     for(i=63;i>=start_i;i--) {
4228         j = scantable[i];
4229         level = block[j] * qmat[j];
4230
4231         if(((unsigned)(level+threshold1))>threshold2){
4232             last_non_zero = i;
4233             break;
4234         }else{
4235             block[j]=0;
4236         }
4237     }
4238     for(i=start_i; i<=last_non_zero; i++) {
4239         j = scantable[i];
4240         level = block[j] * qmat[j];
4241
4242 //        if(   bias+level >= (1<<QMAT_SHIFT)
4243 //           || bias-level >= (1<<QMAT_SHIFT)){
4244         if(((unsigned)(level+threshold1))>threshold2){
4245             if(level>0){
4246                 level= (bias + level)>>QMAT_SHIFT;
4247                 block[j]= level;
4248             }else{
4249                 level= (bias - level)>>QMAT_SHIFT;
4250                 block[j]= -level;
4251             }
4252             max |=level;
4253         }else{
4254             block[j]=0;
4255         }
4256     }
4257     *overflow= s->max_qcoeff < max; //overflow might have happened
4258
4259     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4260     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4261         ff_block_permute(block, s->idsp.idct_permutation,
4262                          scantable, last_non_zero);
4263
4264     return last_non_zero;
4265 }
4266
4267 #define OFFSET(x) offsetof(MpegEncContext, x)
4268 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4269 static const AVOption h263_options[] = {
4270     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4271     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4272     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4273     FF_MPV_COMMON_OPTS
4274     { NULL },
4275 };
4276
4277 static const AVClass h263_class = {
4278     .class_name = "H.263 encoder",
4279     .item_name  = av_default_item_name,
4280     .option     = h263_options,
4281     .version    = LIBAVUTIL_VERSION_INT,
4282 };
4283
4284 AVCodec ff_h263_encoder = {
4285     .name           = "h263",
4286     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4287     .type           = AVMEDIA_TYPE_VIDEO,
4288     .id             = AV_CODEC_ID_H263,
4289     .priv_data_size = sizeof(MpegEncContext),
4290     .init           = ff_mpv_encode_init,
4291     .encode2        = ff_mpv_encode_picture,
4292     .close          = ff_mpv_encode_end,
4293     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4294     .priv_class     = &h263_class,
4295 };
4296
4297 static const AVOption h263p_options[] = {
4298     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4299     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4300     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4301     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4302     FF_MPV_COMMON_OPTS
4303     { NULL },
4304 };
4305 static const AVClass h263p_class = {
4306     .class_name = "H.263p encoder",
4307     .item_name  = av_default_item_name,
4308     .option     = h263p_options,
4309     .version    = LIBAVUTIL_VERSION_INT,
4310 };
4311
4312 AVCodec ff_h263p_encoder = {
4313     .name           = "h263p",
4314     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4315     .type           = AVMEDIA_TYPE_VIDEO,
4316     .id             = AV_CODEC_ID_H263P,
4317     .priv_data_size = sizeof(MpegEncContext),
4318     .init           = ff_mpv_encode_init,
4319     .encode2        = ff_mpv_encode_picture,
4320     .close          = ff_mpv_encode_end,
4321     .capabilities   = CODEC_CAP_SLICE_THREADS,
4322     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4323     .priv_class     = &h263p_class,
4324 };
4325
4326 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4327
4328 AVCodec ff_msmpeg4v2_encoder = {
4329     .name           = "msmpeg4v2",
4330     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4331     .type           = AVMEDIA_TYPE_VIDEO,
4332     .id             = AV_CODEC_ID_MSMPEG4V2,
4333     .priv_data_size = sizeof(MpegEncContext),
4334     .init           = ff_mpv_encode_init,
4335     .encode2        = ff_mpv_encode_picture,
4336     .close          = ff_mpv_encode_end,
4337     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4338     .priv_class     = &msmpeg4v2_class,
4339 };
4340
4341 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4342
4343 AVCodec ff_msmpeg4v3_encoder = {
4344     .name           = "msmpeg4",
4345     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4346     .type           = AVMEDIA_TYPE_VIDEO,
4347     .id             = AV_CODEC_ID_MSMPEG4V3,
4348     .priv_data_size = sizeof(MpegEncContext),
4349     .init           = ff_mpv_encode_init,
4350     .encode2        = ff_mpv_encode_picture,
4351     .close          = ff_mpv_encode_end,
4352     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4353     .priv_class     = &msmpeg4v3_class,
4354 };
4355
4356 FF_MPV_GENERIC_CLASS(wmv1)
4357
4358 AVCodec ff_wmv1_encoder = {
4359     .name           = "wmv1",
4360     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4361     .type           = AVMEDIA_TYPE_VIDEO,
4362     .id             = AV_CODEC_ID_WMV1,
4363     .priv_data_size = sizeof(MpegEncContext),
4364     .init           = ff_mpv_encode_init,
4365     .encode2        = ff_mpv_encode_picture,
4366     .close          = ff_mpv_encode_end,
4367     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4368     .priv_class     = &wmv1_class,
4369 };