]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
msmpeg4: check memory allocations and propagate errors
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
/* Number of fractional bits of the fixed-point quantizer bias values
 * (e.g. 3 << (QUANT_BIAS_SHIFT - 3) represents 3/8). */
#define QUANT_BIAS_SHIFT 8

/* Numerator shift used when building the 16-bit (qmat16) reciprocal tables. */
#define QMAT_SHIFT_MMX 16
/* Numerator shift used when building the int (qmat) reciprocal tables. */
#define QMAT_SHIFT 22

/* Forward declarations of file-local helpers. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default tables installed into the context by mpv_encode_defaults(). */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
74
/* Option table made up of the shared FF_MPV_COMMON_OPTS entries followed by
 * the terminating { NULL } entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 int64_t den = (int64_t) qscale * quant_matrix[j];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
111                 /* 16 <= qscale * quant_matrix[i] <= 7905
112                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
113                  *             19952 <=              x  <= 249205026
114                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
115                  *           3444240 >= (1 << 36) / (x) >= 275 */
116
117                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
118             }
119         } else {
120             for (i = 0; i < 64; i++) {
121                 const int j = s->idsp.idct_permutation[i];
122                 int64_t den = (int64_t) qscale * quant_matrix[j];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
129                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
130                 //                    (qscale * quant_matrix[i]);
131                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
132
133                 if (qmat16[qscale][0][i] == 0 ||
134                     qmat16[qscale][0][i] == 128 * 256)
135                     qmat16[qscale][0][i] = 128 * 256 - 1;
136                 qmat16[qscale][1][i] =
137                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
138                                 qmat16[qscale][0][i]);
139             }
140         }
141
142         for (i = intra; i < 64; i++) {
143             int64_t max = 8191;
144             if (fdsp->fdct == ff_fdct_ifast) {
145                 max = (8191LL * ff_aanscales[i]) >> 14;
146             }
147             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
148                 shift++;
149             }
150         }
151     }
152     if (shift) {
153         av_log(NULL, AV_LOG_INFO,
154                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
155                QMAT_SHIFT - shift);
156     }
157 }
158
159 static inline void update_qscale(MpegEncContext *s)
160 {
161     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
162                 (FF_LAMBDA_SHIFT + 7);
163     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
164
165     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
166                  FF_LAMBDA_SHIFT;
167 }
168
169 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
170 {
171     int i;
172
173     if (matrix) {
174         put_bits(pb, 1, 1);
175         for (i = 0; i < 64; i++) {
176             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
177         }
178     } else
179         put_bits(pb, 1, 0);
180 }
181
182 /**
183  * init s->current_picture.qscale_table from s->lambda_table
184  */
185 void ff_init_qscale_tab(MpegEncContext *s)
186 {
187     int8_t * const qscale_table = s->current_picture.qscale_table;
188     int i;
189
190     for (i = 0; i < s->mb_num; i++) {
191         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
192         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
193         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
194                                                   s->avctx->qmax);
195     }
196 }
197
198 static void update_duplicate_context_after_me(MpegEncContext *dst,
199                                               MpegEncContext *src)
200 {
201 #define COPY(a) dst->a= src->a
202     COPY(pict_type);
203     COPY(current_picture);
204     COPY(f_code);
205     COPY(b_code);
206     COPY(qscale);
207     COPY(lambda);
208     COPY(lambda2);
209     COPY(picture_in_gop_number);
210     COPY(gop_picture_number);
211     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
212     COPY(progressive_frame);    // FIXME don't set in encode_header
213     COPY(partitioned_frame);    // FIXME don't set in encode_header
214 #undef COPY
215 }
216
217 /**
218  * Set the given MpegEncContext to defaults for encoding.
219  * the changed fields will not depend upon the prior state of the MpegEncContext.
220  */
221 static void mpv_encode_defaults(MpegEncContext *s)
222 {
223     int i;
224     ff_mpv_common_defaults(s);
225
226     for (i = -16; i < 16; i++) {
227         default_fcode_tab[i + MAX_MV] = 1;
228     }
229     s->me.mv_penalty = default_mv_penalty;
230     s->fcode_tab     = default_fcode_tab;
231
232     s->input_picture_number  = 0;
233     s->picture_in_gop_number = 0;
234 }
235
236 /* init video encoder */
237 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
238 {
239     MpegEncContext *s = avctx->priv_data;
240     int i, ret, format_supported;
241
242     mpv_encode_defaults(s);
243
244     switch (avctx->codec_id) {
245     case AV_CODEC_ID_MPEG2VIDEO:
246         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
247             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
248             av_log(avctx, AV_LOG_ERROR,
249                    "only YUV420 and YUV422 are supported\n");
250             return -1;
251         }
252         break;
253     case AV_CODEC_ID_MJPEG:
254         format_supported = 0;
255         /* JPEG color space */
256         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
257             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
258             (avctx->color_range == AVCOL_RANGE_JPEG &&
259              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
260               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
261             format_supported = 1;
262         /* MPEG color space */
263         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
264                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
265                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
266             format_supported = 1;
267
268         if (!format_supported) {
269             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
270             return -1;
271         }
272         break;
273     default:
274         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
275             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
276             return -1;
277         }
278     }
279
280     switch (avctx->pix_fmt) {
281     case AV_PIX_FMT_YUVJ422P:
282     case AV_PIX_FMT_YUV422P:
283         s->chroma_format = CHROMA_422;
284         break;
285     case AV_PIX_FMT_YUVJ420P:
286     case AV_PIX_FMT_YUV420P:
287     default:
288         s->chroma_format = CHROMA_420;
289         break;
290     }
291
292     s->bit_rate = avctx->bit_rate;
293     s->width    = avctx->width;
294     s->height   = avctx->height;
295     if (avctx->gop_size > 600 &&
296         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
297         av_log(avctx, AV_LOG_ERROR,
298                "Warning keyframe interval too large! reducing it ...\n");
299         avctx->gop_size = 600;
300     }
301     s->gop_size     = avctx->gop_size;
302     s->avctx        = avctx;
303     s->flags        = avctx->flags;
304     s->flags2       = avctx->flags2;
305     if (avctx->max_b_frames > MAX_B_FRAMES) {
306         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
307                "is %d.\n", MAX_B_FRAMES);
308     }
309     s->max_b_frames = avctx->max_b_frames;
310     s->codec_id     = avctx->codec->id;
311     s->strict_std_compliance = avctx->strict_std_compliance;
312     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
313     s->mpeg_quant         = avctx->mpeg_quant;
314     s->rtp_mode           = !!avctx->rtp_payload_size;
315     s->intra_dc_precision = avctx->intra_dc_precision;
316     s->user_specified_pts = AV_NOPTS_VALUE;
317
318     if (s->gop_size <= 1) {
319         s->intra_only = 1;
320         s->gop_size   = 12;
321     } else {
322         s->intra_only = 0;
323     }
324
325     s->me_method = avctx->me_method;
326
327     /* Fixed QSCALE */
328     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
329
330 #if FF_API_MPV_OPT
331     FF_DISABLE_DEPRECATION_WARNINGS
332     if (avctx->border_masking != 0.0)
333         s->border_masking = avctx->border_masking;
334     FF_ENABLE_DEPRECATION_WARNINGS
335 #endif
336
337     s->adaptive_quant = (s->avctx->lumi_masking ||
338                          s->avctx->dark_masking ||
339                          s->avctx->temporal_cplx_masking ||
340                          s->avctx->spatial_cplx_masking  ||
341                          s->avctx->p_masking      ||
342                          s->border_masking ||
343                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
344                         !s->fixed_qscale;
345
346     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
347
348     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
349         av_log(avctx, AV_LOG_ERROR,
350                "a vbv buffer size is needed, "
351                "for encoding with a maximum bitrate\n");
352         return -1;
353     }
354
355     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
356         av_log(avctx, AV_LOG_INFO,
357                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
358     }
359
360     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
361         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
362         return -1;
363     }
364
365     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
366         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
367         return -1;
368     }
369
370     if (avctx->rc_max_rate &&
371         avctx->rc_max_rate == avctx->bit_rate &&
372         avctx->rc_max_rate != avctx->rc_min_rate) {
373         av_log(avctx, AV_LOG_INFO,
374                "impossible bitrate constraints, this will fail\n");
375     }
376
377     if (avctx->rc_buffer_size &&
378         avctx->bit_rate * (int64_t)avctx->time_base.num >
379             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
380         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
381         return -1;
382     }
383
384     if (!s->fixed_qscale &&
385         avctx->bit_rate * av_q2d(avctx->time_base) >
386             avctx->bit_rate_tolerance) {
387         av_log(avctx, AV_LOG_ERROR,
388                "bitrate tolerance too small for bitrate\n");
389         return -1;
390     }
391
392     if (s->avctx->rc_max_rate &&
393         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
394         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
395          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
396         90000LL * (avctx->rc_buffer_size - 1) >
397             s->avctx->rc_max_rate * 0xFFFFLL) {
398         av_log(avctx, AV_LOG_INFO,
399                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
400                "specified vbv buffer is too large for the given bitrate!\n");
401     }
402
403     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
404         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
405         s->codec_id != AV_CODEC_ID_FLV1) {
406         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
407         return -1;
408     }
409
410     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
411         av_log(avctx, AV_LOG_ERROR,
412                "OBMC is only supported with simple mb decision\n");
413         return -1;
414     }
415
416     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
417         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
418         return -1;
419     }
420
421     if (s->max_b_frames                    &&
422         s->codec_id != AV_CODEC_ID_MPEG4      &&
423         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
424         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
425         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
426         return -1;
427     }
428
429     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
430          s->codec_id == AV_CODEC_ID_H263  ||
431          s->codec_id == AV_CODEC_ID_H263P) &&
432         (avctx->sample_aspect_ratio.num > 255 ||
433          avctx->sample_aspect_ratio.den > 255)) {
434         av_log(avctx, AV_LOG_ERROR,
435                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
436                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
437         return -1;
438     }
439
440     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
441         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
442         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
443         return -1;
444     }
445
446     // FIXME mpeg2 uses that too
447     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
448         av_log(avctx, AV_LOG_ERROR,
449                "mpeg2 style quantization not supported by codec\n");
450         return -1;
451     }
452
453     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
454         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
455         return -1;
456     }
457
458     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
459         s->avctx->mb_decision != FF_MB_DECISION_RD) {
460         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
461         return -1;
462     }
463
464     if (s->avctx->scenechange_threshold < 1000000000 &&
465         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
466         av_log(avctx, AV_LOG_ERROR,
467                "closed gop with scene change detection are not supported yet, "
468                "set threshold to 1000000000\n");
469         return -1;
470     }
471
472     if (s->flags & CODEC_FLAG_LOW_DELAY) {
473         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
474             av_log(avctx, AV_LOG_ERROR,
475                   "low delay forcing is only available for mpeg2\n");
476             return -1;
477         }
478         if (s->max_b_frames != 0) {
479             av_log(avctx, AV_LOG_ERROR,
480                    "b frames cannot be used with low delay\n");
481             return -1;
482         }
483     }
484
485     if (s->q_scale_type == 1) {
486         if (avctx->qmax > 12) {
487             av_log(avctx, AV_LOG_ERROR,
488                    "non linear quant only supports qmax <= 12 currently\n");
489             return -1;
490         }
491     }
492
493     if (s->avctx->thread_count > 1         &&
494         s->codec_id != AV_CODEC_ID_MPEG4      &&
495         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
496         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
497         (s->codec_id != AV_CODEC_ID_H263P)) {
498         av_log(avctx, AV_LOG_ERROR,
499                "multi threaded encoding not supported by codec\n");
500         return -1;
501     }
502
503     if (s->avctx->thread_count < 1) {
504         av_log(avctx, AV_LOG_ERROR,
505                "automatic thread number detection not supported by codec,"
506                "patch welcome\n");
507         return -1;
508     }
509
510     if (s->avctx->thread_count > 1)
511         s->rtp_mode = 1;
512
513     if (!avctx->time_base.den || !avctx->time_base.num) {
514         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
515         return -1;
516     }
517
518     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
519         av_log(avctx, AV_LOG_INFO,
520                "notice: b_frame_strategy only affects the first pass\n");
521         avctx->b_frame_strategy = 0;
522     }
523
524     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
525     if (i > 1) {
526         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
527         avctx->time_base.den /= i;
528         avctx->time_base.num /= i;
529         //return -1;
530     }
531
532     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
533         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
534         // (a + x * 3 / 8) / x
535         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
536         s->inter_quant_bias = 0;
537     } else {
538         s->intra_quant_bias = 0;
539         // (a - x / 4) / x
540         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
541     }
542
543     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
544         s->intra_quant_bias = avctx->intra_quant_bias;
545     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->inter_quant_bias = avctx->inter_quant_bias;
547
548     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
549         s->avctx->time_base.den > (1 << 16) - 1) {
550         av_log(avctx, AV_LOG_ERROR,
551                "timebase %d/%d not supported by MPEG 4 standard, "
552                "the maximum admitted value for the timebase denominator "
553                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
554                (1 << 16) - 1);
555         return -1;
556     }
557     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
558
559     switch (avctx->codec->id) {
560     case AV_CODEC_ID_MPEG1VIDEO:
561         s->out_format = FMT_MPEG1;
562         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
563         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
564         break;
565     case AV_CODEC_ID_MPEG2VIDEO:
566         s->out_format = FMT_MPEG1;
567         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
568         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
569         s->rtp_mode   = 1;
570         break;
571     case AV_CODEC_ID_MJPEG:
572         s->out_format = FMT_MJPEG;
573         s->intra_only = 1; /* force intra only for jpeg */
574         if (!CONFIG_MJPEG_ENCODER ||
575             ff_mjpeg_encode_init(s) < 0)
576             return -1;
577         avctx->delay = 0;
578         s->low_delay = 1;
579         break;
580     case AV_CODEC_ID_H261:
581         if (!CONFIG_H261_ENCODER)
582             return -1;
583         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "The specified picture size of %dx%d is not valid for the "
586                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
587                     s->width, s->height);
588             return -1;
589         }
590         s->out_format = FMT_H261;
591         avctx->delay  = 0;
592         s->low_delay  = 1;
593         s->rtp_mode   = 0; /* Sliced encoding not supported */
594         break;
595     case AV_CODEC_ID_H263:
596         if (!CONFIG_H263_ENCODER)
597         return -1;
598         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
599                              s->width, s->height) == 8) {
600             av_log(avctx, AV_LOG_INFO,
601                    "The specified picture size of %dx%d is not valid for "
602                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
603                    "352x288, 704x576, and 1408x1152."
604                    "Try H.263+.\n", s->width, s->height);
605             return -1;
606         }
607         s->out_format = FMT_H263;
608         avctx->delay  = 0;
609         s->low_delay  = 1;
610         break;
611     case AV_CODEC_ID_H263P:
612         s->out_format = FMT_H263;
613         s->h263_plus  = 1;
614         /* Fx */
615         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
616         s->modified_quant  = s->h263_aic;
617         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
618         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
619
620         /* /Fx */
621         /* These are just to be sure */
622         avctx->delay = 0;
623         s->low_delay = 1;
624         break;
625     case AV_CODEC_ID_FLV1:
626         s->out_format      = FMT_H263;
627         s->h263_flv        = 2; /* format = 1; 11-bit codes */
628         s->unrestricted_mv = 1;
629         s->rtp_mode  = 0; /* don't allow GOB */
630         avctx->delay = 0;
631         s->low_delay = 1;
632         break;
633     case AV_CODEC_ID_RV10:
634         s->out_format = FMT_H263;
635         avctx->delay  = 0;
636         s->low_delay  = 1;
637         break;
638     case AV_CODEC_ID_RV20:
639         s->out_format      = FMT_H263;
640         avctx->delay       = 0;
641         s->low_delay       = 1;
642         s->modified_quant  = 1;
643         s->h263_aic        = 1;
644         s->h263_plus       = 1;
645         s->loop_filter     = 1;
646         s->unrestricted_mv = 0;
647         break;
648     case AV_CODEC_ID_MPEG4:
649         s->out_format      = FMT_H263;
650         s->h263_pred       = 1;
651         s->unrestricted_mv = 1;
652         s->low_delay       = s->max_b_frames ? 0 : 1;
653         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
654         break;
655     case AV_CODEC_ID_MSMPEG4V2:
656         s->out_format      = FMT_H263;
657         s->h263_pred       = 1;
658         s->unrestricted_mv = 1;
659         s->msmpeg4_version = 2;
660         avctx->delay       = 0;
661         s->low_delay       = 1;
662         break;
663     case AV_CODEC_ID_MSMPEG4V3:
664         s->out_format        = FMT_H263;
665         s->h263_pred         = 1;
666         s->unrestricted_mv   = 1;
667         s->msmpeg4_version   = 3;
668         s->flipflop_rounding = 1;
669         avctx->delay         = 0;
670         s->low_delay         = 1;
671         break;
672     case AV_CODEC_ID_WMV1:
673         s->out_format        = FMT_H263;
674         s->h263_pred         = 1;
675         s->unrestricted_mv   = 1;
676         s->msmpeg4_version   = 4;
677         s->flipflop_rounding = 1;
678         avctx->delay         = 0;
679         s->low_delay         = 1;
680         break;
681     case AV_CODEC_ID_WMV2:
682         s->out_format        = FMT_H263;
683         s->h263_pred         = 1;
684         s->unrestricted_mv   = 1;
685         s->msmpeg4_version   = 5;
686         s->flipflop_rounding = 1;
687         avctx->delay         = 0;
688         s->low_delay         = 1;
689         break;
690     default:
691         return -1;
692     }
693
694     avctx->has_b_frames = !s->low_delay;
695
696     s->encoding = 1;
697
698     s->progressive_frame    =
699     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
700                                                 CODEC_FLAG_INTERLACED_ME) ||
701                                 s->alternate_scan);
702
703     /* init */
704     ff_mpv_idct_init(s);
705     if (ff_mpv_common_init(s) < 0)
706         return -1;
707
708     if (ARCH_X86)
709         ff_mpv_encode_init_x86(s);
710
711     ff_fdctdsp_init(&s->fdsp, avctx);
712     ff_me_cmp_init(&s->mecc, avctx);
713     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
714     ff_pixblockdsp_init(&s->pdsp, avctx);
715     ff_qpeldsp_init(&s->qdsp);
716
717     s->avctx->coded_frame = s->current_picture.f;
718
719     if (s->msmpeg4_version) {
720         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
721                           2 * 2 * (MAX_LEVEL + 1) *
722                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
723     }
724     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
725
726     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
731                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
732     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
733                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
734
735     if (s->avctx->noise_reduction) {
736         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
737                           2 * 64 * sizeof(uint16_t), fail);
738     }
739
740     if (CONFIG_H263_ENCODER)
741         ff_h263dsp_init(&s->h263dsp);
742     if (!s->dct_quantize)
743         s->dct_quantize = ff_dct_quantize_c;
744     if (!s->denoise_dct)
745         s->denoise_dct  = denoise_dct_c;
746     s->fast_dct_quantize = s->dct_quantize;
747     if (avctx->trellis)
748         s->dct_quantize  = dct_quantize_trellis_c;
749
750     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
751         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
752
753     s->quant_precision = 5;
754
755     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
756     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
757
758     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
759         ff_h261_encode_init(s);
760     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
761         ff_h263_encode_init(s);
762     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
763         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
764             return ret;
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->idsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811 #if FF_API_NORMALIZE_AQP
812     FF_DISABLE_DEPRECATION_WARNINGS
813     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
814         s->mpv_flags |= FF_MPV_FLAG_NAQ;
815     FF_ENABLE_DEPRECATION_WARNINGS;
816 #endif
817
818 #if FF_API_MV0
819     FF_DISABLE_DEPRECATION_WARNINGS
820     if (avctx->flags & CODEC_FLAG_MV0)
821         s->mpv_flags |= FF_MPV_FLAG_MV0;
822     FF_ENABLE_DEPRECATION_WARNINGS
823 #endif
824
825 #if FF_API_MPV_OPT
826     FF_DISABLE_DEPRECATION_WARNINGS
827     if (avctx->rc_qsquish != 0.0)
828         s->rc_qsquish = avctx->rc_qsquish;
829     if (avctx->rc_qmod_amp != 0.0)
830         s->rc_qmod_amp = avctx->rc_qmod_amp;
831     if (avctx->rc_qmod_freq)
832         s->rc_qmod_freq = avctx->rc_qmod_freq;
833     if (avctx->rc_buffer_aggressivity != 1.0)
834         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
835     if (avctx->rc_initial_cplx != 0.0)
836         s->rc_initial_cplx = avctx->rc_initial_cplx;
837     if (avctx->lmin)
838         s->lmin = avctx->lmin;
839     if (avctx->lmax)
840         s->lmax = avctx->lmax;
841
842     if (avctx->rc_eq) {
843         av_freep(&s->rc_eq);
844         s->rc_eq = av_strdup(avctx->rc_eq);
845         if (!s->rc_eq)
846             return AVERROR(ENOMEM);
847     }
848     FF_ENABLE_DEPRECATION_WARNINGS
849 #endif
850
851     if (avctx->b_frame_strategy == 2) {
852         for (i = 0; i < s->max_b_frames + 2; i++) {
853             s->tmp_frames[i] = av_frame_alloc();
854             if (!s->tmp_frames[i])
855                 return AVERROR(ENOMEM);
856
857             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
858             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
859             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
860
861             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
862             if (ret < 0)
863                 return ret;
864         }
865     }
866
867     return 0;
868 fail:
869     ff_mpv_encode_end(avctx);
870     return AVERROR_UNKNOWN;
871 }
872
873 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
874 {
875     MpegEncContext *s = avctx->priv_data;
876     int i;
877
878     ff_rate_control_uninit(s);
879
880     ff_mpv_common_end(s);
881     if (CONFIG_MJPEG_ENCODER &&
882         s->out_format == FMT_MJPEG)
883         ff_mjpeg_encode_close(s);
884
885     av_freep(&avctx->extradata);
886
887     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
888         av_frame_free(&s->tmp_frames[i]);
889
890     ff_free_picture_tables(&s->new_picture);
891     ff_mpeg_unref_picture(s, &s->new_picture);
892
893     av_freep(&s->avctx->stats_out);
894     av_freep(&s->ac_stats);
895
896     av_freep(&s->q_intra_matrix);
897     av_freep(&s->q_inter_matrix);
898     av_freep(&s->q_intra_matrix16);
899     av_freep(&s->q_inter_matrix16);
900     av_freep(&s->input_picture);
901     av_freep(&s->reordered_input_picture);
902     av_freep(&s->dct_offset);
903
904     return 0;
905 }
906
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
920
921 static int get_intra_count(MpegEncContext *s, uint8_t *src,
922                            uint8_t *ref, int stride)
923 {
924     int x, y, w, h;
925     int acc = 0;
926
927     w = s->width  & ~15;
928     h = s->height & ~15;
929
930     for (y = 0; y < h; y += 16) {
931         for (x = 0; x < w; x += 16) {
932             int offset = x + y * stride;
933             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
934                                       stride, 16);
935             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
936             int sae  = get_sae(src + offset, mean, stride);
937
938             acc += sae + 500 < sad;
939         }
940     }
941     return acc;
942 }
943
944
945 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
946 {
947     Picture *pic = NULL;
948     int64_t pts;
949     int i, display_picture_number = 0, ret;
950     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
951                                                  (s->low_delay ? 0 : 1);
952     int direct = 1;
953
954     if (pic_arg) {
955         pts = pic_arg->pts;
956         display_picture_number = s->input_picture_number++;
957
958         if (pts != AV_NOPTS_VALUE) {
959             if (s->user_specified_pts != AV_NOPTS_VALUE) {
960                 int64_t time = pts;
961                 int64_t last = s->user_specified_pts;
962
963                 if (time <= last) {
964                     av_log(s->avctx, AV_LOG_ERROR,
965                            "Error, Invalid timestamp=%"PRId64", "
966                            "last=%"PRId64"\n", pts, s->user_specified_pts);
967                     return -1;
968                 }
969
970                 if (!s->low_delay && display_picture_number == 1)
971                     s->dts_delta = time - last;
972             }
973             s->user_specified_pts = pts;
974         } else {
975             if (s->user_specified_pts != AV_NOPTS_VALUE) {
976                 s->user_specified_pts =
977                 pts = s->user_specified_pts + 1;
978                 av_log(s->avctx, AV_LOG_INFO,
979                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
980                        pts);
981             } else {
982                 pts = display_picture_number;
983             }
984         }
985     }
986
987     if (pic_arg) {
988         if (!pic_arg->buf[0] ||
989             pic_arg->linesize[0] != s->linesize ||
990             pic_arg->linesize[1] != s->uvlinesize ||
991             pic_arg->linesize[2] != s->uvlinesize)
992             direct = 0;
993
994         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
995                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
996
997         i = ff_find_unused_picture(s, direct);
998         if (i < 0)
999             return i;
1000
1001         pic = &s->picture[i];
1002         pic->reference = 3;
1003
1004         if (direct) {
1005             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1006                 return ret;
1007             if (ff_alloc_picture(s, pic, 1) < 0) {
1008                 return -1;
1009             }
1010         } else {
1011             if (ff_alloc_picture(s, pic, 0) < 0) {
1012                 return -1;
1013             }
1014
1015             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1016                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1017                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1018                 // empty
1019             } else {
1020                 int h_chroma_shift, v_chroma_shift;
1021                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1022                                                  &h_chroma_shift,
1023                                                  &v_chroma_shift);
1024
1025                 for (i = 0; i < 3; i++) {
1026                     int src_stride = pic_arg->linesize[i];
1027                     int dst_stride = i ? s->uvlinesize : s->linesize;
1028                     int h_shift = i ? h_chroma_shift : 0;
1029                     int v_shift = i ? v_chroma_shift : 0;
1030                     int w = s->width  >> h_shift;
1031                     int h = s->height >> v_shift;
1032                     uint8_t *src = pic_arg->data[i];
1033                     uint8_t *dst = pic->f->data[i];
1034
1035                     if (!s->avctx->rc_buffer_size)
1036                         dst += INPLACE_OFFSET;
1037
1038                     if (src_stride == dst_stride)
1039                         memcpy(dst, src, src_stride * h);
1040                     else {
1041                         while (h--) {
1042                             memcpy(dst, src, w);
1043                             dst += dst_stride;
1044                             src += src_stride;
1045                         }
1046                     }
1047                 }
1048             }
1049         }
1050         ret = av_frame_copy_props(pic->f, pic_arg);
1051         if (ret < 0)
1052             return ret;
1053
1054         pic->f->display_picture_number = display_picture_number;
1055         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1056     }
1057
1058     /* shift buffer entries */
1059     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1060         s->input_picture[i - 1] = s->input_picture[i];
1061
1062     s->input_picture[encoding_delay] = (Picture*) pic;
1063
1064     return 0;
1065 }
1066
1067 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1068 {
1069     int x, y, plane;
1070     int score = 0;
1071     int64_t score64 = 0;
1072
1073     for (plane = 0; plane < 3; plane++) {
1074         const int stride = p->f->linesize[plane];
1075         const int bw = plane ? 1 : 2;
1076         for (y = 0; y < s->mb_height * bw; y++) {
1077             for (x = 0; x < s->mb_width * bw; x++) {
1078                 int off = p->shared ? 0 : 16;
1079                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1080                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1081                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1082
1083                 switch (s->avctx->frame_skip_exp) {
1084                 case 0: score    =  FFMAX(score, v);          break;
1085                 case 1: score   += FFABS(v);                  break;
1086                 case 2: score   += v * v;                     break;
1087                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1088                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1089                 }
1090             }
1091         }
1092     }
1093
1094     if (score)
1095         score64 = score;
1096
1097     if (score64 < s->avctx->frame_skip_threshold)
1098         return 1;
1099     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1100         return 1;
1101     return 0;
1102 }
1103
1104 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1105 {
1106     AVPacket pkt = { 0 };
1107     int ret, got_output;
1108
1109     av_init_packet(&pkt);
1110     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1111     if (ret < 0)
1112         return ret;
1113
1114     ret = pkt.size;
1115     av_free_packet(&pkt);
1116     return ret;
1117 }
1118
1119 static int estimate_best_b_count(MpegEncContext *s)
1120 {
1121     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1122     AVCodecContext *c = avcodec_alloc_context3(NULL);
1123     const int scale = s->avctx->brd_scale;
1124     int i, j, out_size, p_lambda, b_lambda, lambda2;
1125     int64_t best_rd  = INT64_MAX;
1126     int best_b_count = -1;
1127
1128     assert(scale >= 0 && scale <= 3);
1129
1130     //emms_c();
1131     //s->next_picture_ptr->quality;
1132     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1133     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1134     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1135     if (!b_lambda) // FIXME we should do this somewhere else
1136         b_lambda = p_lambda;
1137     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1138                FF_LAMBDA_SHIFT;
1139
1140     c->width        = s->width  >> scale;
1141     c->height       = s->height >> scale;
1142     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1143     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1144     c->mb_decision  = s->avctx->mb_decision;
1145     c->me_cmp       = s->avctx->me_cmp;
1146     c->mb_cmp       = s->avctx->mb_cmp;
1147     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1148     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1149     c->time_base    = s->avctx->time_base;
1150     c->max_b_frames = s->max_b_frames;
1151
1152     if (avcodec_open2(c, codec, NULL) < 0)
1153         return -1;
1154
1155     for (i = 0; i < s->max_b_frames + 2; i++) {
1156         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1157                                                 s->next_picture_ptr;
1158
1159         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1160             pre_input = *pre_input_ptr;
1161
1162             if (!pre_input.shared && i) {
1163                 pre_input.f->data[0] += INPLACE_OFFSET;
1164                 pre_input.f->data[1] += INPLACE_OFFSET;
1165                 pre_input.f->data[2] += INPLACE_OFFSET;
1166             }
1167
1168             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1169                                        s->tmp_frames[i]->linesize[0],
1170                                        pre_input.f->data[0],
1171                                        pre_input.f->linesize[0],
1172                                        c->width, c->height);
1173             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1174                                        s->tmp_frames[i]->linesize[1],
1175                                        pre_input.f->data[1],
1176                                        pre_input.f->linesize[1],
1177                                        c->width >> 1, c->height >> 1);
1178             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1179                                        s->tmp_frames[i]->linesize[2],
1180                                        pre_input.f->data[2],
1181                                        pre_input.f->linesize[2],
1182                                        c->width >> 1, c->height >> 1);
1183         }
1184     }
1185
1186     for (j = 0; j < s->max_b_frames + 1; j++) {
1187         int64_t rd = 0;
1188
1189         if (!s->input_picture[j])
1190             break;
1191
1192         c->error[0] = c->error[1] = c->error[2] = 0;
1193
1194         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1195         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1196
1197         out_size = encode_frame(c, s->tmp_frames[0]);
1198
1199         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1200
1201         for (i = 0; i < s->max_b_frames + 1; i++) {
1202             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1203
1204             s->tmp_frames[i + 1]->pict_type = is_p ?
1205                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1206             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1207
1208             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1209
1210             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1211         }
1212
1213         /* get the delayed frames */
1214         while (out_size) {
1215             out_size = encode_frame(c, NULL);
1216             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1217         }
1218
1219         rd += c->error[0] + c->error[1] + c->error[2];
1220
1221         if (rd < best_rd) {
1222             best_rd = rd;
1223             best_b_count = j;
1224         }
1225     }
1226
1227     avcodec_close(c);
1228     av_freep(&c);
1229
1230     return best_b_count;
1231 }
1232
1233 static int select_input_picture(MpegEncContext *s)
1234 {
1235     int i, ret;
1236
1237     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1238         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1239     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1240
1241     /* set next picture type & ordering */
1242     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1243         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1244             !s->next_picture_ptr || s->intra_only) {
1245             s->reordered_input_picture[0] = s->input_picture[0];
1246             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1247             s->reordered_input_picture[0]->f->coded_picture_number =
1248                 s->coded_picture_number++;
1249         } else {
1250             int b_frames;
1251
1252             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1253                 if (s->picture_in_gop_number < s->gop_size &&
1254                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1255                     // FIXME check that te gop check above is +-1 correct
1256                     av_frame_unref(s->input_picture[0]->f);
1257
1258                     emms_c();
1259                     ff_vbv_update(s, 0);
1260
1261                     goto no_output_pic;
1262                 }
1263             }
1264
1265             if (s->flags & CODEC_FLAG_PASS2) {
1266                 for (i = 0; i < s->max_b_frames + 1; i++) {
1267                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1268
1269                     if (pict_num >= s->rc_context.num_entries)
1270                         break;
1271                     if (!s->input_picture[i]) {
1272                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1273                         break;
1274                     }
1275
1276                     s->input_picture[i]->f->pict_type =
1277                         s->rc_context.entry[pict_num].new_pict_type;
1278                 }
1279             }
1280
1281             if (s->avctx->b_frame_strategy == 0) {
1282                 b_frames = s->max_b_frames;
1283                 while (b_frames && !s->input_picture[b_frames])
1284                     b_frames--;
1285             } else if (s->avctx->b_frame_strategy == 1) {
1286                 for (i = 1; i < s->max_b_frames + 1; i++) {
1287                     if (s->input_picture[i] &&
1288                         s->input_picture[i]->b_frame_score == 0) {
1289                         s->input_picture[i]->b_frame_score =
1290                             get_intra_count(s,
1291                                             s->input_picture[i    ]->f->data[0],
1292                                             s->input_picture[i - 1]->f->data[0],
1293                                             s->linesize) + 1;
1294                     }
1295                 }
1296                 for (i = 0; i < s->max_b_frames + 1; i++) {
1297                     if (!s->input_picture[i] ||
1298                         s->input_picture[i]->b_frame_score - 1 >
1299                             s->mb_num / s->avctx->b_sensitivity)
1300                         break;
1301                 }
1302
1303                 b_frames = FFMAX(0, i - 1);
1304
1305                 /* reset scores */
1306                 for (i = 0; i < b_frames + 1; i++) {
1307                     s->input_picture[i]->b_frame_score = 0;
1308                 }
1309             } else if (s->avctx->b_frame_strategy == 2) {
1310                 b_frames = estimate_best_b_count(s);
1311             } else {
1312                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1313                 b_frames = 0;
1314             }
1315
1316             emms_c();
1317
1318             for (i = b_frames - 1; i >= 0; i--) {
1319                 int type = s->input_picture[i]->f->pict_type;
1320                 if (type && type != AV_PICTURE_TYPE_B)
1321                     b_frames = i;
1322             }
1323             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1324                 b_frames == s->max_b_frames) {
1325                 av_log(s->avctx, AV_LOG_ERROR,
1326                        "warning, too many b frames in a row\n");
1327             }
1328
1329             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1330                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1331                     s->gop_size > s->picture_in_gop_number) {
1332                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1333                 } else {
1334                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1335                         b_frames = 0;
1336                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1337                 }
1338             }
1339
1340             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1341                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1342                 b_frames--;
1343
1344             s->reordered_input_picture[0] = s->input_picture[b_frames];
1345             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1346                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1347             s->reordered_input_picture[0]->f->coded_picture_number =
1348                 s->coded_picture_number++;
1349             for (i = 0; i < b_frames; i++) {
1350                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1351                 s->reordered_input_picture[i + 1]->f->pict_type =
1352                     AV_PICTURE_TYPE_B;
1353                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1354                     s->coded_picture_number++;
1355             }
1356         }
1357     }
1358 no_output_pic:
1359     if (s->reordered_input_picture[0]) {
1360         s->reordered_input_picture[0]->reference =
1361            s->reordered_input_picture[0]->f->pict_type !=
1362                AV_PICTURE_TYPE_B ? 3 : 0;
1363
1364         ff_mpeg_unref_picture(s, &s->new_picture);
1365         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1366             return ret;
1367
1368         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1369             // input is a shared pix, so we can't modifiy it -> alloc a new
1370             // one & ensure that the shared one is reuseable
1371
1372             Picture *pic;
1373             int i = ff_find_unused_picture(s, 0);
1374             if (i < 0)
1375                 return i;
1376             pic = &s->picture[i];
1377
1378             pic->reference = s->reordered_input_picture[0]->reference;
1379             if (ff_alloc_picture(s, pic, 0) < 0) {
1380                 return -1;
1381             }
1382
1383             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1384             if (ret < 0)
1385                 return ret;
1386
1387             /* mark us unused / free shared pic */
1388             av_frame_unref(s->reordered_input_picture[0]->f);
1389             s->reordered_input_picture[0]->shared = 0;
1390
1391             s->current_picture_ptr = pic;
1392         } else {
1393             // input is not a shared pix -> reuse buffer for current_pix
1394             s->current_picture_ptr = s->reordered_input_picture[0];
1395             for (i = 0; i < 4; i++) {
1396                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1397             }
1398         }
1399         ff_mpeg_unref_picture(s, &s->current_picture);
1400         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1401                                        s->current_picture_ptr)) < 0)
1402             return ret;
1403
1404         s->picture_number = s->new_picture.f->display_picture_number;
1405     } else {
1406         ff_mpeg_unref_picture(s, &s->new_picture);
1407     }
1408     return 0;
1409 }
1410
1411 static void frame_end(MpegEncContext *s)
1412 {
1413     int i;
1414
1415     if (s->unrestricted_mv &&
1416         s->current_picture.reference &&
1417         !s->intra_only) {
1418         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1419         int hshift = desc->log2_chroma_w;
1420         int vshift = desc->log2_chroma_h;
1421         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1422                                 s->h_edge_pos, s->v_edge_pos,
1423                                 EDGE_WIDTH, EDGE_WIDTH,
1424                                 EDGE_TOP | EDGE_BOTTOM);
1425         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1426                                 s->h_edge_pos >> hshift,
1427                                 s->v_edge_pos >> vshift,
1428                                 EDGE_WIDTH >> hshift,
1429                                 EDGE_WIDTH >> vshift,
1430                                 EDGE_TOP | EDGE_BOTTOM);
1431         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1432                                 s->h_edge_pos >> hshift,
1433                                 s->v_edge_pos >> vshift,
1434                                 EDGE_WIDTH >> hshift,
1435                                 EDGE_WIDTH >> vshift,
1436                                 EDGE_TOP | EDGE_BOTTOM);
1437     }
1438
1439     emms_c();
1440
1441     s->last_pict_type                 = s->pict_type;
1442     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1443     if (s->pict_type!= AV_PICTURE_TYPE_B)
1444         s->last_non_b_pict_type = s->pict_type;
1445
1446     if (s->encoding) {
1447         /* release non-reference frames */
1448         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1449             if (!s->picture[i].reference)
1450                 ff_mpeg_unref_picture(s, &s->picture[i]);
1451         }
1452     }
1453
1454     s->avctx->coded_frame = s->current_picture_ptr->f;
1455
1456 }
1457
1458 static void update_noise_reduction(MpegEncContext *s)
1459 {
1460     int intra, i;
1461
1462     for (intra = 0; intra < 2; intra++) {
1463         if (s->dct_count[intra] > (1 << 16)) {
1464             for (i = 0; i < 64; i++) {
1465                 s->dct_error_sum[intra][i] >>= 1;
1466             }
1467             s->dct_count[intra] >>= 1;
1468         }
1469
1470         for (i = 0; i < 64; i++) {
1471             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1472                                        s->dct_count[intra] +
1473                                        s->dct_error_sum[intra][i] / 2) /
1474                                       (s->dct_error_sum[intra][i] + 1);
1475         }
1476     }
1477 }
1478
1479 static int frame_start(MpegEncContext *s)
1480 {
1481     int ret;
1482
1483     /* mark & release old frames */
1484     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1485         s->last_picture_ptr != s->next_picture_ptr &&
1486         s->last_picture_ptr->f->buf[0]) {
1487         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1488     }
1489
1490     s->current_picture_ptr->f->pict_type = s->pict_type;
1491     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1492
1493     ff_mpeg_unref_picture(s, &s->current_picture);
1494     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1495                                    s->current_picture_ptr)) < 0)
1496         return ret;
1497
1498     if (s->pict_type != AV_PICTURE_TYPE_B) {
1499         s->last_picture_ptr = s->next_picture_ptr;
1500         if (!s->droppable)
1501             s->next_picture_ptr = s->current_picture_ptr;
1502     }
1503
1504     if (s->last_picture_ptr) {
1505         ff_mpeg_unref_picture(s, &s->last_picture);
1506         if (s->last_picture_ptr->f->buf[0] &&
1507             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1508                                        s->last_picture_ptr)) < 0)
1509             return ret;
1510     }
1511     if (s->next_picture_ptr) {
1512         ff_mpeg_unref_picture(s, &s->next_picture);
1513         if (s->next_picture_ptr->f->buf[0] &&
1514             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1515                                        s->next_picture_ptr)) < 0)
1516             return ret;
1517     }
1518
1519     if (s->picture_structure!= PICT_FRAME) {
1520         int i;
1521         for (i = 0; i < 4; i++) {
1522             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1523                 s->current_picture.f->data[i] +=
1524                     s->current_picture.f->linesize[i];
1525             }
1526             s->current_picture.f->linesize[i] *= 2;
1527             s->last_picture.f->linesize[i]    *= 2;
1528             s->next_picture.f->linesize[i]    *= 2;
1529         }
1530     }
1531
1532     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1533         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1534         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1535     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1536         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1537         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1538     } else {
1539         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1540         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1541     }
1542
1543     if (s->dct_error_sum) {
1544         assert(s->avctx->noise_reduction && s->encoding);
1545         update_noise_reduction(s);
1546     }
1547
1548     return 0;
1549 }
1550
1551 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1552                           const AVFrame *pic_arg, int *got_packet)
1553 {
1554     MpegEncContext *s = avctx->priv_data;
1555     int i, stuffing_count, ret;
1556     int context_count = s->slice_context_count;
1557
1558     s->picture_in_gop_number++;
1559
1560     if (load_input_picture(s, pic_arg) < 0)
1561         return -1;
1562
1563     if (select_input_picture(s) < 0) {
1564         return -1;
1565     }
1566
1567     /* output? */
1568     if (s->new_picture.f->data[0]) {
1569         if (!pkt->data &&
1570             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1571             return ret;
1572         if (s->mb_info) {
1573             s->mb_info_ptr = av_packet_new_side_data(pkt,
1574                                  AV_PKT_DATA_H263_MB_INFO,
1575                                  s->mb_width*s->mb_height*12);
1576             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1577         }
1578
1579         for (i = 0; i < context_count; i++) {
1580             int start_y = s->thread_context[i]->start_mb_y;
1581             int   end_y = s->thread_context[i]->  end_mb_y;
1582             int h       = s->mb_height;
1583             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1584             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1585
1586             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1587         }
1588
1589         s->pict_type = s->new_picture.f->pict_type;
1590         //emms_c();
1591         ret = frame_start(s);
1592         if (ret < 0)
1593             return ret;
1594 vbv_retry:
1595         if (encode_picture(s, s->picture_number) < 0)
1596             return -1;
1597
1598         avctx->header_bits = s->header_bits;
1599         avctx->mv_bits     = s->mv_bits;
1600         avctx->misc_bits   = s->misc_bits;
1601         avctx->i_tex_bits  = s->i_tex_bits;
1602         avctx->p_tex_bits  = s->p_tex_bits;
1603         avctx->i_count     = s->i_count;
1604         // FIXME f/b_count in avctx
1605         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1606         avctx->skip_count  = s->skip_count;
1607
1608         frame_end(s);
1609
1610         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1611             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1612
1613         if (avctx->rc_buffer_size) {
1614             RateControlContext *rcc = &s->rc_context;
1615             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1616
1617             if (put_bits_count(&s->pb) > max_size &&
1618                 s->lambda < s->lmax) {
1619                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1620                                        (s->qscale + 1) / s->qscale);
1621                 if (s->adaptive_quant) {
1622                     int i;
1623                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1624                         s->lambda_table[i] =
1625                             FFMAX(s->lambda_table[i] + 1,
1626                                   s->lambda_table[i] * (s->qscale + 1) /
1627                                   s->qscale);
1628                 }
1629                 s->mb_skipped = 0;        // done in frame_start()
1630                 // done in encode_picture() so we must undo it
1631                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1632                     if (s->flipflop_rounding          ||
1633                         s->codec_id == AV_CODEC_ID_H263P ||
1634                         s->codec_id == AV_CODEC_ID_MPEG4)
1635                         s->no_rounding ^= 1;
1636                 }
1637                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1638                     s->time_base       = s->last_time_base;
1639                     s->last_non_b_time = s->time - s->pp_time;
1640                 }
1641                 for (i = 0; i < context_count; i++) {
1642                     PutBitContext *pb = &s->thread_context[i]->pb;
1643                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1644                 }
1645                 goto vbv_retry;
1646             }
1647
1648             assert(s->avctx->rc_max_rate);
1649         }
1650
1651         if (s->flags & CODEC_FLAG_PASS1)
1652             ff_write_pass1_stats(s);
1653
1654         for (i = 0; i < 4; i++) {
1655             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1656             avctx->error[i] += s->current_picture_ptr->f->error[i];
1657         }
1658
1659         if (s->flags & CODEC_FLAG_PASS1)
1660             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1661                    avctx->i_tex_bits + avctx->p_tex_bits ==
1662                        put_bits_count(&s->pb));
1663         flush_put_bits(&s->pb);
1664         s->frame_bits  = put_bits_count(&s->pb);
1665
1666         stuffing_count = ff_vbv_update(s, s->frame_bits);
1667         if (stuffing_count) {
1668             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1669                     stuffing_count + 50) {
1670                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1671                 return -1;
1672             }
1673
1674             switch (s->codec_id) {
1675             case AV_CODEC_ID_MPEG1VIDEO:
1676             case AV_CODEC_ID_MPEG2VIDEO:
1677                 while (stuffing_count--) {
1678                     put_bits(&s->pb, 8, 0);
1679                 }
1680             break;
1681             case AV_CODEC_ID_MPEG4:
1682                 put_bits(&s->pb, 16, 0);
1683                 put_bits(&s->pb, 16, 0x1C3);
1684                 stuffing_count -= 4;
1685                 while (stuffing_count--) {
1686                     put_bits(&s->pb, 8, 0xFF);
1687                 }
1688             break;
1689             default:
1690                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1691             }
1692             flush_put_bits(&s->pb);
1693             s->frame_bits  = put_bits_count(&s->pb);
1694         }
1695
1696         /* update mpeg1/2 vbv_delay for CBR */
1697         if (s->avctx->rc_max_rate                          &&
1698             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1699             s->out_format == FMT_MPEG1                     &&
1700             90000LL * (avctx->rc_buffer_size - 1) <=
1701                 s->avctx->rc_max_rate * 0xFFFFLL) {
1702             int vbv_delay, min_delay;
1703             double inbits  = s->avctx->rc_max_rate *
1704                              av_q2d(s->avctx->time_base);
1705             int    minbits = s->frame_bits - 8 *
1706                              (s->vbv_delay_ptr - s->pb.buf - 1);
1707             double bits    = s->rc_context.buffer_index + minbits - inbits;
1708
1709             if (bits < 0)
1710                 av_log(s->avctx, AV_LOG_ERROR,
1711                        "Internal error, negative bits\n");
1712
1713             assert(s->repeat_first_field == 0);
1714
1715             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1716             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1717                         s->avctx->rc_max_rate;
1718
1719             vbv_delay = FFMAX(vbv_delay, min_delay);
1720
1721             assert(vbv_delay < 0xFFFF);
1722
1723             s->vbv_delay_ptr[0] &= 0xF8;
1724             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1725             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1726             s->vbv_delay_ptr[2] &= 0x07;
1727             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1728             avctx->vbv_delay     = vbv_delay * 300;
1729         }
1730         s->total_bits     += s->frame_bits;
1731         avctx->frame_bits  = s->frame_bits;
1732
1733         pkt->pts = s->current_picture.f->pts;
1734         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1735             if (!s->current_picture.f->coded_picture_number)
1736                 pkt->dts = pkt->pts - s->dts_delta;
1737             else
1738                 pkt->dts = s->reordered_pts;
1739             s->reordered_pts = pkt->pts;
1740         } else
1741             pkt->dts = pkt->pts;
1742         if (s->current_picture.f->key_frame)
1743             pkt->flags |= AV_PKT_FLAG_KEY;
1744         if (s->mb_info)
1745             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1746     } else {
1747         s->frame_bits = 0;
1748     }
1749     assert((s->frame_bits & 7) == 0);
1750
1751     pkt->size = s->frame_bits / 8;
1752     *got_packet = !!pkt->size;
1753     return 0;
1754 }
1755
1756 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1757                                                 int n, int threshold)
1758 {
1759     static const char tab[64] = {
1760         3, 2, 2, 1, 1, 1, 1, 1,
1761         1, 1, 1, 1, 1, 1, 1, 1,
1762         1, 1, 1, 1, 1, 1, 1, 1,
1763         0, 0, 0, 0, 0, 0, 0, 0,
1764         0, 0, 0, 0, 0, 0, 0, 0,
1765         0, 0, 0, 0, 0, 0, 0, 0,
1766         0, 0, 0, 0, 0, 0, 0, 0,
1767         0, 0, 0, 0, 0, 0, 0, 0
1768     };
1769     int score = 0;
1770     int run = 0;
1771     int i;
1772     int16_t *block = s->block[n];
1773     const int last_index = s->block_last_index[n];
1774     int skip_dc;
1775
1776     if (threshold < 0) {
1777         skip_dc = 0;
1778         threshold = -threshold;
1779     } else
1780         skip_dc = 1;
1781
1782     /* Are all we could set to zero already zero? */
1783     if (last_index <= skip_dc - 1)
1784         return;
1785
1786     for (i = 0; i <= last_index; i++) {
1787         const int j = s->intra_scantable.permutated[i];
1788         const int level = FFABS(block[j]);
1789         if (level == 1) {
1790             if (skip_dc && i == 0)
1791                 continue;
1792             score += tab[run];
1793             run = 0;
1794         } else if (level > 1) {
1795             return;
1796         } else {
1797             run++;
1798         }
1799     }
1800     if (score >= threshold)
1801         return;
1802     for (i = skip_dc; i <= last_index; i++) {
1803         const int j = s->intra_scantable.permutated[i];
1804         block[j] = 0;
1805     }
1806     if (block[0])
1807         s->block_last_index[n] = 0;
1808     else
1809         s->block_last_index[n] = -1;
1810 }
1811
1812 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1813                                int last_index)
1814 {
1815     int i;
1816     const int maxlevel = s->max_qcoeff;
1817     const int minlevel = s->min_qcoeff;
1818     int overflow = 0;
1819
1820     if (s->mb_intra) {
1821         i = 1; // skip clipping of intra dc
1822     } else
1823         i = 0;
1824
1825     for (; i <= last_index; i++) {
1826         const int j = s->intra_scantable.permutated[i];
1827         int level = block[j];
1828
1829         if (level > maxlevel) {
1830             level = maxlevel;
1831             overflow++;
1832         } else if (level < minlevel) {
1833             level = minlevel;
1834             overflow++;
1835         }
1836
1837         block[j] = level;
1838     }
1839
1840     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1841         av_log(s->avctx, AV_LOG_INFO,
1842                "warning, clipping %d dct coefficients to %d..%d\n",
1843                overflow, minlevel, maxlevel);
1844 }
1845
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the neighbourhood of up to 3x3 pixels (cut off at the block
 * border) is examined and the weight is set to
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries in raster order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            const int n     = (bottom - top) * (right - left);
            int total    = 0;
            int total_sq = 0;
            int u, v;

            for (v = top; v < bottom; v++) {
                const uint8_t *line = ptr + v * stride;

                for (u = left; u < right; u++) {
                    const int px = line[u];

                    total    += px;
                    total_sq += px * px;
                }
            }
            weight[8 * row + col] =
                (36 * ff_sqrt(n * total_sq - total * total)) / n;
        }
    }
}
1869
1870 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1871                                                 int motion_x, int motion_y,
1872                                                 int mb_block_height,
1873                                                 int mb_block_count)
1874 {
1875     int16_t weight[8][64];
1876     int16_t orig[8][64];
1877     const int mb_x = s->mb_x;
1878     const int mb_y = s->mb_y;
1879     int i;
1880     int skip_dct[8];
1881     int dct_offset = s->linesize * 8; // default for progressive frames
1882     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1883     ptrdiff_t wrap_y, wrap_c;
1884
1885     for (i = 0; i < mb_block_count; i++)
1886         skip_dct[i] = s->skipdct;
1887
1888     if (s->adaptive_quant) {
1889         const int last_qp = s->qscale;
1890         const int mb_xy = mb_x + mb_y * s->mb_stride;
1891
1892         s->lambda = s->lambda_table[mb_xy];
1893         update_qscale(s);
1894
1895         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1896             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1897             s->dquant = s->qscale - last_qp;
1898
1899             if (s->out_format == FMT_H263) {
1900                 s->dquant = av_clip(s->dquant, -2, 2);
1901
1902                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1903                     if (!s->mb_intra) {
1904                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1905                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1906                                 s->dquant = 0;
1907                         }
1908                         if (s->mv_type == MV_TYPE_8X8)
1909                             s->dquant = 0;
1910                     }
1911                 }
1912             }
1913         }
1914         ff_set_qscale(s, last_qp + s->dquant);
1915     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1916         ff_set_qscale(s, s->qscale + s->dquant);
1917
1918     wrap_y = s->linesize;
1919     wrap_c = s->uvlinesize;
1920     ptr_y  = s->new_picture.f->data[0] +
1921              (mb_y * 16 * wrap_y)              + mb_x * 16;
1922     ptr_cb = s->new_picture.f->data[1] +
1923              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1924     ptr_cr = s->new_picture.f->data[2] +
1925              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1926
1927     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1928         uint8_t *ebuf = s->edge_emu_buffer + 32;
1929         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1930                                  wrap_y, wrap_y,
1931                                  16, 16, mb_x * 16, mb_y * 16,
1932                                  s->width, s->height);
1933         ptr_y = ebuf;
1934         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1935                                  wrap_c, wrap_c,
1936                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1937                                  s->width >> 1, s->height >> 1);
1938         ptr_cb = ebuf + 18 * wrap_y;
1939         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1940                                  wrap_c, wrap_c,
1941                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1942                                  s->width >> 1, s->height >> 1);
1943         ptr_cr = ebuf + 18 * wrap_y + 8;
1944     }
1945
1946     if (s->mb_intra) {
1947         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1948             int progressive_score, interlaced_score;
1949
1950             s->interlaced_dct = 0;
1951             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1952                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1953                                                      NULL, wrap_y, 8) - 400;
1954
1955             if (progressive_score > 0) {
1956                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1957                                                         NULL, wrap_y * 2, 8) +
1958                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1959                                                         NULL, wrap_y * 2, 8);
1960                 if (progressive_score > interlaced_score) {
1961                     s->interlaced_dct = 1;
1962
1963                     dct_offset = wrap_y;
1964                     wrap_y <<= 1;
1965                     if (s->chroma_format == CHROMA_422)
1966                         wrap_c <<= 1;
1967                 }
1968             }
1969         }
1970
1971         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1972         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1973         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1974         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1975
1976         if (s->flags & CODEC_FLAG_GRAY) {
1977             skip_dct[4] = 1;
1978             skip_dct[5] = 1;
1979         } else {
1980             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1981             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1982             if (!s->chroma_y_shift) { /* 422 */
1983                 s->pdsp.get_pixels(s->block[6],
1984                                    ptr_cb + (dct_offset >> 1), wrap_c);
1985                 s->pdsp.get_pixels(s->block[7],
1986                                    ptr_cr + (dct_offset >> 1), wrap_c);
1987             }
1988         }
1989     } else {
1990         op_pixels_func (*op_pix)[4];
1991         qpel_mc_func (*op_qpix)[16];
1992         uint8_t *dest_y, *dest_cb, *dest_cr;
1993
1994         dest_y  = s->dest[0];
1995         dest_cb = s->dest[1];
1996         dest_cr = s->dest[2];
1997
1998         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1999             op_pix  = s->hdsp.put_pixels_tab;
2000             op_qpix = s->qdsp.put_qpel_pixels_tab;
2001         } else {
2002             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2003             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2004         }
2005
2006         if (s->mv_dir & MV_DIR_FORWARD) {
2007             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2008                           s->last_picture.f->data,
2009                           op_pix, op_qpix);
2010             op_pix  = s->hdsp.avg_pixels_tab;
2011             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2012         }
2013         if (s->mv_dir & MV_DIR_BACKWARD) {
2014             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2015                           s->next_picture.f->data,
2016                           op_pix, op_qpix);
2017         }
2018
2019         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2020             int progressive_score, interlaced_score;
2021
2022             s->interlaced_dct = 0;
2023             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2024                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2025                                                      ptr_y + wrap_y * 8,
2026                                                      wrap_y, 8) - 400;
2027
2028             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2029                 progressive_score -= 400;
2030
2031             if (progressive_score > 0) {
2032                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2033                                                         wrap_y * 2, 8) +
2034                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2035                                                         ptr_y + wrap_y,
2036                                                         wrap_y * 2, 8);
2037
2038                 if (progressive_score > interlaced_score) {
2039                     s->interlaced_dct = 1;
2040
2041                     dct_offset = wrap_y;
2042                     wrap_y <<= 1;
2043                     if (s->chroma_format == CHROMA_422)
2044                         wrap_c <<= 1;
2045                 }
2046             }
2047         }
2048
2049         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2050         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2051         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2052                             dest_y + dct_offset, wrap_y);
2053         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2054                             dest_y + dct_offset + 8, wrap_y);
2055
2056         if (s->flags & CODEC_FLAG_GRAY) {
2057             skip_dct[4] = 1;
2058             skip_dct[5] = 1;
2059         } else {
2060             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2061             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2062             if (!s->chroma_y_shift) { /* 422 */
2063                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2064                                     dest_cb + (dct_offset >> 1), wrap_c);
2065                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2066                                     dest_cr + (dct_offset >> 1), wrap_c);
2067             }
2068         }
2069         /* pre quantization */
2070         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2071                 2 * s->qscale * s->qscale) {
2072             // FIXME optimize
2073             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2074                 skip_dct[0] = 1;
2075             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2076                 skip_dct[1] = 1;
2077             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2078                                wrap_y, 8) < 20 * s->qscale)
2079                 skip_dct[2] = 1;
2080             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2081                                wrap_y, 8) < 20 * s->qscale)
2082                 skip_dct[3] = 1;
2083             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2084                 skip_dct[4] = 1;
2085             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2086                 skip_dct[5] = 1;
2087             if (!s->chroma_y_shift) { /* 422 */
2088                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2089                                    dest_cb + (dct_offset >> 1),
2090                                    wrap_c, 8) < 20 * s->qscale)
2091                     skip_dct[6] = 1;
2092                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2093                                    dest_cr + (dct_offset >> 1),
2094                                    wrap_c, 8) < 20 * s->qscale)
2095                     skip_dct[7] = 1;
2096             }
2097         }
2098     }
2099
2100     if (s->quantizer_noise_shaping) {
2101         if (!skip_dct[0])
2102             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2103         if (!skip_dct[1])
2104             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2105         if (!skip_dct[2])
2106             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2107         if (!skip_dct[3])
2108             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2109         if (!skip_dct[4])
2110             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2111         if (!skip_dct[5])
2112             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2113         if (!s->chroma_y_shift) { /* 422 */
2114             if (!skip_dct[6])
2115                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2116                                   wrap_c);
2117             if (!skip_dct[7])
2118                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2119                                   wrap_c);
2120         }
2121         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2122     }
2123
2124     /* DCT & quantize */
2125     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2126     {
2127         for (i = 0; i < mb_block_count; i++) {
2128             if (!skip_dct[i]) {
2129                 int overflow;
2130                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2131                 // FIXME we could decide to change to quantizer instead of
2132                 // clipping
2133                 // JS: I don't think that would be a good idea it could lower
2134                 //     quality instead of improve it. Just INTRADC clipping
2135                 //     deserves changes in quantizer
2136                 if (overflow)
2137                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2138             } else
2139                 s->block_last_index[i] = -1;
2140         }
2141         if (s->quantizer_noise_shaping) {
2142             for (i = 0; i < mb_block_count; i++) {
2143                 if (!skip_dct[i]) {
2144                     s->block_last_index[i] =
2145                         dct_quantize_refine(s, s->block[i], weight[i],
2146                                             orig[i], i, s->qscale);
2147                 }
2148             }
2149         }
2150
2151         if (s->luma_elim_threshold && !s->mb_intra)
2152             for (i = 0; i < 4; i++)
2153                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2154         if (s->chroma_elim_threshold && !s->mb_intra)
2155             for (i = 4; i < mb_block_count; i++)
2156                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2157
2158         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2159             for (i = 0; i < mb_block_count; i++) {
2160                 if (s->block_last_index[i] == -1)
2161                     s->coded_score[i] = INT_MAX / 256;
2162             }
2163         }
2164     }
2165
2166     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2167         s->block_last_index[4] =
2168         s->block_last_index[5] = 0;
2169         s->block[4][0] =
2170         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2171     }
2172
2173     // non c quantize code returns incorrect block_last_index FIXME
2174     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2175         for (i = 0; i < mb_block_count; i++) {
2176             int j;
2177             if (s->block_last_index[i] > 0) {
2178                 for (j = 63; j > 0; j--) {
2179                     if (s->block[i][s->intra_scantable.permutated[j]])
2180                         break;
2181                 }
2182                 s->block_last_index[i] = j;
2183             }
2184         }
2185     }
2186
2187     /* huffman encode */
2188     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2189     case AV_CODEC_ID_MPEG1VIDEO:
2190     case AV_CODEC_ID_MPEG2VIDEO:
2191         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2192             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2193         break;
2194     case AV_CODEC_ID_MPEG4:
2195         if (CONFIG_MPEG4_ENCODER)
2196             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2197         break;
2198     case AV_CODEC_ID_MSMPEG4V2:
2199     case AV_CODEC_ID_MSMPEG4V3:
2200     case AV_CODEC_ID_WMV1:
2201         if (CONFIG_MSMPEG4_ENCODER)
2202             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2203         break;
2204     case AV_CODEC_ID_WMV2:
2205         if (CONFIG_WMV2_ENCODER)
2206             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2207         break;
2208     case AV_CODEC_ID_H261:
2209         if (CONFIG_H261_ENCODER)
2210             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2211         break;
2212     case AV_CODEC_ID_H263:
2213     case AV_CODEC_ID_H263P:
2214     case AV_CODEC_ID_FLV1:
2215     case AV_CODEC_ID_RV10:
2216     case AV_CODEC_ID_RV20:
2217         if (CONFIG_H263_ENCODER)
2218             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2219         break;
2220     case AV_CODEC_ID_MJPEG:
2221         if (CONFIG_MJPEG_ENCODER)
2222             ff_mjpeg_encode_mb(s, s->block);
2223         break;
2224     default:
2225         assert(0);
2226     }
2227 }
2228
2229 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2230 {
2231     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2232     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2233 }
2234
2235 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2236     int i;
2237
2238     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2239
2240     /* mpeg1 */
2241     d->mb_skip_run= s->mb_skip_run;
2242     for(i=0; i<3; i++)
2243         d->last_dc[i] = s->last_dc[i];
2244
2245     /* statistics */
2246     d->mv_bits= s->mv_bits;
2247     d->i_tex_bits= s->i_tex_bits;
2248     d->p_tex_bits= s->p_tex_bits;
2249     d->i_count= s->i_count;
2250     d->f_count= s->f_count;
2251     d->b_count= s->b_count;
2252     d->skip_count= s->skip_count;
2253     d->misc_bits= s->misc_bits;
2254     d->last_bits= 0;
2255
2256     d->mb_skipped= 0;
2257     d->qscale= s->qscale;
2258     d->dquant= s->dquant;
2259
2260     d->esc3_level_length= s->esc3_level_length;
2261 }
2262
2263 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2264     int i;
2265
2266     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2267     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2268
2269     /* mpeg1 */
2270     d->mb_skip_run= s->mb_skip_run;
2271     for(i=0; i<3; i++)
2272         d->last_dc[i] = s->last_dc[i];
2273
2274     /* statistics */
2275     d->mv_bits= s->mv_bits;
2276     d->i_tex_bits= s->i_tex_bits;
2277     d->p_tex_bits= s->p_tex_bits;
2278     d->i_count= s->i_count;
2279     d->f_count= s->f_count;
2280     d->b_count= s->b_count;
2281     d->skip_count= s->skip_count;
2282     d->misc_bits= s->misc_bits;
2283
2284     d->mb_intra= s->mb_intra;
2285     d->mb_skipped= s->mb_skipped;
2286     d->mv_type= s->mv_type;
2287     d->mv_dir= s->mv_dir;
2288     d->pb= s->pb;
2289     if(s->data_partitioning){
2290         d->pb2= s->pb2;
2291         d->tex_pb= s->tex_pb;
2292     }
2293     d->block= s->block;
2294     for(i=0; i<8; i++)
2295         d->block_last_index[i]= s->block_last_index[i];
2296     d->interlaced_dct= s->interlaced_dct;
2297     d->qscale= s->qscale;
2298
2299     d->esc3_level_length= s->esc3_level_length;
2300 }
2301
2302 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2303                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2304                            int *dmin, int *next_block, int motion_x, int motion_y)
2305 {
2306     int score;
2307     uint8_t *dest_backup[3];
2308
2309     copy_context_before_encode(s, backup, type);
2310
2311     s->block= s->blocks[*next_block];
2312     s->pb= pb[*next_block];
2313     if(s->data_partitioning){
2314         s->pb2   = pb2   [*next_block];
2315         s->tex_pb= tex_pb[*next_block];
2316     }
2317
2318     if(*next_block){
2319         memcpy(dest_backup, s->dest, sizeof(s->dest));
2320         s->dest[0] = s->rd_scratchpad;
2321         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2322         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2323         assert(s->linesize >= 32); //FIXME
2324     }
2325
2326     encode_mb(s, motion_x, motion_y);
2327
2328     score= put_bits_count(&s->pb);
2329     if(s->data_partitioning){
2330         score+= put_bits_count(&s->pb2);
2331         score+= put_bits_count(&s->tex_pb);
2332     }
2333
2334     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2335         ff_mpv_decode_mb(s, s->block);
2336
2337         score *= s->lambda2;
2338         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2339     }
2340
2341     if(*next_block){
2342         memcpy(s->dest, dest_backup, sizeof(s->dest));
2343     }
2344
2345     if(score<*dmin){
2346         *dmin= score;
2347         *next_block^=1;
2348
2349         copy_context_after_encode(best, s, type);
2350     }
2351 }
2352
2353 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2354     uint32_t *sq = ff_square_tab + 256;
2355     int acc=0;
2356     int x,y;
2357
2358     if(w==16 && h==16)
2359         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2360     else if(w==8 && h==8)
2361         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2362
2363     for(y=0; y<h; y++){
2364         for(x=0; x<w; x++){
2365             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2366         }
2367     }
2368
2369     assert(acc>=0);
2370
2371     return acc;
2372 }
2373
2374 static int sse_mb(MpegEncContext *s){
2375     int w= 16;
2376     int h= 16;
2377
2378     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2379     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2380
2381     if(w==16 && h==16)
2382       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2383         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2384                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2385                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2386       }else{
2387         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2388                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2389                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2390       }
2391     else
2392         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2393                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2394                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2395 }
2396
2397 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2398     MpegEncContext *s= *(void**)arg;
2399
2400
2401     s->me.pre_pass=1;
2402     s->me.dia_size= s->avctx->pre_dia_size;
2403     s->first_slice_line=1;
2404     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2405         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2406             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2407         }
2408         s->first_slice_line=0;
2409     }
2410
2411     s->me.pre_pass=0;
2412
2413     return 0;
2414 }
2415
2416 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2417     MpegEncContext *s= *(void**)arg;
2418
2419     s->me.dia_size= s->avctx->dia_size;
2420     s->first_slice_line=1;
2421     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2422         s->mb_x=0; //for block init below
2423         ff_init_block_index(s);
2424         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2425             s->block_index[0]+=2;
2426             s->block_index[1]+=2;
2427             s->block_index[2]+=2;
2428             s->block_index[3]+=2;
2429
2430             /* compute motion vector & mb_type and store in context */
2431             if(s->pict_type==AV_PICTURE_TYPE_B)
2432                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2433             else
2434                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2435         }
2436         s->first_slice_line=0;
2437     }
2438     return 0;
2439 }
2440
2441 static int mb_var_thread(AVCodecContext *c, void *arg){
2442     MpegEncContext *s= *(void**)arg;
2443     int mb_x, mb_y;
2444
2445     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2446         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2447             int xx = mb_x * 16;
2448             int yy = mb_y * 16;
2449             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2450             int varc;
2451             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2452
2453             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2454                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2455
2456             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2457             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2458             s->me.mb_var_sum_temp    += varc;
2459         }
2460     }
2461     return 0;
2462 }
2463
2464 static void write_slice_end(MpegEncContext *s){
2465     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2466         if(s->partitioned_frame){
2467             ff_mpeg4_merge_partitions(s);
2468         }
2469
2470         ff_mpeg4_stuffing(&s->pb);
2471     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2472         ff_mjpeg_encode_stuffing(&s->pb);
2473     }
2474
2475     avpriv_align_put_bits(&s->pb);
2476     flush_put_bits(&s->pb);
2477
2478     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2479         s->misc_bits+= get_bits_diff(s);
2480 }
2481
2482 static void write_mb_info(MpegEncContext *s)
2483 {
2484     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2485     int offset = put_bits_count(&s->pb);
2486     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2487     int gobn = s->mb_y / s->gob_index;
2488     int pred_x, pred_y;
2489     if (CONFIG_H263_ENCODER)
2490         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2491     bytestream_put_le32(&ptr, offset);
2492     bytestream_put_byte(&ptr, s->qscale);
2493     bytestream_put_byte(&ptr, gobn);
2494     bytestream_put_le16(&ptr, mba);
2495     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2496     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2497     /* 4MV not implemented */
2498     bytestream_put_byte(&ptr, 0); /* hmv2 */
2499     bytestream_put_byte(&ptr, 0); /* vmv2 */
2500 }
2501
2502 static void update_mb_info(MpegEncContext *s, int startcode)
2503 {
2504     if (!s->mb_info)
2505         return;
2506     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2507         s->mb_info_size += 12;
2508         s->prev_mb_info = s->last_mb_info;
2509     }
2510     if (startcode) {
2511         s->prev_mb_info = put_bits_count(&s->pb)/8;
2512         /* This might have incremented mb_info_size above, and we return without
2513          * actually writing any info into that slot yet. But in that case,
2514          * this will be called again at the start of the after writing the
2515          * start code, actually writing the mb info. */
2516         return;
2517     }
2518
2519     s->last_mb_info = put_bits_count(&s->pb)/8;
2520     if (!s->mb_info_size)
2521         s->mb_info_size += 12;
2522     write_mb_info(s);
2523 }
2524
2525 static int encode_thread(AVCodecContext *c, void *arg){
2526     MpegEncContext *s= *(void**)arg;
2527     int mb_x, mb_y, pdif = 0;
2528     int chr_h= 16>>s->chroma_y_shift;
2529     int i, j;
2530     MpegEncContext best_s, backup_s;
2531     uint8_t bit_buf[2][MAX_MB_BYTES];
2532     uint8_t bit_buf2[2][MAX_MB_BYTES];
2533     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2534     PutBitContext pb[2], pb2[2], tex_pb[2];
2535
2536     for(i=0; i<2; i++){
2537         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2538         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2539         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2540     }
2541
2542     s->last_bits= put_bits_count(&s->pb);
2543     s->mv_bits=0;
2544     s->misc_bits=0;
2545     s->i_tex_bits=0;
2546     s->p_tex_bits=0;
2547     s->i_count=0;
2548     s->f_count=0;
2549     s->b_count=0;
2550     s->skip_count=0;
2551
2552     for(i=0; i<3; i++){
2553         /* init last dc values */
2554         /* note: quant matrix value (8) is implied here */
2555         s->last_dc[i] = 128 << s->intra_dc_precision;
2556
2557         s->current_picture.f->error[i] = 0;
2558     }
2559     s->mb_skip_run = 0;
2560     memset(s->last_mv, 0, sizeof(s->last_mv));
2561
2562     s->last_mv_dir = 0;
2563
2564     switch(s->codec_id){
2565     case AV_CODEC_ID_H263:
2566     case AV_CODEC_ID_H263P:
2567     case AV_CODEC_ID_FLV1:
2568         if (CONFIG_H263_ENCODER)
2569             s->gob_index = ff_h263_get_gob_height(s);
2570         break;
2571     case AV_CODEC_ID_MPEG4:
2572         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2573             ff_mpeg4_init_partitions(s);
2574         break;
2575     }
2576
2577     s->resync_mb_x=0;
2578     s->resync_mb_y=0;
2579     s->first_slice_line = 1;
2580     s->ptr_lastgob = s->pb.buf;
2581     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2582         s->mb_x=0;
2583         s->mb_y= mb_y;
2584
2585         ff_set_qscale(s, s->qscale);
2586         ff_init_block_index(s);
2587
2588         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2589             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2590             int mb_type= s->mb_type[xy];
2591 //            int d;
2592             int dmin= INT_MAX;
2593             int dir;
2594
2595             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2596                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2597                 return -1;
2598             }
2599             if(s->data_partitioning){
2600                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2601                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2602                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2603                     return -1;
2604                 }
2605             }
2606
2607             s->mb_x = mb_x;
2608             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2609             ff_update_block_index(s);
2610
2611             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2612                 ff_h261_reorder_mb_index(s);
2613                 xy= s->mb_y*s->mb_stride + s->mb_x;
2614                 mb_type= s->mb_type[xy];
2615             }
2616
2617             /* write gob / video packet header  */
2618             if(s->rtp_mode){
2619                 int current_packet_size, is_gob_start;
2620
2621                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2622
2623                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2624
2625                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2626
2627                 switch(s->codec_id){
2628                 case AV_CODEC_ID_H263:
2629                 case AV_CODEC_ID_H263P:
2630                     if(!s->h263_slice_structured)
2631                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2632                     break;
2633                 case AV_CODEC_ID_MPEG2VIDEO:
2634                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2635                 case AV_CODEC_ID_MPEG1VIDEO:
2636                     if(s->mb_skip_run) is_gob_start=0;
2637                     break;
2638                 }
2639
2640                 if(is_gob_start){
2641                     if(s->start_mb_y != mb_y || mb_x!=0){
2642                         write_slice_end(s);
2643
2644                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2645                             ff_mpeg4_init_partitions(s);
2646                         }
2647                     }
2648
2649                     assert((put_bits_count(&s->pb)&7) == 0);
2650                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2651
2652                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2653                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2654                         int d = 100 / s->error_rate;
2655                         if(r % d == 0){
2656                             current_packet_size=0;
2657                             s->pb.buf_ptr= s->ptr_lastgob;
2658                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2659                         }
2660                     }
2661
2662                     if (s->avctx->rtp_callback){
2663                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2664                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2665                     }
2666                     update_mb_info(s, 1);
2667
2668                     switch(s->codec_id){
2669                     case AV_CODEC_ID_MPEG4:
2670                         if (CONFIG_MPEG4_ENCODER) {
2671                             ff_mpeg4_encode_video_packet_header(s);
2672                             ff_mpeg4_clean_buffers(s);
2673                         }
2674                     break;
2675                     case AV_CODEC_ID_MPEG1VIDEO:
2676                     case AV_CODEC_ID_MPEG2VIDEO:
2677                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2678                             ff_mpeg1_encode_slice_header(s);
2679                             ff_mpeg1_clean_buffers(s);
2680                         }
2681                     break;
2682                     case AV_CODEC_ID_H263:
2683                     case AV_CODEC_ID_H263P:
2684                         if (CONFIG_H263_ENCODER)
2685                             ff_h263_encode_gob_header(s, mb_y);
2686                     break;
2687                     }
2688
2689                     if(s->flags&CODEC_FLAG_PASS1){
2690                         int bits= put_bits_count(&s->pb);
2691                         s->misc_bits+= bits - s->last_bits;
2692                         s->last_bits= bits;
2693                     }
2694
2695                     s->ptr_lastgob += current_packet_size;
2696                     s->first_slice_line=1;
2697                     s->resync_mb_x=mb_x;
2698                     s->resync_mb_y=mb_y;
2699                 }
2700             }
2701
2702             if(  (s->resync_mb_x   == s->mb_x)
2703                && s->resync_mb_y+1 == s->mb_y){
2704                 s->first_slice_line=0;
2705             }
2706
2707             s->mb_skipped=0;
2708             s->dquant=0; //only for QP_RD
2709
2710             update_mb_info(s, 0);
2711
2712             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2713                 int next_block=0;
2714                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2715
2716                 copy_context_before_encode(&backup_s, s, -1);
2717                 backup_s.pb= s->pb;
2718                 best_s.data_partitioning= s->data_partitioning;
2719                 best_s.partitioned_frame= s->partitioned_frame;
2720                 if(s->data_partitioning){
2721                     backup_s.pb2= s->pb2;
2722                     backup_s.tex_pb= s->tex_pb;
2723                 }
2724
2725                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2726                     s->mv_dir = MV_DIR_FORWARD;
2727                     s->mv_type = MV_TYPE_16X16;
2728                     s->mb_intra= 0;
2729                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2730                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2731                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2732                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2733                 }
2734                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2735                     s->mv_dir = MV_DIR_FORWARD;
2736                     s->mv_type = MV_TYPE_FIELD;
2737                     s->mb_intra= 0;
2738                     for(i=0; i<2; i++){
2739                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2740                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2741                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2742                     }
2743                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2744                                  &dmin, &next_block, 0, 0);
2745                 }
2746                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2747                     s->mv_dir = MV_DIR_FORWARD;
2748                     s->mv_type = MV_TYPE_16X16;
2749                     s->mb_intra= 0;
2750                     s->mv[0][0][0] = 0;
2751                     s->mv[0][0][1] = 0;
2752                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2753                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2754                 }
2755                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2756                     s->mv_dir = MV_DIR_FORWARD;
2757                     s->mv_type = MV_TYPE_8X8;
2758                     s->mb_intra= 0;
2759                     for(i=0; i<4; i++){
2760                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2761                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2762                     }
2763                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2764                                  &dmin, &next_block, 0, 0);
2765                 }
2766                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2767                     s->mv_dir = MV_DIR_FORWARD;
2768                     s->mv_type = MV_TYPE_16X16;
2769                     s->mb_intra= 0;
2770                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2771                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2772                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2773                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2774                 }
2775                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2776                     s->mv_dir = MV_DIR_BACKWARD;
2777                     s->mv_type = MV_TYPE_16X16;
2778                     s->mb_intra= 0;
2779                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2780                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2781                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2782                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2783                 }
2784                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2785                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2786                     s->mv_type = MV_TYPE_16X16;
2787                     s->mb_intra= 0;
2788                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2789                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2790                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2791                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2792                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2793                                  &dmin, &next_block, 0, 0);
2794                 }
2795                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2796                     s->mv_dir = MV_DIR_FORWARD;
2797                     s->mv_type = MV_TYPE_FIELD;
2798                     s->mb_intra= 0;
2799                     for(i=0; i<2; i++){
2800                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2801                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2802                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2803                     }
2804                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2805                                  &dmin, &next_block, 0, 0);
2806                 }
2807                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2808                     s->mv_dir = MV_DIR_BACKWARD;
2809                     s->mv_type = MV_TYPE_FIELD;
2810                     s->mb_intra= 0;
2811                     for(i=0; i<2; i++){
2812                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2813                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2814                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2815                     }
2816                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2817                                  &dmin, &next_block, 0, 0);
2818                 }
2819                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2820                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2821                     s->mv_type = MV_TYPE_FIELD;
2822                     s->mb_intra= 0;
2823                     for(dir=0; dir<2; dir++){
2824                         for(i=0; i<2; i++){
2825                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2826                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2827                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2828                         }
2829                     }
2830                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2831                                  &dmin, &next_block, 0, 0);
2832                 }
2833                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2834                     s->mv_dir = 0;
2835                     s->mv_type = MV_TYPE_16X16;
2836                     s->mb_intra= 1;
2837                     s->mv[0][0][0] = 0;
2838                     s->mv[0][0][1] = 0;
2839                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2840                                  &dmin, &next_block, 0, 0);
2841                     if(s->h263_pred || s->h263_aic){
2842                         if(best_s.mb_intra)
2843                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2844                         else
2845                             ff_clean_intra_table_entries(s); //old mode?
2846                     }
2847                 }
2848
2849                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2850                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2851                         const int last_qp= backup_s.qscale;
2852                         int qpi, qp, dc[6];
2853                         int16_t ac[6][16];
2854                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2855                         static const int dquant_tab[4]={-1,1,-2,2};
2856
2857                         assert(backup_s.dquant == 0);
2858
2859                         //FIXME intra
2860                         s->mv_dir= best_s.mv_dir;
2861                         s->mv_type = MV_TYPE_16X16;
2862                         s->mb_intra= best_s.mb_intra;
2863                         s->mv[0][0][0] = best_s.mv[0][0][0];
2864                         s->mv[0][0][1] = best_s.mv[0][0][1];
2865                         s->mv[1][0][0] = best_s.mv[1][0][0];
2866                         s->mv[1][0][1] = best_s.mv[1][0][1];
2867
2868                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2869                         for(; qpi<4; qpi++){
2870                             int dquant= dquant_tab[qpi];
2871                             qp= last_qp + dquant;
2872                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2873                                 continue;
2874                             backup_s.dquant= dquant;
2875                             if(s->mb_intra && s->dc_val[0]){
2876                                 for(i=0; i<6; i++){
2877                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2878                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2879                                 }
2880                             }
2881
2882                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2883                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2884                             if(best_s.qscale != qp){
2885                                 if(s->mb_intra && s->dc_val[0]){
2886                                     for(i=0; i<6; i++){
2887                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2888                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2889                                     }
2890                                 }
2891                             }
2892                         }
2893                     }
2894                 }
2895                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2896                     int mx= s->b_direct_mv_table[xy][0];
2897                     int my= s->b_direct_mv_table[xy][1];
2898
2899                     backup_s.dquant = 0;
2900                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2901                     s->mb_intra= 0;
2902                     ff_mpeg4_set_direct_mv(s, mx, my);
2903                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2904                                  &dmin, &next_block, mx, my);
2905                 }
2906                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2907                     backup_s.dquant = 0;
2908                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2909                     s->mb_intra= 0;
2910                     ff_mpeg4_set_direct_mv(s, 0, 0);
2911                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2912                                  &dmin, &next_block, 0, 0);
2913                 }
2914                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2915                     int coded=0;
2916                     for(i=0; i<6; i++)
2917                         coded |= s->block_last_index[i];
2918                     if(coded){
2919                         int mx,my;
2920                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2921                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2922                             mx=my=0; //FIXME find the one we actually used
2923                             ff_mpeg4_set_direct_mv(s, mx, my);
2924                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2925                             mx= s->mv[1][0][0];
2926                             my= s->mv[1][0][1];
2927                         }else{
2928                             mx= s->mv[0][0][0];
2929                             my= s->mv[0][0][1];
2930                         }
2931
2932                         s->mv_dir= best_s.mv_dir;
2933                         s->mv_type = best_s.mv_type;
2934                         s->mb_intra= 0;
2935 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2936                         s->mv[0][0][1] = best_s.mv[0][0][1];
2937                         s->mv[1][0][0] = best_s.mv[1][0][0];
2938                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2939                         backup_s.dquant= 0;
2940                         s->skipdct=1;
2941                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2942                                         &dmin, &next_block, mx, my);
2943                         s->skipdct=0;
2944                     }
2945                 }
2946
2947                 s->current_picture.qscale_table[xy] = best_s.qscale;
2948
2949                 copy_context_after_encode(s, &best_s, -1);
2950
2951                 pb_bits_count= put_bits_count(&s->pb);
2952                 flush_put_bits(&s->pb);
2953                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2954                 s->pb= backup_s.pb;
2955
2956                 if(s->data_partitioning){
2957                     pb2_bits_count= put_bits_count(&s->pb2);
2958                     flush_put_bits(&s->pb2);
2959                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2960                     s->pb2= backup_s.pb2;
2961
2962                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2963                     flush_put_bits(&s->tex_pb);
2964                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2965                     s->tex_pb= backup_s.tex_pb;
2966                 }
2967                 s->last_bits= put_bits_count(&s->pb);
2968
2969                 if (CONFIG_H263_ENCODER &&
2970                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2971                     ff_h263_update_motion_val(s);
2972
2973                 if(next_block==0){ //FIXME 16 vs linesize16
2974                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2975                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2976                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2977                 }
2978
2979                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2980                     ff_mpv_decode_mb(s, s->block);
2981             } else {
2982                 int motion_x = 0, motion_y = 0;
2983                 s->mv_type=MV_TYPE_16X16;
2984                 // only one MB-Type possible
2985
2986                 switch(mb_type){
2987                 case CANDIDATE_MB_TYPE_INTRA:
2988                     s->mv_dir = 0;
2989                     s->mb_intra= 1;
2990                     motion_x= s->mv[0][0][0] = 0;
2991                     motion_y= s->mv[0][0][1] = 0;
2992                     break;
2993                 case CANDIDATE_MB_TYPE_INTER:
2994                     s->mv_dir = MV_DIR_FORWARD;
2995                     s->mb_intra= 0;
2996                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2997                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2998                     break;
2999                 case CANDIDATE_MB_TYPE_INTER_I:
3000                     s->mv_dir = MV_DIR_FORWARD;
3001                     s->mv_type = MV_TYPE_FIELD;
3002                     s->mb_intra= 0;
3003                     for(i=0; i<2; i++){
3004                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3005                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3006                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3007                     }
3008                     break;
3009                 case CANDIDATE_MB_TYPE_INTER4V:
3010                     s->mv_dir = MV_DIR_FORWARD;
3011                     s->mv_type = MV_TYPE_8X8;
3012                     s->mb_intra= 0;
3013                     for(i=0; i<4; i++){
3014                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3015                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3016                     }
3017                     break;
3018                 case CANDIDATE_MB_TYPE_DIRECT:
3019                     if (CONFIG_MPEG4_ENCODER) {
3020                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3021                         s->mb_intra= 0;
3022                         motion_x=s->b_direct_mv_table[xy][0];
3023                         motion_y=s->b_direct_mv_table[xy][1];
3024                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3025                     }
3026                     break;
3027                 case CANDIDATE_MB_TYPE_DIRECT0:
3028                     if (CONFIG_MPEG4_ENCODER) {
3029                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3030                         s->mb_intra= 0;
3031                         ff_mpeg4_set_direct_mv(s, 0, 0);
3032                     }
3033                     break;
3034                 case CANDIDATE_MB_TYPE_BIDIR:
3035                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3036                     s->mb_intra= 0;
3037                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3038                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3039                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3040                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3041                     break;
3042                 case CANDIDATE_MB_TYPE_BACKWARD:
3043                     s->mv_dir = MV_DIR_BACKWARD;
3044                     s->mb_intra= 0;
3045                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3046                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3047                     break;
3048                 case CANDIDATE_MB_TYPE_FORWARD:
3049                     s->mv_dir = MV_DIR_FORWARD;
3050                     s->mb_intra= 0;
3051                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3052                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3053                     break;
3054                 case CANDIDATE_MB_TYPE_FORWARD_I:
3055                     s->mv_dir = MV_DIR_FORWARD;
3056                     s->mv_type = MV_TYPE_FIELD;
3057                     s->mb_intra= 0;
3058                     for(i=0; i<2; i++){
3059                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3060                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3061                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3062                     }
3063                     break;
3064                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3065                     s->mv_dir = MV_DIR_BACKWARD;
3066                     s->mv_type = MV_TYPE_FIELD;
3067                     s->mb_intra= 0;
3068                     for(i=0; i<2; i++){
3069                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3070                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3071                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3072                     }
3073                     break;
3074                 case CANDIDATE_MB_TYPE_BIDIR_I:
3075                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3076                     s->mv_type = MV_TYPE_FIELD;
3077                     s->mb_intra= 0;
3078                     for(dir=0; dir<2; dir++){
3079                         for(i=0; i<2; i++){
3080                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3081                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3082                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3083                         }
3084                     }
3085                     break;
3086                 default:
3087                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3088                 }
3089
3090                 encode_mb(s, motion_x, motion_y);
3091
3092                 // RAL: Update last macroblock type
3093                 s->last_mv_dir = s->mv_dir;
3094
3095                 if (CONFIG_H263_ENCODER &&
3096                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3097                     ff_h263_update_motion_val(s);
3098
3099                 ff_mpv_decode_mb(s, s->block);
3100             }
3101
3102             /* clean the MV table in IPS frames for direct mode in B frames */
3103             if(s->mb_intra /* && I,P,S_TYPE */){
3104                 s->p_mv_table[xy][0]=0;
3105                 s->p_mv_table[xy][1]=0;
3106             }
3107
3108             if(s->flags&CODEC_FLAG_PSNR){
3109                 int w= 16;
3110                 int h= 16;
3111
3112                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3113                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3114
3115                 s->current_picture.f->error[0] += sse(
3116                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3117                     s->dest[0], w, h, s->linesize);
3118                 s->current_picture.f->error[1] += sse(
3119                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3120                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3121                 s->current_picture.f->error[2] += sse(
3122                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3123                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3124             }
3125             if(s->loop_filter){
3126                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3127                     ff_h263_loop_filter(s);
3128             }
3129             av_dlog(s->avctx, "MB %d %d bits\n",
3130                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3131         }
3132     }
3133
3134     //not beautiful here but we must write it before flushing so it has to be here
3135     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3136         ff_msmpeg4_encode_ext_header(s);
3137
3138     write_slice_end(s);
3139
3140     /* Send the last GOB if RTP */
3141     if (s->avctx->rtp_callback) {
3142         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3143         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3144         /* Call the RTP callback to send the last GOB */
3145         emms_c();
3146         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3147     }
3148
3149     return 0;
3150 }
3151
3152 #define MERGE(field) dst->field += src->field; src->field=0
3153 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3154     MERGE(me.scene_change_score);
3155     MERGE(me.mc_mb_var_sum_temp);
3156     MERGE(me.mb_var_sum_temp);
3157 }
3158
3159 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3160     int i;
3161
3162     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3163     MERGE(dct_count[1]);
3164     MERGE(mv_bits);
3165     MERGE(i_tex_bits);
3166     MERGE(p_tex_bits);
3167     MERGE(i_count);
3168     MERGE(f_count);
3169     MERGE(b_count);
3170     MERGE(skip_count);
3171     MERGE(misc_bits);
3172     MERGE(er.error_count);
3173     MERGE(padding_bug_score);
3174     MERGE(current_picture.f->error[0]);
3175     MERGE(current_picture.f->error[1]);
3176     MERGE(current_picture.f->error[2]);
3177
3178     if(dst->avctx->noise_reduction){
3179         for(i=0; i<64; i++){
3180             MERGE(dct_error_sum[0][i]);
3181             MERGE(dct_error_sum[1][i]);
3182         }
3183     }
3184
3185     assert(put_bits_count(&src->pb) % 8 ==0);
3186     assert(put_bits_count(&dst->pb) % 8 ==0);
3187     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3188     flush_put_bits(&dst->pb);
3189 }
3190
3191 static int estimate_qp(MpegEncContext *s, int dry_run){
3192     if (s->next_lambda){
3193         s->current_picture_ptr->f->quality =
3194         s->current_picture.f->quality = s->next_lambda;
3195         if(!dry_run) s->next_lambda= 0;
3196     } else if (!s->fixed_qscale) {
3197         s->current_picture_ptr->f->quality =
3198         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3199         if (s->current_picture.f->quality < 0)
3200             return -1;
3201     }
3202
3203     if(s->adaptive_quant){
3204         switch(s->codec_id){
3205         case AV_CODEC_ID_MPEG4:
3206             if (CONFIG_MPEG4_ENCODER)
3207                 ff_clean_mpeg4_qscales(s);
3208             break;
3209         case AV_CODEC_ID_H263:
3210         case AV_CODEC_ID_H263P:
3211         case AV_CODEC_ID_FLV1:
3212             if (CONFIG_H263_ENCODER)
3213                 ff_clean_h263_qscales(s);
3214             break;
3215         default:
3216             ff_init_qscale_tab(s);
3217         }
3218
3219         s->lambda= s->lambda_table[0];
3220         //FIXME broken
3221     }else
3222         s->lambda = s->current_picture.f->quality;
3223     update_qscale(s);
3224     return 0;
3225 }
3226
3227 /* must be called before writing the header */
3228 static void set_frame_distances(MpegEncContext * s){
3229     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3230     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3231
3232     if(s->pict_type==AV_PICTURE_TYPE_B){
3233         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3234         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3235     }else{
3236         s->pp_time= s->time - s->last_non_b_time;
3237         s->last_non_b_time= s->time;
3238         assert(s->picture_number==0 || s->pp_time > 0);
3239     }
3240 }
3241
3242 static int encode_picture(MpegEncContext *s, int picture_number)
3243 {
3244     int i, ret;
3245     int bits;
3246     int context_count = s->slice_context_count;
3247
3248     s->picture_number = picture_number;
3249
3250     /* Reset the average MB variance */
3251     s->me.mb_var_sum_temp    =
3252     s->me.mc_mb_var_sum_temp = 0;
3253
3254     /* we need to initialize some time vars before we can encode b-frames */
3255     // RAL: Condition added for MPEG1VIDEO
3256     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3257         set_frame_distances(s);
3258     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3259         ff_set_mpeg4_time(s);
3260
3261     s->me.scene_change_score=0;
3262
3263 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3264
3265     if(s->pict_type==AV_PICTURE_TYPE_I){
3266         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3267         else                        s->no_rounding=0;
3268     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3269         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3270             s->no_rounding ^= 1;
3271     }
3272
3273     if(s->flags & CODEC_FLAG_PASS2){
3274         if (estimate_qp(s,1) < 0)
3275             return -1;
3276         ff_get_2pass_fcode(s);
3277     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3278         if(s->pict_type==AV_PICTURE_TYPE_B)
3279             s->lambda= s->last_lambda_for[s->pict_type];
3280         else
3281             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3282         update_qscale(s);
3283     }
3284
3285     s->mb_intra=0; //for the rate distortion & bit compare functions
3286     for(i=1; i<context_count; i++){
3287         ret = ff_update_duplicate_context(s->thread_context[i], s);
3288         if (ret < 0)
3289             return ret;
3290     }
3291
3292     if(ff_init_me(s)<0)
3293         return -1;
3294
3295     /* Estimate motion for every MB */
3296     if(s->pict_type != AV_PICTURE_TYPE_I){
3297         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3298         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3299         if (s->pict_type != AV_PICTURE_TYPE_B) {
3300             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3301                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3302             }
3303         }
3304
3305         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3306     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3307         /* I-Frame */
3308         for(i=0; i<s->mb_stride*s->mb_height; i++)
3309             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3310
3311         if(!s->fixed_qscale){
3312             /* finding spatial complexity for I-frame rate control */
3313             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3314         }
3315     }
3316     for(i=1; i<context_count; i++){
3317         merge_context_after_me(s, s->thread_context[i]);
3318     }
3319     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3320     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3321     emms_c();
3322
3323     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3324         s->pict_type= AV_PICTURE_TYPE_I;
3325         for(i=0; i<s->mb_stride*s->mb_height; i++)
3326             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3327         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3328                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3329     }
3330
3331     if(!s->umvplus){
3332         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3333             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3334
3335             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3336                 int a,b;
3337                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3338                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3339                 s->f_code= FFMAX3(s->f_code, a, b);
3340             }
3341
3342             ff_fix_long_p_mvs(s);
3343             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3344             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3345                 int j;
3346                 for(i=0; i<2; i++){
3347                     for(j=0; j<2; j++)
3348                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3349                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3350                 }
3351             }
3352         }
3353
3354         if(s->pict_type==AV_PICTURE_TYPE_B){
3355             int a, b;
3356
3357             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3358             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3359             s->f_code = FFMAX(a, b);
3360
3361             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3362             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3363             s->b_code = FFMAX(a, b);
3364
3365             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3366             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3367             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3368             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3369             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3370                 int dir, j;
3371                 for(dir=0; dir<2; dir++){
3372                     for(i=0; i<2; i++){
3373                         for(j=0; j<2; j++){
3374                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3375                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3376                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3377                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3378                         }
3379                     }
3380                 }
3381             }
3382         }
3383     }
3384
3385     if (estimate_qp(s, 0) < 0)
3386         return -1;
3387
3388     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3389         s->qscale= 3; //reduce clipping problems
3390
3391     if (s->out_format == FMT_MJPEG) {
3392         /* for mjpeg, we do include qscale in the matrix */
3393         for(i=1;i<64;i++){
3394             int j = s->idsp.idct_permutation[i];
3395
3396             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3397         }
3398         s->y_dc_scale_table=
3399         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3400         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3401         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3402                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3403         s->qscale= 8;
3404     }
3405
3406     //FIXME var duplication
3407     s->current_picture_ptr->f->key_frame =
3408     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3409     s->current_picture_ptr->f->pict_type =
3410     s->current_picture.f->pict_type = s->pict_type;
3411
3412     if (s->current_picture.f->key_frame)
3413         s->picture_in_gop_number=0;
3414
3415     s->last_bits= put_bits_count(&s->pb);
3416     switch(s->out_format) {
3417     case FMT_MJPEG:
3418         if (CONFIG_MJPEG_ENCODER)
3419             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3420                                            s->intra_matrix);
3421         break;
3422     case FMT_H261:
3423         if (CONFIG_H261_ENCODER)
3424             ff_h261_encode_picture_header(s, picture_number);
3425         break;
3426     case FMT_H263:
3427         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3428             ff_wmv2_encode_picture_header(s, picture_number);
3429         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3430             ff_msmpeg4_encode_picture_header(s, picture_number);
3431         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3432             ff_mpeg4_encode_picture_header(s, picture_number);
3433         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3434             ff_rv10_encode_picture_header(s, picture_number);
3435         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3436             ff_rv20_encode_picture_header(s, picture_number);
3437         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3438             ff_flv_encode_picture_header(s, picture_number);
3439         else if (CONFIG_H263_ENCODER)
3440             ff_h263_encode_picture_header(s, picture_number);
3441         break;
3442     case FMT_MPEG1:
3443         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3444             ff_mpeg1_encode_picture_header(s, picture_number);
3445         break;
3446     default:
3447         assert(0);
3448     }
3449     bits= put_bits_count(&s->pb);
3450     s->header_bits= bits - s->last_bits;
3451
3452     for(i=1; i<context_count; i++){
3453         update_duplicate_context_after_me(s->thread_context[i], s);
3454     }
3455     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3456     for(i=1; i<context_count; i++){
3457         merge_context_after_encode(s, s->thread_context[i]);
3458     }
3459     emms_c();
3460     return 0;
3461 }
3462
3463 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3464     const int intra= s->mb_intra;
3465     int i;
3466
3467     s->dct_count[intra]++;
3468
3469     for(i=0; i<64; i++){
3470         int level= block[i];
3471
3472         if(level){
3473             if(level>0){
3474                 s->dct_error_sum[intra][i] += level;
3475                 level -= s->dct_offset[intra][i];
3476                 if(level<0) level=0;
3477             }else{
3478                 s->dct_error_sum[intra][i] -= level;
3479                 level += s->dct_offset[intra][i];
3480                 if(level>0) level=0;
3481             }
3482             block[i]= level;
3483         }
3484     }
3485 }
3486
3487 static int dct_quantize_trellis_c(MpegEncContext *s,
3488                                   int16_t *block, int n,
3489                                   int qscale, int *overflow){
3490     const int *qmat;
3491     const uint8_t *scantable= s->intra_scantable.scantable;
3492     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3493     int max=0;
3494     unsigned int threshold1, threshold2;
3495     int bias=0;
3496     int run_tab[65];
3497     int level_tab[65];
3498     int score_tab[65];
3499     int survivor[65];
3500     int survivor_count;
3501     int last_run=0;
3502     int last_level=0;
3503     int last_score= 0;
3504     int last_i;
3505     int coeff[2][64];
3506     int coeff_count[64];
3507     int qmul, qadd, start_i, last_non_zero, i, dc;
3508     const int esc_length= s->ac_esc_length;
3509     uint8_t * length;
3510     uint8_t * last_length;
3511     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3512
3513     s->fdsp.fdct(block);
3514
3515     if(s->dct_error_sum)
3516         s->denoise_dct(s, block);
3517     qmul= qscale*16;
3518     qadd= ((qscale-1)|1)*8;
3519
3520     if (s->mb_intra) {
3521         int q;
3522         if (!s->h263_aic) {
3523             if (n < 4)
3524                 q = s->y_dc_scale;
3525             else
3526                 q = s->c_dc_scale;
3527             q = q << 3;
3528         } else{
3529             /* For AIC we skip quant/dequant of INTRADC */
3530             q = 1 << 3;
3531             qadd=0;
3532         }
3533
3534         /* note: block[0] is assumed to be positive */
3535         block[0] = (block[0] + (q >> 1)) / q;
3536         start_i = 1;
3537         last_non_zero = 0;
3538         qmat = s->q_intra_matrix[qscale];
3539         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3540             bias= 1<<(QMAT_SHIFT-1);
3541         length     = s->intra_ac_vlc_length;
3542         last_length= s->intra_ac_vlc_last_length;
3543     } else {
3544         start_i = 0;
3545         last_non_zero = -1;
3546         qmat = s->q_inter_matrix[qscale];
3547         length     = s->inter_ac_vlc_length;
3548         last_length= s->inter_ac_vlc_last_length;
3549     }
3550     last_i= start_i;
3551
3552     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3553     threshold2= (threshold1<<1);
3554
3555     for(i=63; i>=start_i; i--) {
3556         const int j = scantable[i];
3557         int level = block[j] * qmat[j];
3558
3559         if(((unsigned)(level+threshold1))>threshold2){
3560             last_non_zero = i;
3561             break;
3562         }
3563     }
3564
3565     for(i=start_i; i<=last_non_zero; i++) {
3566         const int j = scantable[i];
3567         int level = block[j] * qmat[j];
3568
3569 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3570 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3571         if(((unsigned)(level+threshold1))>threshold2){
3572             if(level>0){
3573                 level= (bias + level)>>QMAT_SHIFT;
3574                 coeff[0][i]= level;
3575                 coeff[1][i]= level-1;
3576 //                coeff[2][k]= level-2;
3577             }else{
3578                 level= (bias - level)>>QMAT_SHIFT;
3579                 coeff[0][i]= -level;
3580                 coeff[1][i]= -level+1;
3581 //                coeff[2][k]= -level+2;
3582             }
3583             coeff_count[i]= FFMIN(level, 2);
3584             assert(coeff_count[i]);
3585             max |=level;
3586         }else{
3587             coeff[0][i]= (level>>31)|1;
3588             coeff_count[i]= 1;
3589         }
3590     }
3591
3592     *overflow= s->max_qcoeff < max; //overflow might have happened
3593
3594     if(last_non_zero < start_i){
3595         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3596         return last_non_zero;
3597     }
3598
3599     score_tab[start_i]= 0;
3600     survivor[0]= start_i;
3601     survivor_count= 1;
3602
3603     for(i=start_i; i<=last_non_zero; i++){
3604         int level_index, j, zero_distortion;
3605         int dct_coeff= FFABS(block[ scantable[i] ]);
3606         int best_score=256*256*256*120;
3607
3608         if (s->fdsp.fdct == ff_fdct_ifast)
3609             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3610         zero_distortion= dct_coeff*dct_coeff;
3611
3612         for(level_index=0; level_index < coeff_count[i]; level_index++){
3613             int distortion;
3614             int level= coeff[level_index][i];
3615             const int alevel= FFABS(level);
3616             int unquant_coeff;
3617
3618             assert(level);
3619
3620             if(s->out_format == FMT_H263){
3621                 unquant_coeff= alevel*qmul + qadd;
3622             }else{ //MPEG1
3623                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3624                 if(s->mb_intra){
3625                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3626                         unquant_coeff =   (unquant_coeff - 1) | 1;
3627                 }else{
3628                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3629                         unquant_coeff =   (unquant_coeff - 1) | 1;
3630                 }
3631                 unquant_coeff<<= 3;
3632             }
3633
3634             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3635             level+=64;
3636             if((level&(~127)) == 0){
3637                 for(j=survivor_count-1; j>=0; j--){
3638                     int run= i - survivor[j];
3639                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3640                     score += score_tab[i-run];
3641
3642                     if(score < best_score){
3643                         best_score= score;
3644                         run_tab[i+1]= run;
3645                         level_tab[i+1]= level-64;
3646                     }
3647                 }
3648
3649                 if(s->out_format == FMT_H263){
3650                     for(j=survivor_count-1; j>=0; j--){
3651                         int run= i - survivor[j];
3652                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3653                         score += score_tab[i-run];
3654                         if(score < last_score){
3655                             last_score= score;
3656                             last_run= run;
3657                             last_level= level-64;
3658                             last_i= i+1;
3659                         }
3660                     }
3661                 }
3662             }else{
3663                 distortion += esc_length*lambda;
3664                 for(j=survivor_count-1; j>=0; j--){
3665                     int run= i - survivor[j];
3666                     int score= distortion + score_tab[i-run];
3667
3668                     if(score < best_score){
3669                         best_score= score;
3670                         run_tab[i+1]= run;
3671                         level_tab[i+1]= level-64;
3672                     }
3673                 }
3674
3675                 if(s->out_format == FMT_H263){
3676                   for(j=survivor_count-1; j>=0; j--){
3677                         int run= i - survivor[j];
3678                         int score= distortion + score_tab[i-run];
3679                         if(score < last_score){
3680                             last_score= score;
3681                             last_run= run;
3682                             last_level= level-64;
3683                             last_i= i+1;
3684                         }
3685                     }
3686                 }
3687             }
3688         }
3689
3690         score_tab[i+1]= best_score;
3691
3692         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3693         if(last_non_zero <= 27){
3694             for(; survivor_count; survivor_count--){
3695                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3696                     break;
3697             }
3698         }else{
3699             for(; survivor_count; survivor_count--){
3700                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3701                     break;
3702             }
3703         }
3704
3705         survivor[ survivor_count++ ]= i+1;
3706     }
3707
3708     if(s->out_format != FMT_H263){
3709         last_score= 256*256*256*120;
3710         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3711             int score= score_tab[i];
3712             if(i) score += lambda*2; //FIXME exacter?
3713
3714             if(score < last_score){
3715                 last_score= score;
3716                 last_i= i;
3717                 last_level= level_tab[i];
3718                 last_run= run_tab[i];
3719             }
3720         }
3721     }
3722
3723     s->coded_score[n] = last_score;
3724
3725     dc= FFABS(block[0]);
3726     last_non_zero= last_i - 1;
3727     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3728
3729     if(last_non_zero < start_i)
3730         return last_non_zero;
3731
3732     if(last_non_zero == 0 && start_i == 0){
3733         int best_level= 0;
3734         int best_score= dc * dc;
3735
3736         for(i=0; i<coeff_count[0]; i++){
3737             int level= coeff[i][0];
3738             int alevel= FFABS(level);
3739             int unquant_coeff, score, distortion;
3740
3741             if(s->out_format == FMT_H263){
3742                     unquant_coeff= (alevel*qmul + qadd)>>3;
3743             }else{ //MPEG1
3744                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3745                     unquant_coeff =   (unquant_coeff - 1) | 1;
3746             }
3747             unquant_coeff = (unquant_coeff + 4) >> 3;
3748             unquant_coeff<<= 3 + 3;
3749
3750             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3751             level+=64;
3752             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3753             else                    score= distortion + esc_length*lambda;
3754
3755             if(score < best_score){
3756                 best_score= score;
3757                 best_level= level - 64;
3758             }
3759         }
3760         block[0]= best_level;
3761         s->coded_score[n] = best_score - dc*dc;
3762         if(best_level == 0) return -1;
3763         else                return last_non_zero;
3764     }
3765
3766     i= last_i;
3767     assert(last_level);
3768
3769     block[ perm_scantable[last_non_zero] ]= last_level;
3770     i -= last_run + 1;
3771
3772     for(; i>start_i; i -= run_tab[i] + 1){
3773         block[ perm_scantable[i-1] ]= level_tab[i];
3774     }
3775
3776     return last_non_zero;
3777 }
3778
3779 //#define REFINE_STATS 1
3780 static int16_t basis[64][64];
3781
3782 static void build_basis(uint8_t *perm){
3783     int i, j, x, y;
3784     emms_c();
3785     for(i=0; i<8; i++){
3786         for(j=0; j<8; j++){
3787             for(y=0; y<8; y++){
3788                 for(x=0; x<8; x++){
3789                     double s= 0.25*(1<<BASIS_SHIFT);
3790                     int index= 8*i + j;
3791                     int perm_index= perm[index];
3792                     if(i==0) s*= sqrt(0.5);
3793                     if(j==0) s*= sqrt(0.5);
3794                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3795                 }
3796             }
3797         }
3798     }
3799 }
3800
3801 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3802                         int16_t *block, int16_t *weight, int16_t *orig,
3803                         int n, int qscale){
3804     int16_t rem[64];
3805     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3806     const uint8_t *scantable= s->intra_scantable.scantable;
3807     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3808 //    unsigned int threshold1, threshold2;
3809 //    int bias=0;
3810     int run_tab[65];
3811     int prev_run=0;
3812     int prev_level=0;
3813     int qmul, qadd, start_i, last_non_zero, i, dc;
3814     uint8_t * length;
3815     uint8_t * last_length;
3816     int lambda;
3817     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3818 #ifdef REFINE_STATS
3819 static int count=0;
3820 static int after_last=0;
3821 static int to_zero=0;
3822 static int from_zero=0;
3823 static int raise=0;
3824 static int lower=0;
3825 static int messed_sign=0;
3826 #endif
3827
3828     if(basis[0][0] == 0)
3829         build_basis(s->idsp.idct_permutation);
3830
3831     qmul= qscale*2;
3832     qadd= (qscale-1)|1;
3833     if (s->mb_intra) {
3834         if (!s->h263_aic) {
3835             if (n < 4)
3836                 q = s->y_dc_scale;
3837             else
3838                 q = s->c_dc_scale;
3839         } else{
3840             /* For AIC we skip quant/dequant of INTRADC */
3841             q = 1;
3842             qadd=0;
3843         }
3844         q <<= RECON_SHIFT-3;
3845         /* note: block[0] is assumed to be positive */
3846         dc= block[0]*q;
3847 //        block[0] = (block[0] + (q >> 1)) / q;
3848         start_i = 1;
3849 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3850 //            bias= 1<<(QMAT_SHIFT-1);
3851         length     = s->intra_ac_vlc_length;
3852         last_length= s->intra_ac_vlc_last_length;
3853     } else {
3854         dc= 0;
3855         start_i = 0;
3856         length     = s->inter_ac_vlc_length;
3857         last_length= s->inter_ac_vlc_last_length;
3858     }
3859     last_non_zero = s->block_last_index[n];
3860
3861 #ifdef REFINE_STATS
3862 {START_TIMER
3863 #endif
3864     dc += (1<<(RECON_SHIFT-1));
3865     for(i=0; i<64; i++){
3866         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3867     }
3868 #ifdef REFINE_STATS
3869 STOP_TIMER("memset rem[]")}
3870 #endif
3871     sum=0;
3872     for(i=0; i<64; i++){
3873         int one= 36;
3874         int qns=4;
3875         int w;
3876
3877         w= FFABS(weight[i]) + qns*one;
3878         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3879
3880         weight[i] = w;
3881 //        w=weight[i] = (63*qns + (w/2)) / w;
3882
3883         assert(w>0);
3884         assert(w<(1<<6));
3885         sum += w*w;
3886     }
3887     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3888 #ifdef REFINE_STATS
3889 {START_TIMER
3890 #endif
3891     run=0;
3892     rle_index=0;
3893     for(i=start_i; i<=last_non_zero; i++){
3894         int j= perm_scantable[i];
3895         const int level= block[j];
3896         int coeff;
3897
3898         if(level){
3899             if(level<0) coeff= qmul*level - qadd;
3900             else        coeff= qmul*level + qadd;
3901             run_tab[rle_index++]=run;
3902             run=0;
3903
3904             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3905         }else{
3906             run++;
3907         }
3908     }
3909 #ifdef REFINE_STATS
3910 if(last_non_zero>0){
3911 STOP_TIMER("init rem[]")
3912 }
3913 }
3914
3915 {START_TIMER
3916 #endif
3917     for(;;){
3918         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3919         int best_coeff=0;
3920         int best_change=0;
3921         int run2, best_unquant_change=0, analyze_gradient;
3922 #ifdef REFINE_STATS
3923 {START_TIMER
3924 #endif
3925         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3926
3927         if(analyze_gradient){
3928 #ifdef REFINE_STATS
3929 {START_TIMER
3930 #endif
3931             for(i=0; i<64; i++){
3932                 int w= weight[i];
3933
3934                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3935             }
3936 #ifdef REFINE_STATS
3937 STOP_TIMER("rem*w*w")}
3938 {START_TIMER
3939 #endif
3940             s->fdsp.fdct(d1);
3941 #ifdef REFINE_STATS
3942 STOP_TIMER("dct")}
3943 #endif
3944         }
3945
3946         if(start_i){
3947             const int level= block[0];
3948             int change, old_coeff;
3949
3950             assert(s->mb_intra);
3951
3952             old_coeff= q*level;
3953
3954             for(change=-1; change<=1; change+=2){
3955                 int new_level= level + change;
3956                 int score, new_coeff;
3957
3958                 new_coeff= q*new_level;
3959                 if(new_coeff >= 2048 || new_coeff < 0)
3960                     continue;
3961
3962                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3963                                                   new_coeff - old_coeff);
3964                 if(score<best_score){
3965                     best_score= score;
3966                     best_coeff= 0;
3967                     best_change= change;
3968                     best_unquant_change= new_coeff - old_coeff;
3969                 }
3970             }
3971         }
3972
3973         run=0;
3974         rle_index=0;
3975         run2= run_tab[rle_index++];
3976         prev_level=0;
3977         prev_run=0;
3978
3979         for(i=start_i; i<64; i++){
3980             int j= perm_scantable[i];
3981             const int level= block[j];
3982             int change, old_coeff;
3983
3984             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3985                 break;
3986
3987             if(level){
3988                 if(level<0) old_coeff= qmul*level - qadd;
3989                 else        old_coeff= qmul*level + qadd;
3990                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3991             }else{
3992                 old_coeff=0;
3993                 run2--;
3994                 assert(run2>=0 || i >= last_non_zero );
3995             }
3996
3997             for(change=-1; change<=1; change+=2){
3998                 int new_level= level + change;
3999                 int score, new_coeff, unquant_change;
4000
4001                 score=0;
4002                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4003                    continue;
4004
4005                 if(new_level){
4006                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4007                     else            new_coeff= qmul*new_level + qadd;
4008                     if(new_coeff >= 2048 || new_coeff <= -2048)
4009                         continue;
4010                     //FIXME check for overflow
4011
4012                     if(level){
4013                         if(level < 63 && level > -63){
4014                             if(i < last_non_zero)
4015                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4016                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4017                             else
4018                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4019                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4020                         }
4021                     }else{
4022                         assert(FFABS(new_level)==1);
4023
4024                         if(analyze_gradient){
4025                             int g= d1[ scantable[i] ];
4026                             if(g && (g^new_level) >= 0)
4027                                 continue;
4028                         }
4029
4030                         if(i < last_non_zero){
4031                             int next_i= i + run2 + 1;
4032                             int next_level= block[ perm_scantable[next_i] ] + 64;
4033
4034                             if(next_level&(~127))
4035                                 next_level= 0;
4036
4037                             if(next_i < last_non_zero)
4038                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4039                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4040                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4041                             else
4042                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4043                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4044                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4045                         }else{
4046                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4047                             if(prev_level){
4048                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4049                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4050                             }
4051                         }
4052                     }
4053                 }else{
4054                     new_coeff=0;
4055                     assert(FFABS(level)==1);
4056
4057                     if(i < last_non_zero){
4058                         int next_i= i + run2 + 1;
4059                         int next_level= block[ perm_scantable[next_i] ] + 64;
4060
4061                         if(next_level&(~127))
4062                             next_level= 0;
4063
4064                         if(next_i < last_non_zero)
4065                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4066                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4067                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4068                         else
4069                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4070                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4071                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4072                     }else{
4073                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4074                         if(prev_level){
4075                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4076                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4077                         }
4078                     }
4079                 }
4080
4081                 score *= lambda;
4082
4083                 unquant_change= new_coeff - old_coeff;
4084                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4085
4086                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4087                                                    unquant_change);
4088                 if(score<best_score){
4089                     best_score= score;
4090                     best_coeff= i;
4091                     best_change= change;
4092                     best_unquant_change= unquant_change;
4093                 }
4094             }
4095             if(level){
4096                 prev_level= level + 64;
4097                 if(prev_level&(~127))
4098                     prev_level= 0;
4099                 prev_run= run;
4100                 run=0;
4101             }else{
4102                 run++;
4103             }
4104         }
4105 #ifdef REFINE_STATS
4106 STOP_TIMER("iterative step")}
4107 #endif
4108
4109         if(best_change){
4110             int j= perm_scantable[ best_coeff ];
4111
4112             block[j] += best_change;
4113
4114             if(best_coeff > last_non_zero){
4115                 last_non_zero= best_coeff;
4116                 assert(block[j]);
4117 #ifdef REFINE_STATS
4118 after_last++;
4119 #endif
4120             }else{
4121 #ifdef REFINE_STATS
4122 if(block[j]){
4123     if(block[j] - best_change){
4124         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4125             raise++;
4126         }else{
4127             lower++;
4128         }
4129     }else{
4130         from_zero++;
4131     }
4132 }else{
4133     to_zero++;
4134 }
4135 #endif
4136                 for(; last_non_zero>=start_i; last_non_zero--){
4137                     if(block[perm_scantable[last_non_zero]])
4138                         break;
4139                 }
4140             }
4141 #ifdef REFINE_STATS
4142 count++;
4143 if(256*256*256*64 % count == 0){
4144     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4145 }
4146 #endif
4147             run=0;
4148             rle_index=0;
4149             for(i=start_i; i<=last_non_zero; i++){
4150                 int j= perm_scantable[i];
4151                 const int level= block[j];
4152
4153                  if(level){
4154                      run_tab[rle_index++]=run;
4155                      run=0;
4156                  }else{
4157                      run++;
4158                  }
4159             }
4160
4161             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4162         }else{
4163             break;
4164         }
4165     }
4166 #ifdef REFINE_STATS
4167 if(last_non_zero>0){
4168 STOP_TIMER("iterative search")
4169 }
4170 }
4171 #endif
4172
4173     return last_non_zero;
4174 }
4175
4176 int ff_dct_quantize_c(MpegEncContext *s,
4177                         int16_t *block, int n,
4178                         int qscale, int *overflow)
4179 {
4180     int i, j, level, last_non_zero, q, start_i;
4181     const int *qmat;
4182     const uint8_t *scantable= s->intra_scantable.scantable;
4183     int bias;
4184     int max=0;
4185     unsigned int threshold1, threshold2;
4186
4187     s->fdsp.fdct(block);
4188
4189     if(s->dct_error_sum)
4190         s->denoise_dct(s, block);
4191
4192     if (s->mb_intra) {
4193         if (!s->h263_aic) {
4194             if (n < 4)
4195                 q = s->y_dc_scale;
4196             else
4197                 q = s->c_dc_scale;
4198             q = q << 3;
4199         } else
4200             /* For AIC we skip quant/dequant of INTRADC */
4201             q = 1 << 3;
4202
4203         /* note: block[0] is assumed to be positive */
4204         block[0] = (block[0] + (q >> 1)) / q;
4205         start_i = 1;
4206         last_non_zero = 0;
4207         qmat = s->q_intra_matrix[qscale];
4208         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4209     } else {
4210         start_i = 0;
4211         last_non_zero = -1;
4212         qmat = s->q_inter_matrix[qscale];
4213         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4214     }
4215     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4216     threshold2= (threshold1<<1);
4217     for(i=63;i>=start_i;i--) {
4218         j = scantable[i];
4219         level = block[j] * qmat[j];
4220
4221         if(((unsigned)(level+threshold1))>threshold2){
4222             last_non_zero = i;
4223             break;
4224         }else{
4225             block[j]=0;
4226         }
4227     }
4228     for(i=start_i; i<=last_non_zero; i++) {
4229         j = scantable[i];
4230         level = block[j] * qmat[j];
4231
4232 //        if(   bias+level >= (1<<QMAT_SHIFT)
4233 //           || bias-level >= (1<<QMAT_SHIFT)){
4234         if(((unsigned)(level+threshold1))>threshold2){
4235             if(level>0){
4236                 level= (bias + level)>>QMAT_SHIFT;
4237                 block[j]= level;
4238             }else{
4239                 level= (bias - level)>>QMAT_SHIFT;
4240                 block[j]= -level;
4241             }
4242             max |=level;
4243         }else{
4244             block[j]=0;
4245         }
4246     }
4247     *overflow= s->max_qcoeff < max; //overflow might have happened
4248
4249     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4250     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4251         ff_block_permute(block, s->idsp.idct_permutation,
4252                          scantable, last_non_zero);
4253
4254     return last_non_zero;
4255 }
4256
4257 #define OFFSET(x) offsetof(MpegEncContext, x)
4258 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4259 static const AVOption h263_options[] = {
4260     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4261     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4262     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4263     FF_MPV_COMMON_OPTS
4264     { NULL },
4265 };
4266
4267 static const AVClass h263_class = {
4268     .class_name = "H.263 encoder",
4269     .item_name  = av_default_item_name,
4270     .option     = h263_options,
4271     .version    = LIBAVUTIL_VERSION_INT,
4272 };
4273
4274 AVCodec ff_h263_encoder = {
4275     .name           = "h263",
4276     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4277     .type           = AVMEDIA_TYPE_VIDEO,
4278     .id             = AV_CODEC_ID_H263,
4279     .priv_data_size = sizeof(MpegEncContext),
4280     .init           = ff_mpv_encode_init,
4281     .encode2        = ff_mpv_encode_picture,
4282     .close          = ff_mpv_encode_end,
4283     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4284     .priv_class     = &h263_class,
4285 };
4286
4287 static const AVOption h263p_options[] = {
4288     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4289     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4290     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4291     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4292     FF_MPV_COMMON_OPTS
4293     { NULL },
4294 };
4295 static const AVClass h263p_class = {
4296     .class_name = "H.263p encoder",
4297     .item_name  = av_default_item_name,
4298     .option     = h263p_options,
4299     .version    = LIBAVUTIL_VERSION_INT,
4300 };
4301
4302 AVCodec ff_h263p_encoder = {
4303     .name           = "h263p",
4304     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4305     .type           = AVMEDIA_TYPE_VIDEO,
4306     .id             = AV_CODEC_ID_H263P,
4307     .priv_data_size = sizeof(MpegEncContext),
4308     .init           = ff_mpv_encode_init,
4309     .encode2        = ff_mpv_encode_picture,
4310     .close          = ff_mpv_encode_end,
4311     .capabilities   = CODEC_CAP_SLICE_THREADS,
4312     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4313     .priv_class     = &h263p_class,
4314 };
4315
4316 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4317
4318 AVCodec ff_msmpeg4v2_encoder = {
4319     .name           = "msmpeg4v2",
4320     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4321     .type           = AVMEDIA_TYPE_VIDEO,
4322     .id             = AV_CODEC_ID_MSMPEG4V2,
4323     .priv_data_size = sizeof(MpegEncContext),
4324     .init           = ff_mpv_encode_init,
4325     .encode2        = ff_mpv_encode_picture,
4326     .close          = ff_mpv_encode_end,
4327     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4328     .priv_class     = &msmpeg4v2_class,
4329 };
4330
4331 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4332
4333 AVCodec ff_msmpeg4v3_encoder = {
4334     .name           = "msmpeg4",
4335     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4336     .type           = AVMEDIA_TYPE_VIDEO,
4337     .id             = AV_CODEC_ID_MSMPEG4V3,
4338     .priv_data_size = sizeof(MpegEncContext),
4339     .init           = ff_mpv_encode_init,
4340     .encode2        = ff_mpv_encode_picture,
4341     .close          = ff_mpv_encode_end,
4342     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4343     .priv_class     = &msmpeg4v3_class,
4344 };
4345
4346 FF_MPV_GENERIC_CLASS(wmv1)
4347
4348 AVCodec ff_wmv1_encoder = {
4349     .name           = "wmv1",
4350     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4351     .type           = AVMEDIA_TYPE_VIDEO,
4352     .id             = AV_CODEC_ID_WMV1,
4353     .priv_data_size = sizeof(MpegEncContext),
4354     .init           = ff_mpv_encode_init,
4355     .encode2        = ff_mpv_encode_picture,
4356     .close          = ff_mpv_encode_end,
4357     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4358     .priv_class     = &wmv1_class,
4359 };