]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
38c42fe19210f2fa6c5154987dea688ac6f338b7
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
/* Number of fractional bits carried by the intra/inter quantizer bias values
 * (e.g. a bias of 3/8 is stored as 3 << (QUANT_BIAS_SHIFT - 3)). */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point precision of the reciprocal quantization tables built by
 * ff_convert_matrix(): QMAT_SHIFT_MMX for the 16-bit qmat16 tables,
 * QMAT_SHIFT for the int qmat tables. */
#define QMAT_SHIFT_MMX 16
#define QMAT_SHIFT 22

/* Forward declarations of file-local helpers that are referenced before
 * their definitions (the definitions are not part of this excerpt). */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Default tables shared by all encoder instances; mpv_encode_defaults()
 * points s->me.mv_penalty and s->fcode_tab at them. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Generic option table: the common mpegvideo encoder options followed by
 * the terminating NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 int64_t den = (int64_t) qscale * quant_matrix[j];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
111                 /* 16 <= qscale * quant_matrix[i] <= 7905
112                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
113                  *             19952 <=              x  <= 249205026
114                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
115                  *           3444240 >= (1 << 36) / (x) >= 275 */
116
117                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
118             }
119         } else {
120             for (i = 0; i < 64; i++) {
121                 const int j = s->idsp.idct_permutation[i];
122                 int64_t den = (int64_t) qscale * quant_matrix[j];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
129                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
130                 //                    (qscale * quant_matrix[i]);
131                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
132
133                 if (qmat16[qscale][0][i] == 0 ||
134                     qmat16[qscale][0][i] == 128 * 256)
135                     qmat16[qscale][0][i] = 128 * 256 - 1;
136                 qmat16[qscale][1][i] =
137                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
138                                 qmat16[qscale][0][i]);
139             }
140         }
141
142         for (i = intra; i < 64; i++) {
143             int64_t max = 8191;
144             if (fdsp->fdct == ff_fdct_ifast) {
145                 max = (8191LL * ff_aanscales[i]) >> 14;
146             }
147             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
148                 shift++;
149             }
150         }
151     }
152     if (shift) {
153         av_log(NULL, AV_LOG_INFO,
154                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
155                QMAT_SHIFT - shift);
156     }
157 }
158
159 static inline void update_qscale(MpegEncContext *s)
160 {
161     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
162                 (FF_LAMBDA_SHIFT + 7);
163     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
164
165     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
166                  FF_LAMBDA_SHIFT;
167 }
168
169 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
170 {
171     int i;
172
173     if (matrix) {
174         put_bits(pb, 1, 1);
175         for (i = 0; i < 64; i++) {
176             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
177         }
178     } else
179         put_bits(pb, 1, 0);
180 }
181
182 /**
183  * init s->current_picture.qscale_table from s->lambda_table
184  */
185 void ff_init_qscale_tab(MpegEncContext *s)
186 {
187     int8_t * const qscale_table = s->current_picture.qscale_table;
188     int i;
189
190     for (i = 0; i < s->mb_num; i++) {
191         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
192         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
193         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
194                                                   s->avctx->qmax);
195     }
196 }
197
198 static void update_duplicate_context_after_me(MpegEncContext *dst,
199                                               MpegEncContext *src)
200 {
201 #define COPY(a) dst->a= src->a
202     COPY(pict_type);
203     COPY(current_picture);
204     COPY(f_code);
205     COPY(b_code);
206     COPY(qscale);
207     COPY(lambda);
208     COPY(lambda2);
209     COPY(picture_in_gop_number);
210     COPY(gop_picture_number);
211     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
212     COPY(progressive_frame);    // FIXME don't set in encode_header
213     COPY(partitioned_frame);    // FIXME don't set in encode_header
214 #undef COPY
215 }
216
217 /**
218  * Set the given MpegEncContext to defaults for encoding.
219  * the changed fields will not depend upon the prior state of the MpegEncContext.
220  */
221 static void mpv_encode_defaults(MpegEncContext *s)
222 {
223     int i;
224     ff_mpv_common_defaults(s);
225
226     for (i = -16; i < 16; i++) {
227         default_fcode_tab[i + MAX_MV] = 1;
228     }
229     s->me.mv_penalty = default_mv_penalty;
230     s->fcode_tab     = default_fcode_tab;
231
232     s->input_picture_number  = 0;
233     s->picture_in_gop_number = 0;
234 }
235
236 /* init video encoder */
237 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
238 {
239     MpegEncContext *s = avctx->priv_data;
240     int i, ret, format_supported;
241
242     mpv_encode_defaults(s);
243
244     switch (avctx->codec_id) {
245     case AV_CODEC_ID_MPEG2VIDEO:
246         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
247             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
248             av_log(avctx, AV_LOG_ERROR,
249                    "only YUV420 and YUV422 are supported\n");
250             return -1;
251         }
252         break;
253     case AV_CODEC_ID_MJPEG:
254         format_supported = 0;
255         /* JPEG color space */
256         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
257             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
258             (avctx->color_range == AVCOL_RANGE_JPEG &&
259              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
260               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
261             format_supported = 1;
262         /* MPEG color space */
263         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
264                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
265                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
266             format_supported = 1;
267
268         if (!format_supported) {
269             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
270             return -1;
271         }
272         break;
273     default:
274         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
275             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
276             return -1;
277         }
278     }
279
280     switch (avctx->pix_fmt) {
281     case AV_PIX_FMT_YUVJ422P:
282     case AV_PIX_FMT_YUV422P:
283         s->chroma_format = CHROMA_422;
284         break;
285     case AV_PIX_FMT_YUVJ420P:
286     case AV_PIX_FMT_YUV420P:
287     default:
288         s->chroma_format = CHROMA_420;
289         break;
290     }
291
292     s->bit_rate = avctx->bit_rate;
293     s->width    = avctx->width;
294     s->height   = avctx->height;
295     if (avctx->gop_size > 600 &&
296         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
297         av_log(avctx, AV_LOG_ERROR,
298                "Warning keyframe interval too large! reducing it ...\n");
299         avctx->gop_size = 600;
300     }
301     s->gop_size     = avctx->gop_size;
302     s->avctx        = avctx;
303     s->flags        = avctx->flags;
304     s->flags2       = avctx->flags2;
305     if (avctx->max_b_frames > MAX_B_FRAMES) {
306         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
307                "is %d.\n", MAX_B_FRAMES);
308     }
309     s->max_b_frames = avctx->max_b_frames;
310     s->codec_id     = avctx->codec->id;
311     s->strict_std_compliance = avctx->strict_std_compliance;
312     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
313     s->mpeg_quant         = avctx->mpeg_quant;
314     s->rtp_mode           = !!avctx->rtp_payload_size;
315     s->intra_dc_precision = avctx->intra_dc_precision;
316     s->user_specified_pts = AV_NOPTS_VALUE;
317
318     if (s->gop_size <= 1) {
319         s->intra_only = 1;
320         s->gop_size   = 12;
321     } else {
322         s->intra_only = 0;
323     }
324
325     s->me_method = avctx->me_method;
326
327     /* Fixed QSCALE */
328     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
329
330 #if FF_API_MPV_OPT
331     FF_DISABLE_DEPRECATION_WARNINGS
332     if (avctx->border_masking != 0.0)
333         s->border_masking = avctx->border_masking;
334     FF_ENABLE_DEPRECATION_WARNINGS
335 #endif
336
337     s->adaptive_quant = (s->avctx->lumi_masking ||
338                          s->avctx->dark_masking ||
339                          s->avctx->temporal_cplx_masking ||
340                          s->avctx->spatial_cplx_masking  ||
341                          s->avctx->p_masking      ||
342                          s->border_masking ||
343                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
344                         !s->fixed_qscale;
345
346     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
347
348     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
349         av_log(avctx, AV_LOG_ERROR,
350                "a vbv buffer size is needed, "
351                "for encoding with a maximum bitrate\n");
352         return -1;
353     }
354
355     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
356         av_log(avctx, AV_LOG_INFO,
357                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
358     }
359
360     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
361         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
362         return -1;
363     }
364
365     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
366         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
367         return -1;
368     }
369
370     if (avctx->rc_max_rate &&
371         avctx->rc_max_rate == avctx->bit_rate &&
372         avctx->rc_max_rate != avctx->rc_min_rate) {
373         av_log(avctx, AV_LOG_INFO,
374                "impossible bitrate constraints, this will fail\n");
375     }
376
377     if (avctx->rc_buffer_size &&
378         avctx->bit_rate * (int64_t)avctx->time_base.num >
379             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
380         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
381         return -1;
382     }
383
384     if (!s->fixed_qscale &&
385         avctx->bit_rate * av_q2d(avctx->time_base) >
386             avctx->bit_rate_tolerance) {
387         av_log(avctx, AV_LOG_ERROR,
388                "bitrate tolerance too small for bitrate\n");
389         return -1;
390     }
391
392     if (s->avctx->rc_max_rate &&
393         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
394         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
395          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
396         90000LL * (avctx->rc_buffer_size - 1) >
397             s->avctx->rc_max_rate * 0xFFFFLL) {
398         av_log(avctx, AV_LOG_INFO,
399                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
400                "specified vbv buffer is too large for the given bitrate!\n");
401     }
402
403     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
404         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
405         s->codec_id != AV_CODEC_ID_FLV1) {
406         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
407         return -1;
408     }
409
410     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
411         av_log(avctx, AV_LOG_ERROR,
412                "OBMC is only supported with simple mb decision\n");
413         return -1;
414     }
415
416     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
417         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
418         return -1;
419     }
420
421     if (s->max_b_frames                    &&
422         s->codec_id != AV_CODEC_ID_MPEG4      &&
423         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
424         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
425         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
426         return -1;
427     }
428
429     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
430          s->codec_id == AV_CODEC_ID_H263  ||
431          s->codec_id == AV_CODEC_ID_H263P) &&
432         (avctx->sample_aspect_ratio.num > 255 ||
433          avctx->sample_aspect_ratio.den > 255)) {
434         av_log(avctx, AV_LOG_ERROR,
435                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
436                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
437         return -1;
438     }
439
440     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
441         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
442         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
443         return -1;
444     }
445
446     // FIXME mpeg2 uses that too
447     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
448         av_log(avctx, AV_LOG_ERROR,
449                "mpeg2 style quantization not supported by codec\n");
450         return -1;
451     }
452
453     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
454         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
455         return -1;
456     }
457
458     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
459         s->avctx->mb_decision != FF_MB_DECISION_RD) {
460         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
461         return -1;
462     }
463
464     if (s->avctx->scenechange_threshold < 1000000000 &&
465         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
466         av_log(avctx, AV_LOG_ERROR,
467                "closed gop with scene change detection are not supported yet, "
468                "set threshold to 1000000000\n");
469         return -1;
470     }
471
472     if (s->flags & CODEC_FLAG_LOW_DELAY) {
473         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
474             av_log(avctx, AV_LOG_ERROR,
475                   "low delay forcing is only available for mpeg2\n");
476             return -1;
477         }
478         if (s->max_b_frames != 0) {
479             av_log(avctx, AV_LOG_ERROR,
480                    "b frames cannot be used with low delay\n");
481             return -1;
482         }
483     }
484
485     if (s->q_scale_type == 1) {
486         if (avctx->qmax > 12) {
487             av_log(avctx, AV_LOG_ERROR,
488                    "non linear quant only supports qmax <= 12 currently\n");
489             return -1;
490         }
491     }
492
493     if (s->avctx->thread_count > 1         &&
494         s->codec_id != AV_CODEC_ID_MPEG4      &&
495         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
496         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
497         (s->codec_id != AV_CODEC_ID_H263P)) {
498         av_log(avctx, AV_LOG_ERROR,
499                "multi threaded encoding not supported by codec\n");
500         return -1;
501     }
502
503     if (s->avctx->thread_count < 1) {
504         av_log(avctx, AV_LOG_ERROR,
505                "automatic thread number detection not supported by codec,"
506                "patch welcome\n");
507         return -1;
508     }
509
510     if (s->avctx->thread_count > 1)
511         s->rtp_mode = 1;
512
513     if (!avctx->time_base.den || !avctx->time_base.num) {
514         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
515         return -1;
516     }
517
518     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
519         av_log(avctx, AV_LOG_INFO,
520                "notice: b_frame_strategy only affects the first pass\n");
521         avctx->b_frame_strategy = 0;
522     }
523
524     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
525     if (i > 1) {
526         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
527         avctx->time_base.den /= i;
528         avctx->time_base.num /= i;
529         //return -1;
530     }
531
532     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
533         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
534         // (a + x * 3 / 8) / x
535         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
536         s->inter_quant_bias = 0;
537     } else {
538         s->intra_quant_bias = 0;
539         // (a - x / 4) / x
540         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
541     }
542
543     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
544         s->intra_quant_bias = avctx->intra_quant_bias;
545     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->inter_quant_bias = avctx->inter_quant_bias;
547
548     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
549         s->avctx->time_base.den > (1 << 16) - 1) {
550         av_log(avctx, AV_LOG_ERROR,
551                "timebase %d/%d not supported by MPEG 4 standard, "
552                "the maximum admitted value for the timebase denominator "
553                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
554                (1 << 16) - 1);
555         return -1;
556     }
557     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
558
559     switch (avctx->codec->id) {
560     case AV_CODEC_ID_MPEG1VIDEO:
561         s->out_format = FMT_MPEG1;
562         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
563         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
564         break;
565     case AV_CODEC_ID_MPEG2VIDEO:
566         s->out_format = FMT_MPEG1;
567         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
568         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
569         s->rtp_mode   = 1;
570         break;
571     case AV_CODEC_ID_MJPEG:
572         s->out_format = FMT_MJPEG;
573         s->intra_only = 1; /* force intra only for jpeg */
574         if (!CONFIG_MJPEG_ENCODER ||
575             ff_mjpeg_encode_init(s) < 0)
576             return -1;
577         avctx->delay = 0;
578         s->low_delay = 1;
579         break;
580     case AV_CODEC_ID_H261:
581         if (!CONFIG_H261_ENCODER)
582             return -1;
583         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "The specified picture size of %dx%d is not valid for the "
586                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
587                     s->width, s->height);
588             return -1;
589         }
590         s->out_format = FMT_H261;
591         avctx->delay  = 0;
592         s->low_delay  = 1;
593         s->rtp_mode   = 0; /* Sliced encoding not supported */
594         break;
595     case AV_CODEC_ID_H263:
596         if (!CONFIG_H263_ENCODER)
597         return -1;
598         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
599                              s->width, s->height) == 8) {
600             av_log(avctx, AV_LOG_INFO,
601                    "The specified picture size of %dx%d is not valid for "
602                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
603                    "352x288, 704x576, and 1408x1152."
604                    "Try H.263+.\n", s->width, s->height);
605             return -1;
606         }
607         s->out_format = FMT_H263;
608         avctx->delay  = 0;
609         s->low_delay  = 1;
610         break;
611     case AV_CODEC_ID_H263P:
612         s->out_format = FMT_H263;
613         s->h263_plus  = 1;
614         /* Fx */
615         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
616         s->modified_quant  = s->h263_aic;
617         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
618         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
619
620         /* /Fx */
621         /* These are just to be sure */
622         avctx->delay = 0;
623         s->low_delay = 1;
624         break;
625     case AV_CODEC_ID_FLV1:
626         s->out_format      = FMT_H263;
627         s->h263_flv        = 2; /* format = 1; 11-bit codes */
628         s->unrestricted_mv = 1;
629         s->rtp_mode  = 0; /* don't allow GOB */
630         avctx->delay = 0;
631         s->low_delay = 1;
632         break;
633     case AV_CODEC_ID_RV10:
634         s->out_format = FMT_H263;
635         avctx->delay  = 0;
636         s->low_delay  = 1;
637         break;
638     case AV_CODEC_ID_RV20:
639         s->out_format      = FMT_H263;
640         avctx->delay       = 0;
641         s->low_delay       = 1;
642         s->modified_quant  = 1;
643         s->h263_aic        = 1;
644         s->h263_plus       = 1;
645         s->loop_filter     = 1;
646         s->unrestricted_mv = 0;
647         break;
648     case AV_CODEC_ID_MPEG4:
649         s->out_format      = FMT_H263;
650         s->h263_pred       = 1;
651         s->unrestricted_mv = 1;
652         s->low_delay       = s->max_b_frames ? 0 : 1;
653         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
654         break;
655     case AV_CODEC_ID_MSMPEG4V2:
656         s->out_format      = FMT_H263;
657         s->h263_pred       = 1;
658         s->unrestricted_mv = 1;
659         s->msmpeg4_version = 2;
660         avctx->delay       = 0;
661         s->low_delay       = 1;
662         break;
663     case AV_CODEC_ID_MSMPEG4V3:
664         s->out_format        = FMT_H263;
665         s->h263_pred         = 1;
666         s->unrestricted_mv   = 1;
667         s->msmpeg4_version   = 3;
668         s->flipflop_rounding = 1;
669         avctx->delay         = 0;
670         s->low_delay         = 1;
671         break;
672     case AV_CODEC_ID_WMV1:
673         s->out_format        = FMT_H263;
674         s->h263_pred         = 1;
675         s->unrestricted_mv   = 1;
676         s->msmpeg4_version   = 4;
677         s->flipflop_rounding = 1;
678         avctx->delay         = 0;
679         s->low_delay         = 1;
680         break;
681     case AV_CODEC_ID_WMV2:
682         s->out_format        = FMT_H263;
683         s->h263_pred         = 1;
684         s->unrestricted_mv   = 1;
685         s->msmpeg4_version   = 5;
686         s->flipflop_rounding = 1;
687         avctx->delay         = 0;
688         s->low_delay         = 1;
689         break;
690     default:
691         return -1;
692     }
693
694     avctx->has_b_frames = !s->low_delay;
695
696     s->encoding = 1;
697
698     s->progressive_frame    =
699     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
700                                                 CODEC_FLAG_INTERLACED_ME) ||
701                                 s->alternate_scan);
702
703     /* init */
704     ff_mpv_idct_init(s);
705     if (ff_mpv_common_init(s) < 0)
706         return -1;
707
708     if (ARCH_X86)
709         ff_mpv_encode_init_x86(s);
710
711     ff_fdctdsp_init(&s->fdsp, avctx);
712     ff_me_cmp_init(&s->mecc, avctx);
713     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
714     ff_pixblockdsp_init(&s->pdsp, avctx);
715     ff_qpeldsp_init(&s->qdsp);
716
717     s->avctx->coded_frame = s->current_picture.f;
718
719     if (s->msmpeg4_version) {
720         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
721                           2 * 2 * (MAX_LEVEL + 1) *
722                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
723     }
724     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
725
726     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
731                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
732     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
733                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
734
735     if (s->avctx->noise_reduction) {
736         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
737                           2 * 64 * sizeof(uint16_t), fail);
738     }
739
740     if (CONFIG_H263_ENCODER)
741         ff_h263dsp_init(&s->h263dsp);
742     if (!s->dct_quantize)
743         s->dct_quantize = ff_dct_quantize_c;
744     if (!s->denoise_dct)
745         s->denoise_dct  = denoise_dct_c;
746     s->fast_dct_quantize = s->dct_quantize;
747     if (avctx->trellis)
748         s->dct_quantize  = dct_quantize_trellis_c;
749
750     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
751         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
752
753     s->quant_precision = 5;
754
755     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
756     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
757
758     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
759         ff_h261_encode_init(s);
760     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
761         ff_h263_encode_init(s);
762     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
763         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
764             return ret;
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->idsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811 #if FF_API_NORMALIZE_AQP
812     FF_DISABLE_DEPRECATION_WARNINGS
813     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
814         s->mpv_flags |= FF_MPV_FLAG_NAQ;
815     FF_ENABLE_DEPRECATION_WARNINGS;
816 #endif
817
818 #if FF_API_MV0
819     FF_DISABLE_DEPRECATION_WARNINGS
820     if (avctx->flags & CODEC_FLAG_MV0)
821         s->mpv_flags |= FF_MPV_FLAG_MV0;
822     FF_ENABLE_DEPRECATION_WARNINGS
823 #endif
824
825 #if FF_API_MPV_OPT
826     FF_DISABLE_DEPRECATION_WARNINGS
827     if (avctx->rc_qsquish != 0.0)
828         s->rc_qsquish = avctx->rc_qsquish;
829     if (avctx->rc_qmod_amp != 0.0)
830         s->rc_qmod_amp = avctx->rc_qmod_amp;
831     if (avctx->rc_qmod_freq)
832         s->rc_qmod_freq = avctx->rc_qmod_freq;
833     if (avctx->rc_buffer_aggressivity != 1.0)
834         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
835     if (avctx->rc_initial_cplx != 0.0)
836         s->rc_initial_cplx = avctx->rc_initial_cplx;
837     if (avctx->lmin)
838         s->lmin = avctx->lmin;
839     if (avctx->lmax)
840         s->lmax = avctx->lmax;
841
842     if (avctx->rc_eq) {
843         av_freep(&s->rc_eq);
844         s->rc_eq = av_strdup(avctx->rc_eq);
845         if (!s->rc_eq)
846             return AVERROR(ENOMEM);
847     }
848     FF_ENABLE_DEPRECATION_WARNINGS
849 #endif
850
851     if (avctx->b_frame_strategy == 2) {
852         for (i = 0; i < s->max_b_frames + 2; i++) {
853             s->tmp_frames[i] = av_frame_alloc();
854             if (!s->tmp_frames[i])
855                 return AVERROR(ENOMEM);
856
857             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
858             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
859             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
860
861             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
862             if (ret < 0)
863                 return ret;
864         }
865     }
866
867     return 0;
868 fail:
869     ff_mpv_encode_end(avctx);
870     return AVERROR_UNKNOWN;
871 }
872
873 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
874 {
875     MpegEncContext *s = avctx->priv_data;
876     int i;
877
878     ff_rate_control_uninit(s);
879
880     ff_mpv_common_end(s);
881     if (CONFIG_MJPEG_ENCODER &&
882         s->out_format == FMT_MJPEG)
883         ff_mjpeg_encode_close(s);
884
885     av_freep(&avctx->extradata);
886
887     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
888         av_frame_free(&s->tmp_frames[i]);
889
890     ff_free_picture_tables(&s->new_picture);
891     ff_mpeg_unref_picture(s, &s->new_picture);
892
893     av_freep(&s->avctx->stats_out);
894     av_freep(&s->ac_stats);
895
896     av_freep(&s->q_intra_matrix);
897     av_freep(&s->q_inter_matrix);
898     av_freep(&s->q_intra_matrix16);
899     av_freep(&s->q_inter_matrix16);
900     av_freep(&s->input_picture);
901     av_freep(&s->reordered_input_picture);
902     av_freep(&s->dct_offset);
903
904     return 0;
905 }
906
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
920
921 static int get_intra_count(MpegEncContext *s, uint8_t *src,
922                            uint8_t *ref, int stride)
923 {
924     int x, y, w, h;
925     int acc = 0;
926
927     w = s->width  & ~15;
928     h = s->height & ~15;
929
930     for (y = 0; y < h; y += 16) {
931         for (x = 0; x < w; x += 16) {
932             int offset = x + y * stride;
933             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
934                                       stride, 16);
935             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
936             int sae  = get_sae(src + offset, mean, stride);
937
938             acc += sae + 500 < sad;
939         }
940     }
941     return acc;
942 }
943
944
945 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
946 {
947     Picture *pic = NULL;
948     int64_t pts;
949     int i, display_picture_number = 0, ret;
950     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
951                                                  (s->low_delay ? 0 : 1);
952     int direct = 1;
953
954     if (pic_arg) {
955         pts = pic_arg->pts;
956         display_picture_number = s->input_picture_number++;
957
958         if (pts != AV_NOPTS_VALUE) {
959             if (s->user_specified_pts != AV_NOPTS_VALUE) {
960                 int64_t time = pts;
961                 int64_t last = s->user_specified_pts;
962
963                 if (time <= last) {
964                     av_log(s->avctx, AV_LOG_ERROR,
965                            "Error, Invalid timestamp=%"PRId64", "
966                            "last=%"PRId64"\n", pts, s->user_specified_pts);
967                     return -1;
968                 }
969
970                 if (!s->low_delay && display_picture_number == 1)
971                     s->dts_delta = time - last;
972             }
973             s->user_specified_pts = pts;
974         } else {
975             if (s->user_specified_pts != AV_NOPTS_VALUE) {
976                 s->user_specified_pts =
977                 pts = s->user_specified_pts + 1;
978                 av_log(s->avctx, AV_LOG_INFO,
979                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
980                        pts);
981             } else {
982                 pts = display_picture_number;
983             }
984         }
985     }
986
987     if (pic_arg) {
988         if (!pic_arg->buf[0] ||
989             pic_arg->linesize[0] != s->linesize ||
990             pic_arg->linesize[1] != s->uvlinesize ||
991             pic_arg->linesize[2] != s->uvlinesize)
992             direct = 0;
993         if ((s->width & 15) || (s->height & 15))
994             direct = 0;
995
996         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
997                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
998
999         i = ff_find_unused_picture(s, direct);
1000         if (i < 0)
1001             return i;
1002
1003         pic = &s->picture[i];
1004         pic->reference = 3;
1005
1006         if (direct) {
1007             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1008                 return ret;
1009             if (ff_alloc_picture(s, pic, 1) < 0) {
1010                 return -1;
1011             }
1012         } else {
1013             if (ff_alloc_picture(s, pic, 0) < 0) {
1014                 return -1;
1015             }
1016
1017             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1018                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1019                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1020                 // empty
1021             } else {
1022                 int h_chroma_shift, v_chroma_shift;
1023                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1024                                                  &h_chroma_shift,
1025                                                  &v_chroma_shift);
1026
1027                 for (i = 0; i < 3; i++) {
1028                     int src_stride = pic_arg->linesize[i];
1029                     int dst_stride = i ? s->uvlinesize : s->linesize;
1030                     int h_shift = i ? h_chroma_shift : 0;
1031                     int v_shift = i ? v_chroma_shift : 0;
1032                     int w = s->width  >> h_shift;
1033                     int h = s->height >> v_shift;
1034                     uint8_t *src = pic_arg->data[i];
1035                     uint8_t *dst = pic->f->data[i];
1036
1037                     if (!s->avctx->rc_buffer_size)
1038                         dst += INPLACE_OFFSET;
1039
1040                     if (src_stride == dst_stride)
1041                         memcpy(dst, src, src_stride * h);
1042                     else {
1043                         int h2 = h;
1044                         uint8_t *dst2 = dst;
1045                         while (h2--) {
1046                             memcpy(dst2, src, w);
1047                             dst2 += dst_stride;
1048                             src += src_stride;
1049                         }
1050                     }
1051                     if ((s->width & 15) || (s->height & 15)) {
1052                         s->mpvencdsp.draw_edges(dst, dst_stride,
1053                                                 w, h,
1054                                                 16 >> h_shift,
1055                                                 16 >> v_shift,
1056                                                 EDGE_BOTTOM);
1057                     }
1058                 }
1059             }
1060         }
1061         ret = av_frame_copy_props(pic->f, pic_arg);
1062         if (ret < 0)
1063             return ret;
1064
1065         pic->f->display_picture_number = display_picture_number;
1066         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1067     }
1068
1069     /* shift buffer entries */
1070     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1071         s->input_picture[i - 1] = s->input_picture[i];
1072
1073     s->input_picture[encoding_delay] = (Picture*) pic;
1074
1075     return 0;
1076 }
1077
1078 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1079 {
1080     int x, y, plane;
1081     int score = 0;
1082     int64_t score64 = 0;
1083
1084     for (plane = 0; plane < 3; plane++) {
1085         const int stride = p->f->linesize[plane];
1086         const int bw = plane ? 1 : 2;
1087         for (y = 0; y < s->mb_height * bw; y++) {
1088             for (x = 0; x < s->mb_width * bw; x++) {
1089                 int off = p->shared ? 0 : 16;
1090                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1091                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1092                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1093
1094                 switch (s->avctx->frame_skip_exp) {
1095                 case 0: score    =  FFMAX(score, v);          break;
1096                 case 1: score   += FFABS(v);                  break;
1097                 case 2: score   += v * v;                     break;
1098                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1099                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1100                 }
1101             }
1102         }
1103     }
1104
1105     if (score)
1106         score64 = score;
1107
1108     if (score64 < s->avctx->frame_skip_threshold)
1109         return 1;
1110     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1111         return 1;
1112     return 0;
1113 }
1114
1115 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1116 {
1117     AVPacket pkt = { 0 };
1118     int ret, got_output;
1119
1120     av_init_packet(&pkt);
1121     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1122     if (ret < 0)
1123         return ret;
1124
1125     ret = pkt.size;
1126     av_free_packet(&pkt);
1127     return ret;
1128 }
1129
1130 static int estimate_best_b_count(MpegEncContext *s)
1131 {
1132     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1133     AVCodecContext *c = avcodec_alloc_context3(NULL);
1134     const int scale = s->avctx->brd_scale;
1135     int i, j, out_size, p_lambda, b_lambda, lambda2;
1136     int64_t best_rd  = INT64_MAX;
1137     int best_b_count = -1;
1138
1139     assert(scale >= 0 && scale <= 3);
1140
1141     //emms_c();
1142     //s->next_picture_ptr->quality;
1143     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1144     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1145     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1146     if (!b_lambda) // FIXME we should do this somewhere else
1147         b_lambda = p_lambda;
1148     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1149                FF_LAMBDA_SHIFT;
1150
1151     c->width        = s->width  >> scale;
1152     c->height       = s->height >> scale;
1153     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1154     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1155     c->mb_decision  = s->avctx->mb_decision;
1156     c->me_cmp       = s->avctx->me_cmp;
1157     c->mb_cmp       = s->avctx->mb_cmp;
1158     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1159     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1160     c->time_base    = s->avctx->time_base;
1161     c->max_b_frames = s->max_b_frames;
1162
1163     if (avcodec_open2(c, codec, NULL) < 0)
1164         return -1;
1165
1166     for (i = 0; i < s->max_b_frames + 2; i++) {
1167         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1168                                                 s->next_picture_ptr;
1169
1170         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1171             pre_input = *pre_input_ptr;
1172
1173             if (!pre_input.shared && i) {
1174                 pre_input.f->data[0] += INPLACE_OFFSET;
1175                 pre_input.f->data[1] += INPLACE_OFFSET;
1176                 pre_input.f->data[2] += INPLACE_OFFSET;
1177             }
1178
1179             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1180                                        s->tmp_frames[i]->linesize[0],
1181                                        pre_input.f->data[0],
1182                                        pre_input.f->linesize[0],
1183                                        c->width, c->height);
1184             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1185                                        s->tmp_frames[i]->linesize[1],
1186                                        pre_input.f->data[1],
1187                                        pre_input.f->linesize[1],
1188                                        c->width >> 1, c->height >> 1);
1189             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1190                                        s->tmp_frames[i]->linesize[2],
1191                                        pre_input.f->data[2],
1192                                        pre_input.f->linesize[2],
1193                                        c->width >> 1, c->height >> 1);
1194         }
1195     }
1196
1197     for (j = 0; j < s->max_b_frames + 1; j++) {
1198         int64_t rd = 0;
1199
1200         if (!s->input_picture[j])
1201             break;
1202
1203         c->error[0] = c->error[1] = c->error[2] = 0;
1204
1205         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1206         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1207
1208         out_size = encode_frame(c, s->tmp_frames[0]);
1209
1210         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1211
1212         for (i = 0; i < s->max_b_frames + 1; i++) {
1213             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1214
1215             s->tmp_frames[i + 1]->pict_type = is_p ?
1216                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1217             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1218
1219             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1220
1221             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1222         }
1223
1224         /* get the delayed frames */
1225         while (out_size) {
1226             out_size = encode_frame(c, NULL);
1227             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1228         }
1229
1230         rd += c->error[0] + c->error[1] + c->error[2];
1231
1232         if (rd < best_rd) {
1233             best_rd = rd;
1234             best_b_count = j;
1235         }
1236     }
1237
1238     avcodec_close(c);
1239     av_freep(&c);
1240
1241     return best_b_count;
1242 }
1243
1244 static int select_input_picture(MpegEncContext *s)
1245 {
1246     int i, ret;
1247
1248     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1249         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1250     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1251
1252     /* set next picture type & ordering */
1253     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1254         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1255             !s->next_picture_ptr || s->intra_only) {
1256             s->reordered_input_picture[0] = s->input_picture[0];
1257             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1258             s->reordered_input_picture[0]->f->coded_picture_number =
1259                 s->coded_picture_number++;
1260         } else {
1261             int b_frames;
1262
1263             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1264                 if (s->picture_in_gop_number < s->gop_size &&
1265                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1266                     // FIXME check that te gop check above is +-1 correct
1267                     av_frame_unref(s->input_picture[0]->f);
1268
1269                     emms_c();
1270                     ff_vbv_update(s, 0);
1271
1272                     goto no_output_pic;
1273                 }
1274             }
1275
1276             if (s->flags & CODEC_FLAG_PASS2) {
1277                 for (i = 0; i < s->max_b_frames + 1; i++) {
1278                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1279
1280                     if (pict_num >= s->rc_context.num_entries)
1281                         break;
1282                     if (!s->input_picture[i]) {
1283                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1284                         break;
1285                     }
1286
1287                     s->input_picture[i]->f->pict_type =
1288                         s->rc_context.entry[pict_num].new_pict_type;
1289                 }
1290             }
1291
1292             if (s->avctx->b_frame_strategy == 0) {
1293                 b_frames = s->max_b_frames;
1294                 while (b_frames && !s->input_picture[b_frames])
1295                     b_frames--;
1296             } else if (s->avctx->b_frame_strategy == 1) {
1297                 for (i = 1; i < s->max_b_frames + 1; i++) {
1298                     if (s->input_picture[i] &&
1299                         s->input_picture[i]->b_frame_score == 0) {
1300                         s->input_picture[i]->b_frame_score =
1301                             get_intra_count(s,
1302                                             s->input_picture[i    ]->f->data[0],
1303                                             s->input_picture[i - 1]->f->data[0],
1304                                             s->linesize) + 1;
1305                     }
1306                 }
1307                 for (i = 0; i < s->max_b_frames + 1; i++) {
1308                     if (!s->input_picture[i] ||
1309                         s->input_picture[i]->b_frame_score - 1 >
1310                             s->mb_num / s->avctx->b_sensitivity)
1311                         break;
1312                 }
1313
1314                 b_frames = FFMAX(0, i - 1);
1315
1316                 /* reset scores */
1317                 for (i = 0; i < b_frames + 1; i++) {
1318                     s->input_picture[i]->b_frame_score = 0;
1319                 }
1320             } else if (s->avctx->b_frame_strategy == 2) {
1321                 b_frames = estimate_best_b_count(s);
1322             } else {
1323                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1324                 b_frames = 0;
1325             }
1326
1327             emms_c();
1328
1329             for (i = b_frames - 1; i >= 0; i--) {
1330                 int type = s->input_picture[i]->f->pict_type;
1331                 if (type && type != AV_PICTURE_TYPE_B)
1332                     b_frames = i;
1333             }
1334             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1335                 b_frames == s->max_b_frames) {
1336                 av_log(s->avctx, AV_LOG_ERROR,
1337                        "warning, too many b frames in a row\n");
1338             }
1339
1340             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1341                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1342                     s->gop_size > s->picture_in_gop_number) {
1343                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1344                 } else {
1345                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1346                         b_frames = 0;
1347                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1348                 }
1349             }
1350
1351             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1352                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1353                 b_frames--;
1354
1355             s->reordered_input_picture[0] = s->input_picture[b_frames];
1356             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1357                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1358             s->reordered_input_picture[0]->f->coded_picture_number =
1359                 s->coded_picture_number++;
1360             for (i = 0; i < b_frames; i++) {
1361                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1362                 s->reordered_input_picture[i + 1]->f->pict_type =
1363                     AV_PICTURE_TYPE_B;
1364                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1365                     s->coded_picture_number++;
1366             }
1367         }
1368     }
1369 no_output_pic:
1370     if (s->reordered_input_picture[0]) {
1371         s->reordered_input_picture[0]->reference =
1372            s->reordered_input_picture[0]->f->pict_type !=
1373                AV_PICTURE_TYPE_B ? 3 : 0;
1374
1375         ff_mpeg_unref_picture(s, &s->new_picture);
1376         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1377             return ret;
1378
1379         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1380             // input is a shared pix, so we can't modifiy it -> alloc a new
1381             // one & ensure that the shared one is reuseable
1382
1383             Picture *pic;
1384             int i = ff_find_unused_picture(s, 0);
1385             if (i < 0)
1386                 return i;
1387             pic = &s->picture[i];
1388
1389             pic->reference = s->reordered_input_picture[0]->reference;
1390             if (ff_alloc_picture(s, pic, 0) < 0) {
1391                 return -1;
1392             }
1393
1394             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1395             if (ret < 0)
1396                 return ret;
1397
1398             /* mark us unused / free shared pic */
1399             av_frame_unref(s->reordered_input_picture[0]->f);
1400             s->reordered_input_picture[0]->shared = 0;
1401
1402             s->current_picture_ptr = pic;
1403         } else {
1404             // input is not a shared pix -> reuse buffer for current_pix
1405             s->current_picture_ptr = s->reordered_input_picture[0];
1406             for (i = 0; i < 4; i++) {
1407                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1408             }
1409         }
1410         ff_mpeg_unref_picture(s, &s->current_picture);
1411         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1412                                        s->current_picture_ptr)) < 0)
1413             return ret;
1414
1415         s->picture_number = s->new_picture.f->display_picture_number;
1416     } else {
1417         ff_mpeg_unref_picture(s, &s->new_picture);
1418     }
1419     return 0;
1420 }
1421
1422 static void frame_end(MpegEncContext *s)
1423 {
1424     int i;
1425
1426     if (s->unrestricted_mv &&
1427         s->current_picture.reference &&
1428         !s->intra_only) {
1429         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1430         int hshift = desc->log2_chroma_w;
1431         int vshift = desc->log2_chroma_h;
1432         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1433                                 s->h_edge_pos, s->v_edge_pos,
1434                                 EDGE_WIDTH, EDGE_WIDTH,
1435                                 EDGE_TOP | EDGE_BOTTOM);
1436         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1437                                 s->h_edge_pos >> hshift,
1438                                 s->v_edge_pos >> vshift,
1439                                 EDGE_WIDTH >> hshift,
1440                                 EDGE_WIDTH >> vshift,
1441                                 EDGE_TOP | EDGE_BOTTOM);
1442         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1443                                 s->h_edge_pos >> hshift,
1444                                 s->v_edge_pos >> vshift,
1445                                 EDGE_WIDTH >> hshift,
1446                                 EDGE_WIDTH >> vshift,
1447                                 EDGE_TOP | EDGE_BOTTOM);
1448     }
1449
1450     emms_c();
1451
1452     s->last_pict_type                 = s->pict_type;
1453     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1454     if (s->pict_type!= AV_PICTURE_TYPE_B)
1455         s->last_non_b_pict_type = s->pict_type;
1456
1457     if (s->encoding) {
1458         /* release non-reference frames */
1459         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1460             if (!s->picture[i].reference)
1461                 ff_mpeg_unref_picture(s, &s->picture[i]);
1462         }
1463     }
1464
1465     s->avctx->coded_frame = s->current_picture_ptr->f;
1466
1467 }
1468
1469 static void update_noise_reduction(MpegEncContext *s)
1470 {
1471     int intra, i;
1472
1473     for (intra = 0; intra < 2; intra++) {
1474         if (s->dct_count[intra] > (1 << 16)) {
1475             for (i = 0; i < 64; i++) {
1476                 s->dct_error_sum[intra][i] >>= 1;
1477             }
1478             s->dct_count[intra] >>= 1;
1479         }
1480
1481         for (i = 0; i < 64; i++) {
1482             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1483                                        s->dct_count[intra] +
1484                                        s->dct_error_sum[intra][i] / 2) /
1485                                       (s->dct_error_sum[intra][i] + 1);
1486         }
1487     }
1488 }
1489
1490 static int frame_start(MpegEncContext *s)
1491 {
1492     int ret;
1493
1494     /* mark & release old frames */
1495     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1496         s->last_picture_ptr != s->next_picture_ptr &&
1497         s->last_picture_ptr->f->buf[0]) {
1498         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1499     }
1500
1501     s->current_picture_ptr->f->pict_type = s->pict_type;
1502     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1503
1504     ff_mpeg_unref_picture(s, &s->current_picture);
1505     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1506                                    s->current_picture_ptr)) < 0)
1507         return ret;
1508
1509     if (s->pict_type != AV_PICTURE_TYPE_B) {
1510         s->last_picture_ptr = s->next_picture_ptr;
1511         if (!s->droppable)
1512             s->next_picture_ptr = s->current_picture_ptr;
1513     }
1514
1515     if (s->last_picture_ptr) {
1516         ff_mpeg_unref_picture(s, &s->last_picture);
1517         if (s->last_picture_ptr->f->buf[0] &&
1518             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1519                                        s->last_picture_ptr)) < 0)
1520             return ret;
1521     }
1522     if (s->next_picture_ptr) {
1523         ff_mpeg_unref_picture(s, &s->next_picture);
1524         if (s->next_picture_ptr->f->buf[0] &&
1525             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1526                                        s->next_picture_ptr)) < 0)
1527             return ret;
1528     }
1529
1530     if (s->picture_structure!= PICT_FRAME) {
1531         int i;
1532         for (i = 0; i < 4; i++) {
1533             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1534                 s->current_picture.f->data[i] +=
1535                     s->current_picture.f->linesize[i];
1536             }
1537             s->current_picture.f->linesize[i] *= 2;
1538             s->last_picture.f->linesize[i]    *= 2;
1539             s->next_picture.f->linesize[i]    *= 2;
1540         }
1541     }
1542
1543     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1544         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1545         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1546     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1547         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1548         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1549     } else {
1550         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1551         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1552     }
1553
1554     if (s->dct_error_sum) {
1555         assert(s->avctx->noise_reduction && s->encoding);
1556         update_noise_reduction(s);
1557     }
1558
1559     return 0;
1560 }
1561
1562 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1563                           const AVFrame *pic_arg, int *got_packet)
1564 {
1565     MpegEncContext *s = avctx->priv_data;
1566     int i, stuffing_count, ret;
1567     int context_count = s->slice_context_count;
1568
1569     s->picture_in_gop_number++;
1570
1571     if (load_input_picture(s, pic_arg) < 0)
1572         return -1;
1573
1574     if (select_input_picture(s) < 0) {
1575         return -1;
1576     }
1577
1578     /* output? */
1579     if (s->new_picture.f->data[0]) {
1580         if (!pkt->data &&
1581             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1582             return ret;
1583         if (s->mb_info) {
1584             s->mb_info_ptr = av_packet_new_side_data(pkt,
1585                                  AV_PKT_DATA_H263_MB_INFO,
1586                                  s->mb_width*s->mb_height*12);
1587             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1588         }
1589
1590         for (i = 0; i < context_count; i++) {
1591             int start_y = s->thread_context[i]->start_mb_y;
1592             int   end_y = s->thread_context[i]->  end_mb_y;
1593             int h       = s->mb_height;
1594             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1595             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1596
1597             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1598         }
1599
1600         s->pict_type = s->new_picture.f->pict_type;
1601         //emms_c();
1602         ret = frame_start(s);
1603         if (ret < 0)
1604             return ret;
1605 vbv_retry:
1606         if (encode_picture(s, s->picture_number) < 0)
1607             return -1;
1608
1609         avctx->header_bits = s->header_bits;
1610         avctx->mv_bits     = s->mv_bits;
1611         avctx->misc_bits   = s->misc_bits;
1612         avctx->i_tex_bits  = s->i_tex_bits;
1613         avctx->p_tex_bits  = s->p_tex_bits;
1614         avctx->i_count     = s->i_count;
1615         // FIXME f/b_count in avctx
1616         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1617         avctx->skip_count  = s->skip_count;
1618
1619         frame_end(s);
1620
1621         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1622             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1623
1624         if (avctx->rc_buffer_size) {
1625             RateControlContext *rcc = &s->rc_context;
1626             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1627
1628             if (put_bits_count(&s->pb) > max_size &&
1629                 s->lambda < s->lmax) {
1630                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1631                                        (s->qscale + 1) / s->qscale);
1632                 if (s->adaptive_quant) {
1633                     int i;
1634                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1635                         s->lambda_table[i] =
1636                             FFMAX(s->lambda_table[i] + 1,
1637                                   s->lambda_table[i] * (s->qscale + 1) /
1638                                   s->qscale);
1639                 }
1640                 s->mb_skipped = 0;        // done in frame_start()
1641                 // done in encode_picture() so we must undo it
1642                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1643                     if (s->flipflop_rounding          ||
1644                         s->codec_id == AV_CODEC_ID_H263P ||
1645                         s->codec_id == AV_CODEC_ID_MPEG4)
1646                         s->no_rounding ^= 1;
1647                 }
1648                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1649                     s->time_base       = s->last_time_base;
1650                     s->last_non_b_time = s->time - s->pp_time;
1651                 }
1652                 for (i = 0; i < context_count; i++) {
1653                     PutBitContext *pb = &s->thread_context[i]->pb;
1654                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1655                 }
1656                 goto vbv_retry;
1657             }
1658
1659             assert(s->avctx->rc_max_rate);
1660         }
1661
1662         if (s->flags & CODEC_FLAG_PASS1)
1663             ff_write_pass1_stats(s);
1664
1665         for (i = 0; i < 4; i++) {
1666             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1667             avctx->error[i] += s->current_picture_ptr->f->error[i];
1668         }
1669
1670         if (s->flags & CODEC_FLAG_PASS1)
1671             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1672                    avctx->i_tex_bits + avctx->p_tex_bits ==
1673                        put_bits_count(&s->pb));
1674         flush_put_bits(&s->pb);
1675         s->frame_bits  = put_bits_count(&s->pb);
1676
1677         stuffing_count = ff_vbv_update(s, s->frame_bits);
1678         if (stuffing_count) {
1679             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1680                     stuffing_count + 50) {
1681                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1682                 return -1;
1683             }
1684
1685             switch (s->codec_id) {
1686             case AV_CODEC_ID_MPEG1VIDEO:
1687             case AV_CODEC_ID_MPEG2VIDEO:
1688                 while (stuffing_count--) {
1689                     put_bits(&s->pb, 8, 0);
1690                 }
1691             break;
1692             case AV_CODEC_ID_MPEG4:
1693                 put_bits(&s->pb, 16, 0);
1694                 put_bits(&s->pb, 16, 0x1C3);
1695                 stuffing_count -= 4;
1696                 while (stuffing_count--) {
1697                     put_bits(&s->pb, 8, 0xFF);
1698                 }
1699             break;
1700             default:
1701                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1702             }
1703             flush_put_bits(&s->pb);
1704             s->frame_bits  = put_bits_count(&s->pb);
1705         }
1706
1707         /* update mpeg1/2 vbv_delay for CBR */
1708         if (s->avctx->rc_max_rate                          &&
1709             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1710             s->out_format == FMT_MPEG1                     &&
1711             90000LL * (avctx->rc_buffer_size - 1) <=
1712                 s->avctx->rc_max_rate * 0xFFFFLL) {
1713             int vbv_delay, min_delay;
1714             double inbits  = s->avctx->rc_max_rate *
1715                              av_q2d(s->avctx->time_base);
1716             int    minbits = s->frame_bits - 8 *
1717                              (s->vbv_delay_ptr - s->pb.buf - 1);
1718             double bits    = s->rc_context.buffer_index + minbits - inbits;
1719
1720             if (bits < 0)
1721                 av_log(s->avctx, AV_LOG_ERROR,
1722                        "Internal error, negative bits\n");
1723
1724             assert(s->repeat_first_field == 0);
1725
1726             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1727             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1728                         s->avctx->rc_max_rate;
1729
1730             vbv_delay = FFMAX(vbv_delay, min_delay);
1731
1732             assert(vbv_delay < 0xFFFF);
1733
1734             s->vbv_delay_ptr[0] &= 0xF8;
1735             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1736             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1737             s->vbv_delay_ptr[2] &= 0x07;
1738             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1739             avctx->vbv_delay     = vbv_delay * 300;
1740         }
1741         s->total_bits     += s->frame_bits;
1742         avctx->frame_bits  = s->frame_bits;
1743
1744         pkt->pts = s->current_picture.f->pts;
1745         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1746             if (!s->current_picture.f->coded_picture_number)
1747                 pkt->dts = pkt->pts - s->dts_delta;
1748             else
1749                 pkt->dts = s->reordered_pts;
1750             s->reordered_pts = pkt->pts;
1751         } else
1752             pkt->dts = pkt->pts;
1753         if (s->current_picture.f->key_frame)
1754             pkt->flags |= AV_PKT_FLAG_KEY;
1755         if (s->mb_info)
1756             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1757     } else {
1758         s->frame_bits = 0;
1759     }
1760     assert((s->frame_bits & 7) == 0);
1761
1762     pkt->size = s->frame_bits / 8;
1763     *got_packet = !!pkt->size;
1764     return 0;
1765 }
1766
1767 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1768                                                 int n, int threshold)
1769 {
1770     static const char tab[64] = {
1771         3, 2, 2, 1, 1, 1, 1, 1,
1772         1, 1, 1, 1, 1, 1, 1, 1,
1773         1, 1, 1, 1, 1, 1, 1, 1,
1774         0, 0, 0, 0, 0, 0, 0, 0,
1775         0, 0, 0, 0, 0, 0, 0, 0,
1776         0, 0, 0, 0, 0, 0, 0, 0,
1777         0, 0, 0, 0, 0, 0, 0, 0,
1778         0, 0, 0, 0, 0, 0, 0, 0
1779     };
1780     int score = 0;
1781     int run = 0;
1782     int i;
1783     int16_t *block = s->block[n];
1784     const int last_index = s->block_last_index[n];
1785     int skip_dc;
1786
1787     if (threshold < 0) {
1788         skip_dc = 0;
1789         threshold = -threshold;
1790     } else
1791         skip_dc = 1;
1792
1793     /* Are all we could set to zero already zero? */
1794     if (last_index <= skip_dc - 1)
1795         return;
1796
1797     for (i = 0; i <= last_index; i++) {
1798         const int j = s->intra_scantable.permutated[i];
1799         const int level = FFABS(block[j]);
1800         if (level == 1) {
1801             if (skip_dc && i == 0)
1802                 continue;
1803             score += tab[run];
1804             run = 0;
1805         } else if (level > 1) {
1806             return;
1807         } else {
1808             run++;
1809         }
1810     }
1811     if (score >= threshold)
1812         return;
1813     for (i = skip_dc; i <= last_index; i++) {
1814         const int j = s->intra_scantable.permutated[i];
1815         block[j] = 0;
1816     }
1817     if (block[0])
1818         s->block_last_index[n] = 0;
1819     else
1820         s->block_last_index[n] = -1;
1821 }
1822
1823 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1824                                int last_index)
1825 {
1826     int i;
1827     const int maxlevel = s->max_qcoeff;
1828     const int minlevel = s->min_qcoeff;
1829     int overflow = 0;
1830
1831     if (s->mb_intra) {
1832         i = 1; // skip clipping of intra dc
1833     } else
1834         i = 0;
1835
1836     for (; i <= last_index; i++) {
1837         const int j = s->intra_scantable.permutated[i];
1838         int level = block[j];
1839
1840         if (level > maxlevel) {
1841             level = maxlevel;
1842             overflow++;
1843         } else if (level < minlevel) {
1844             level = minlevel;
1845             overflow++;
1846         }
1847
1848         block[j] = level;
1849     }
1850
1851     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1852         av_log(s->avctx, AV_LOG_INFO,
1853                "warning, clipping %d dct coefficients to %d..%d\n",
1854                overflow, minlevel, maxlevel);
1855 }
1856
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel activity.
 *
 * For each pixel the neighbourhood of up to 3x3 pixels (clipped to the 8x8
 * block) is scanned, and the weight is
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output array of 64 weights, stored as weight[x + 8 * y]
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total   = 0;
            int squares = 0;
            int samples = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    total   += v;
                    squares += v * v;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1880
1881 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1882                                                 int motion_x, int motion_y,
1883                                                 int mb_block_height,
1884                                                 int mb_block_count)
1885 {
1886     int16_t weight[8][64];
1887     int16_t orig[8][64];
1888     const int mb_x = s->mb_x;
1889     const int mb_y = s->mb_y;
1890     int i;
1891     int skip_dct[8];
1892     int dct_offset = s->linesize * 8; // default for progressive frames
1893     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1894     ptrdiff_t wrap_y, wrap_c;
1895
1896     for (i = 0; i < mb_block_count; i++)
1897         skip_dct[i] = s->skipdct;
1898
1899     if (s->adaptive_quant) {
1900         const int last_qp = s->qscale;
1901         const int mb_xy = mb_x + mb_y * s->mb_stride;
1902
1903         s->lambda = s->lambda_table[mb_xy];
1904         update_qscale(s);
1905
1906         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1907             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1908             s->dquant = s->qscale - last_qp;
1909
1910             if (s->out_format == FMT_H263) {
1911                 s->dquant = av_clip(s->dquant, -2, 2);
1912
1913                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1914                     if (!s->mb_intra) {
1915                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1916                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1917                                 s->dquant = 0;
1918                         }
1919                         if (s->mv_type == MV_TYPE_8X8)
1920                             s->dquant = 0;
1921                     }
1922                 }
1923             }
1924         }
1925         ff_set_qscale(s, last_qp + s->dquant);
1926     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1927         ff_set_qscale(s, s->qscale + s->dquant);
1928
1929     wrap_y = s->linesize;
1930     wrap_c = s->uvlinesize;
1931     ptr_y  = s->new_picture.f->data[0] +
1932              (mb_y * 16 * wrap_y)              + mb_x * 16;
1933     ptr_cb = s->new_picture.f->data[1] +
1934              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1935     ptr_cr = s->new_picture.f->data[2] +
1936              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1937
1938     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1939         uint8_t *ebuf = s->edge_emu_buffer + 32;
1940         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1941                                  wrap_y, wrap_y,
1942                                  16, 16, mb_x * 16, mb_y * 16,
1943                                  s->width, s->height);
1944         ptr_y = ebuf;
1945         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1946                                  wrap_c, wrap_c,
1947                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1948                                  s->width >> 1, s->height >> 1);
1949         ptr_cb = ebuf + 18 * wrap_y;
1950         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1951                                  wrap_c, wrap_c,
1952                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1953                                  s->width >> 1, s->height >> 1);
1954         ptr_cr = ebuf + 18 * wrap_y + 8;
1955     }
1956
1957     if (s->mb_intra) {
1958         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1959             int progressive_score, interlaced_score;
1960
1961             s->interlaced_dct = 0;
1962             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1963                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1964                                                      NULL, wrap_y, 8) - 400;
1965
1966             if (progressive_score > 0) {
1967                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1968                                                         NULL, wrap_y * 2, 8) +
1969                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1970                                                         NULL, wrap_y * 2, 8);
1971                 if (progressive_score > interlaced_score) {
1972                     s->interlaced_dct = 1;
1973
1974                     dct_offset = wrap_y;
1975                     wrap_y <<= 1;
1976                     if (s->chroma_format == CHROMA_422)
1977                         wrap_c <<= 1;
1978                 }
1979             }
1980         }
1981
1982         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1983         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1984         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1985         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1986
1987         if (s->flags & CODEC_FLAG_GRAY) {
1988             skip_dct[4] = 1;
1989             skip_dct[5] = 1;
1990         } else {
1991             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1992             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1993             if (!s->chroma_y_shift) { /* 422 */
1994                 s->pdsp.get_pixels(s->block[6],
1995                                    ptr_cb + (dct_offset >> 1), wrap_c);
1996                 s->pdsp.get_pixels(s->block[7],
1997                                    ptr_cr + (dct_offset >> 1), wrap_c);
1998             }
1999         }
2000     } else {
2001         op_pixels_func (*op_pix)[4];
2002         qpel_mc_func (*op_qpix)[16];
2003         uint8_t *dest_y, *dest_cb, *dest_cr;
2004
2005         dest_y  = s->dest[0];
2006         dest_cb = s->dest[1];
2007         dest_cr = s->dest[2];
2008
2009         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2010             op_pix  = s->hdsp.put_pixels_tab;
2011             op_qpix = s->qdsp.put_qpel_pixels_tab;
2012         } else {
2013             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2014             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2015         }
2016
2017         if (s->mv_dir & MV_DIR_FORWARD) {
2018             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2019                           s->last_picture.f->data,
2020                           op_pix, op_qpix);
2021             op_pix  = s->hdsp.avg_pixels_tab;
2022             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2023         }
2024         if (s->mv_dir & MV_DIR_BACKWARD) {
2025             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2026                           s->next_picture.f->data,
2027                           op_pix, op_qpix);
2028         }
2029
2030         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2031             int progressive_score, interlaced_score;
2032
2033             s->interlaced_dct = 0;
2034             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2035                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2036                                                      ptr_y + wrap_y * 8,
2037                                                      wrap_y, 8) - 400;
2038
2039             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2040                 progressive_score -= 400;
2041
2042             if (progressive_score > 0) {
2043                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2044                                                         wrap_y * 2, 8) +
2045                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2046                                                         ptr_y + wrap_y,
2047                                                         wrap_y * 2, 8);
2048
2049                 if (progressive_score > interlaced_score) {
2050                     s->interlaced_dct = 1;
2051
2052                     dct_offset = wrap_y;
2053                     wrap_y <<= 1;
2054                     if (s->chroma_format == CHROMA_422)
2055                         wrap_c <<= 1;
2056                 }
2057             }
2058         }
2059
2060         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2061         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2062         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2063                             dest_y + dct_offset, wrap_y);
2064         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2065                             dest_y + dct_offset + 8, wrap_y);
2066
2067         if (s->flags & CODEC_FLAG_GRAY) {
2068             skip_dct[4] = 1;
2069             skip_dct[5] = 1;
2070         } else {
2071             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2072             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2073             if (!s->chroma_y_shift) { /* 422 */
2074                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2075                                     dest_cb + (dct_offset >> 1), wrap_c);
2076                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2077                                     dest_cr + (dct_offset >> 1), wrap_c);
2078             }
2079         }
2080         /* pre quantization */
2081         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2082                 2 * s->qscale * s->qscale) {
2083             // FIXME optimize
2084             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2085                 skip_dct[0] = 1;
2086             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2087                 skip_dct[1] = 1;
2088             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2089                                wrap_y, 8) < 20 * s->qscale)
2090                 skip_dct[2] = 1;
2091             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2092                                wrap_y, 8) < 20 * s->qscale)
2093                 skip_dct[3] = 1;
2094             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2095                 skip_dct[4] = 1;
2096             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2097                 skip_dct[5] = 1;
2098             if (!s->chroma_y_shift) { /* 422 */
2099                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2100                                    dest_cb + (dct_offset >> 1),
2101                                    wrap_c, 8) < 20 * s->qscale)
2102                     skip_dct[6] = 1;
2103                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2104                                    dest_cr + (dct_offset >> 1),
2105                                    wrap_c, 8) < 20 * s->qscale)
2106                     skip_dct[7] = 1;
2107             }
2108         }
2109     }
2110
2111     if (s->quantizer_noise_shaping) {
2112         if (!skip_dct[0])
2113             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2114         if (!skip_dct[1])
2115             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2116         if (!skip_dct[2])
2117             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2118         if (!skip_dct[3])
2119             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2120         if (!skip_dct[4])
2121             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2122         if (!skip_dct[5])
2123             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2124         if (!s->chroma_y_shift) { /* 422 */
2125             if (!skip_dct[6])
2126                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2127                                   wrap_c);
2128             if (!skip_dct[7])
2129                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2130                                   wrap_c);
2131         }
2132         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2133     }
2134
2135     /* DCT & quantize */
2136     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2137     {
2138         for (i = 0; i < mb_block_count; i++) {
2139             if (!skip_dct[i]) {
2140                 int overflow;
2141                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2142                 // FIXME we could decide to change to quantizer instead of
2143                 // clipping
2144                 // JS: I don't think that would be a good idea it could lower
2145                 //     quality instead of improve it. Just INTRADC clipping
2146                 //     deserves changes in quantizer
2147                 if (overflow)
2148                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2149             } else
2150                 s->block_last_index[i] = -1;
2151         }
2152         if (s->quantizer_noise_shaping) {
2153             for (i = 0; i < mb_block_count; i++) {
2154                 if (!skip_dct[i]) {
2155                     s->block_last_index[i] =
2156                         dct_quantize_refine(s, s->block[i], weight[i],
2157                                             orig[i], i, s->qscale);
2158                 }
2159             }
2160         }
2161
2162         if (s->luma_elim_threshold && !s->mb_intra)
2163             for (i = 0; i < 4; i++)
2164                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2165         if (s->chroma_elim_threshold && !s->mb_intra)
2166             for (i = 4; i < mb_block_count; i++)
2167                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2168
2169         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2170             for (i = 0; i < mb_block_count; i++) {
2171                 if (s->block_last_index[i] == -1)
2172                     s->coded_score[i] = INT_MAX / 256;
2173             }
2174         }
2175     }
2176
2177     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2178         s->block_last_index[4] =
2179         s->block_last_index[5] = 0;
2180         s->block[4][0] =
2181         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2182     }
2183
2184     // non c quantize code returns incorrect block_last_index FIXME
2185     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2186         for (i = 0; i < mb_block_count; i++) {
2187             int j;
2188             if (s->block_last_index[i] > 0) {
2189                 for (j = 63; j > 0; j--) {
2190                     if (s->block[i][s->intra_scantable.permutated[j]])
2191                         break;
2192                 }
2193                 s->block_last_index[i] = j;
2194             }
2195         }
2196     }
2197
2198     /* huffman encode */
2199     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2200     case AV_CODEC_ID_MPEG1VIDEO:
2201     case AV_CODEC_ID_MPEG2VIDEO:
2202         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2203             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2204         break;
2205     case AV_CODEC_ID_MPEG4:
2206         if (CONFIG_MPEG4_ENCODER)
2207             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2208         break;
2209     case AV_CODEC_ID_MSMPEG4V2:
2210     case AV_CODEC_ID_MSMPEG4V3:
2211     case AV_CODEC_ID_WMV1:
2212         if (CONFIG_MSMPEG4_ENCODER)
2213             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2214         break;
2215     case AV_CODEC_ID_WMV2:
2216         if (CONFIG_WMV2_ENCODER)
2217             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2218         break;
2219     case AV_CODEC_ID_H261:
2220         if (CONFIG_H261_ENCODER)
2221             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2222         break;
2223     case AV_CODEC_ID_H263:
2224     case AV_CODEC_ID_H263P:
2225     case AV_CODEC_ID_FLV1:
2226     case AV_CODEC_ID_RV10:
2227     case AV_CODEC_ID_RV20:
2228         if (CONFIG_H263_ENCODER)
2229             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2230         break;
2231     case AV_CODEC_ID_MJPEG:
2232         if (CONFIG_MJPEG_ENCODER)
2233             ff_mjpeg_encode_mb(s, s->block);
2234         break;
2235     default:
2236         assert(0);
2237     }
2238 }
2239
2240 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2241 {
2242     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2243     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2244 }
2245
2246 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2247     int i;
2248
2249     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2250
2251     /* mpeg1 */
2252     d->mb_skip_run= s->mb_skip_run;
2253     for(i=0; i<3; i++)
2254         d->last_dc[i] = s->last_dc[i];
2255
2256     /* statistics */
2257     d->mv_bits= s->mv_bits;
2258     d->i_tex_bits= s->i_tex_bits;
2259     d->p_tex_bits= s->p_tex_bits;
2260     d->i_count= s->i_count;
2261     d->f_count= s->f_count;
2262     d->b_count= s->b_count;
2263     d->skip_count= s->skip_count;
2264     d->misc_bits= s->misc_bits;
2265     d->last_bits= 0;
2266
2267     d->mb_skipped= 0;
2268     d->qscale= s->qscale;
2269     d->dquant= s->dquant;
2270
2271     d->esc3_level_length= s->esc3_level_length;
2272 }
2273
2274 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2275     int i;
2276
2277     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2278     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2279
2280     /* mpeg1 */
2281     d->mb_skip_run= s->mb_skip_run;
2282     for(i=0; i<3; i++)
2283         d->last_dc[i] = s->last_dc[i];
2284
2285     /* statistics */
2286     d->mv_bits= s->mv_bits;
2287     d->i_tex_bits= s->i_tex_bits;
2288     d->p_tex_bits= s->p_tex_bits;
2289     d->i_count= s->i_count;
2290     d->f_count= s->f_count;
2291     d->b_count= s->b_count;
2292     d->skip_count= s->skip_count;
2293     d->misc_bits= s->misc_bits;
2294
2295     d->mb_intra= s->mb_intra;
2296     d->mb_skipped= s->mb_skipped;
2297     d->mv_type= s->mv_type;
2298     d->mv_dir= s->mv_dir;
2299     d->pb= s->pb;
2300     if(s->data_partitioning){
2301         d->pb2= s->pb2;
2302         d->tex_pb= s->tex_pb;
2303     }
2304     d->block= s->block;
2305     for(i=0; i<8; i++)
2306         d->block_last_index[i]= s->block_last_index[i];
2307     d->interlaced_dct= s->interlaced_dct;
2308     d->qscale= s->qscale;
2309
2310     d->esc3_level_length= s->esc3_level_length;
2311 }
2312
2313 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2314                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2315                            int *dmin, int *next_block, int motion_x, int motion_y)
2316 {
2317     int score;
2318     uint8_t *dest_backup[3];
2319
2320     copy_context_before_encode(s, backup, type);
2321
2322     s->block= s->blocks[*next_block];
2323     s->pb= pb[*next_block];
2324     if(s->data_partitioning){
2325         s->pb2   = pb2   [*next_block];
2326         s->tex_pb= tex_pb[*next_block];
2327     }
2328
2329     if(*next_block){
2330         memcpy(dest_backup, s->dest, sizeof(s->dest));
2331         s->dest[0] = s->rd_scratchpad;
2332         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2333         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2334         assert(s->linesize >= 32); //FIXME
2335     }
2336
2337     encode_mb(s, motion_x, motion_y);
2338
2339     score= put_bits_count(&s->pb);
2340     if(s->data_partitioning){
2341         score+= put_bits_count(&s->pb2);
2342         score+= put_bits_count(&s->tex_pb);
2343     }
2344
2345     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2346         ff_mpv_decode_mb(s, s->block);
2347
2348         score *= s->lambda2;
2349         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2350     }
2351
2352     if(*next_block){
2353         memcpy(s->dest, dest_backup, sizeof(s->dest));
2354     }
2355
2356     if(score<*dmin){
2357         *dmin= score;
2358         *next_block^=1;
2359
2360         copy_context_after_encode(best, s, type);
2361     }
2362 }
2363
2364 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2365     uint32_t *sq = ff_square_tab + 256;
2366     int acc=0;
2367     int x,y;
2368
2369     if(w==16 && h==16)
2370         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2371     else if(w==8 && h==8)
2372         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2373
2374     for(y=0; y<h; y++){
2375         for(x=0; x<w; x++){
2376             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2377         }
2378     }
2379
2380     assert(acc>=0);
2381
2382     return acc;
2383 }
2384
2385 static int sse_mb(MpegEncContext *s){
2386     int w= 16;
2387     int h= 16;
2388
2389     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2390     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2391
2392     if(w==16 && h==16)
2393       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2394         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2395                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2396                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2397       }else{
2398         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2399                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2400                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2401       }
2402     else
2403         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2404                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2405                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2406 }
2407
2408 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2409     MpegEncContext *s= *(void**)arg;
2410
2411
2412     s->me.pre_pass=1;
2413     s->me.dia_size= s->avctx->pre_dia_size;
2414     s->first_slice_line=1;
2415     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2416         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2417             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2418         }
2419         s->first_slice_line=0;
2420     }
2421
2422     s->me.pre_pass=0;
2423
2424     return 0;
2425 }
2426
2427 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2428     MpegEncContext *s= *(void**)arg;
2429
2430     s->me.dia_size= s->avctx->dia_size;
2431     s->first_slice_line=1;
2432     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2433         s->mb_x=0; //for block init below
2434         ff_init_block_index(s);
2435         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2436             s->block_index[0]+=2;
2437             s->block_index[1]+=2;
2438             s->block_index[2]+=2;
2439             s->block_index[3]+=2;
2440
2441             /* compute motion vector & mb_type and store in context */
2442             if(s->pict_type==AV_PICTURE_TYPE_B)
2443                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2444             else
2445                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2446         }
2447         s->first_slice_line=0;
2448     }
2449     return 0;
2450 }
2451
2452 static int mb_var_thread(AVCodecContext *c, void *arg){
2453     MpegEncContext *s= *(void**)arg;
2454     int mb_x, mb_y;
2455
2456     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2457         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2458             int xx = mb_x * 16;
2459             int yy = mb_y * 16;
2460             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2461             int varc;
2462             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2463
2464             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2465                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2466
2467             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2468             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2469             s->me.mb_var_sum_temp    += varc;
2470         }
2471     }
2472     return 0;
2473 }
2474
2475 static void write_slice_end(MpegEncContext *s){
2476     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2477         if(s->partitioned_frame){
2478             ff_mpeg4_merge_partitions(s);
2479         }
2480
2481         ff_mpeg4_stuffing(&s->pb);
2482     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2483         ff_mjpeg_encode_stuffing(&s->pb);
2484     }
2485
2486     avpriv_align_put_bits(&s->pb);
2487     flush_put_bits(&s->pb);
2488
2489     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2490         s->misc_bits+= get_bits_diff(s);
2491 }
2492
2493 static void write_mb_info(MpegEncContext *s)
2494 {
2495     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2496     int offset = put_bits_count(&s->pb);
2497     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2498     int gobn = s->mb_y / s->gob_index;
2499     int pred_x, pred_y;
2500     if (CONFIG_H263_ENCODER)
2501         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2502     bytestream_put_le32(&ptr, offset);
2503     bytestream_put_byte(&ptr, s->qscale);
2504     bytestream_put_byte(&ptr, gobn);
2505     bytestream_put_le16(&ptr, mba);
2506     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2507     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2508     /* 4MV not implemented */
2509     bytestream_put_byte(&ptr, 0); /* hmv2 */
2510     bytestream_put_byte(&ptr, 0); /* vmv2 */
2511 }
2512
2513 static void update_mb_info(MpegEncContext *s, int startcode)
2514 {
2515     if (!s->mb_info)
2516         return;
2517     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2518         s->mb_info_size += 12;
2519         s->prev_mb_info = s->last_mb_info;
2520     }
2521     if (startcode) {
2522         s->prev_mb_info = put_bits_count(&s->pb)/8;
2523         /* This might have incremented mb_info_size above, and we return without
2524          * actually writing any info into that slot yet. But in that case,
2525          * this will be called again at the start of the after writing the
2526          * start code, actually writing the mb info. */
2527         return;
2528     }
2529
2530     s->last_mb_info = put_bits_count(&s->pb)/8;
2531     if (!s->mb_info_size)
2532         s->mb_info_size += 12;
2533     write_mb_info(s);
2534 }
2535
2536 static int encode_thread(AVCodecContext *c, void *arg){
2537     MpegEncContext *s= *(void**)arg;
2538     int mb_x, mb_y, pdif = 0;
2539     int chr_h= 16>>s->chroma_y_shift;
2540     int i, j;
2541     MpegEncContext best_s = { 0 }, backup_s;
2542     uint8_t bit_buf[2][MAX_MB_BYTES];
2543     uint8_t bit_buf2[2][MAX_MB_BYTES];
2544     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2545     PutBitContext pb[2], pb2[2], tex_pb[2];
2546
2547     for(i=0; i<2; i++){
2548         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2549         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2550         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2551     }
2552
2553     s->last_bits= put_bits_count(&s->pb);
2554     s->mv_bits=0;
2555     s->misc_bits=0;
2556     s->i_tex_bits=0;
2557     s->p_tex_bits=0;
2558     s->i_count=0;
2559     s->f_count=0;
2560     s->b_count=0;
2561     s->skip_count=0;
2562
2563     for(i=0; i<3; i++){
2564         /* init last dc values */
2565         /* note: quant matrix value (8) is implied here */
2566         s->last_dc[i] = 128 << s->intra_dc_precision;
2567
2568         s->current_picture.f->error[i] = 0;
2569     }
2570     s->mb_skip_run = 0;
2571     memset(s->last_mv, 0, sizeof(s->last_mv));
2572
2573     s->last_mv_dir = 0;
2574
2575     switch(s->codec_id){
2576     case AV_CODEC_ID_H263:
2577     case AV_CODEC_ID_H263P:
2578     case AV_CODEC_ID_FLV1:
2579         if (CONFIG_H263_ENCODER)
2580             s->gob_index = ff_h263_get_gob_height(s);
2581         break;
2582     case AV_CODEC_ID_MPEG4:
2583         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2584             ff_mpeg4_init_partitions(s);
2585         break;
2586     }
2587
2588     s->resync_mb_x=0;
2589     s->resync_mb_y=0;
2590     s->first_slice_line = 1;
2591     s->ptr_lastgob = s->pb.buf;
2592     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2593         s->mb_x=0;
2594         s->mb_y= mb_y;
2595
2596         ff_set_qscale(s, s->qscale);
2597         ff_init_block_index(s);
2598
2599         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2600             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2601             int mb_type= s->mb_type[xy];
2602 //            int d;
2603             int dmin= INT_MAX;
2604             int dir;
2605
2606             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2607                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2608                 return -1;
2609             }
2610             if(s->data_partitioning){
2611                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2612                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2613                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2614                     return -1;
2615                 }
2616             }
2617
2618             s->mb_x = mb_x;
2619             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2620             ff_update_block_index(s);
2621
2622             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2623                 ff_h261_reorder_mb_index(s);
2624                 xy= s->mb_y*s->mb_stride + s->mb_x;
2625                 mb_type= s->mb_type[xy];
2626             }
2627
2628             /* write gob / video packet header  */
2629             if(s->rtp_mode){
2630                 int current_packet_size, is_gob_start;
2631
2632                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2633
2634                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2635
2636                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2637
2638                 switch(s->codec_id){
2639                 case AV_CODEC_ID_H263:
2640                 case AV_CODEC_ID_H263P:
2641                     if(!s->h263_slice_structured)
2642                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2643                     break;
2644                 case AV_CODEC_ID_MPEG2VIDEO:
2645                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2646                 case AV_CODEC_ID_MPEG1VIDEO:
2647                     if(s->mb_skip_run) is_gob_start=0;
2648                     break;
2649                 }
2650
2651                 if(is_gob_start){
2652                     if(s->start_mb_y != mb_y || mb_x!=0){
2653                         write_slice_end(s);
2654
2655                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2656                             ff_mpeg4_init_partitions(s);
2657                         }
2658                     }
2659
2660                     assert((put_bits_count(&s->pb)&7) == 0);
2661                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2662
2663                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2664                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2665                         int d = 100 / s->error_rate;
2666                         if(r % d == 0){
2667                             current_packet_size=0;
2668                             s->pb.buf_ptr= s->ptr_lastgob;
2669                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2670                         }
2671                     }
2672
2673                     if (s->avctx->rtp_callback){
2674                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2675                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2676                     }
2677                     update_mb_info(s, 1);
2678
2679                     switch(s->codec_id){
2680                     case AV_CODEC_ID_MPEG4:
2681                         if (CONFIG_MPEG4_ENCODER) {
2682                             ff_mpeg4_encode_video_packet_header(s);
2683                             ff_mpeg4_clean_buffers(s);
2684                         }
2685                     break;
2686                     case AV_CODEC_ID_MPEG1VIDEO:
2687                     case AV_CODEC_ID_MPEG2VIDEO:
2688                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2689                             ff_mpeg1_encode_slice_header(s);
2690                             ff_mpeg1_clean_buffers(s);
2691                         }
2692                     break;
2693                     case AV_CODEC_ID_H263:
2694                     case AV_CODEC_ID_H263P:
2695                         if (CONFIG_H263_ENCODER)
2696                             ff_h263_encode_gob_header(s, mb_y);
2697                     break;
2698                     }
2699
2700                     if(s->flags&CODEC_FLAG_PASS1){
2701                         int bits= put_bits_count(&s->pb);
2702                         s->misc_bits+= bits - s->last_bits;
2703                         s->last_bits= bits;
2704                     }
2705
2706                     s->ptr_lastgob += current_packet_size;
2707                     s->first_slice_line=1;
2708                     s->resync_mb_x=mb_x;
2709                     s->resync_mb_y=mb_y;
2710                 }
2711             }
2712
2713             if(  (s->resync_mb_x   == s->mb_x)
2714                && s->resync_mb_y+1 == s->mb_y){
2715                 s->first_slice_line=0;
2716             }
2717
2718             s->mb_skipped=0;
2719             s->dquant=0; //only for QP_RD
2720
2721             update_mb_info(s, 0);
2722
2723             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2724                 int next_block=0;
2725                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2726
2727                 copy_context_before_encode(&backup_s, s, -1);
2728                 backup_s.pb= s->pb;
2729                 best_s.data_partitioning= s->data_partitioning;
2730                 best_s.partitioned_frame= s->partitioned_frame;
2731                 if(s->data_partitioning){
2732                     backup_s.pb2= s->pb2;
2733                     backup_s.tex_pb= s->tex_pb;
2734                 }
2735
2736                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2737                     s->mv_dir = MV_DIR_FORWARD;
2738                     s->mv_type = MV_TYPE_16X16;
2739                     s->mb_intra= 0;
2740                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2741                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2742                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2743                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2744                 }
2745                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2746                     s->mv_dir = MV_DIR_FORWARD;
2747                     s->mv_type = MV_TYPE_FIELD;
2748                     s->mb_intra= 0;
2749                     for(i=0; i<2; i++){
2750                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2751                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2752                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2753                     }
2754                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2755                                  &dmin, &next_block, 0, 0);
2756                 }
2757                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2758                     s->mv_dir = MV_DIR_FORWARD;
2759                     s->mv_type = MV_TYPE_16X16;
2760                     s->mb_intra= 0;
2761                     s->mv[0][0][0] = 0;
2762                     s->mv[0][0][1] = 0;
2763                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2764                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2765                 }
2766                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2767                     s->mv_dir = MV_DIR_FORWARD;
2768                     s->mv_type = MV_TYPE_8X8;
2769                     s->mb_intra= 0;
2770                     for(i=0; i<4; i++){
2771                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2772                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2773                     }
2774                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2775                                  &dmin, &next_block, 0, 0);
2776                 }
2777                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2778                     s->mv_dir = MV_DIR_FORWARD;
2779                     s->mv_type = MV_TYPE_16X16;
2780                     s->mb_intra= 0;
2781                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2782                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2783                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2784                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2785                 }
2786                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2787                     s->mv_dir = MV_DIR_BACKWARD;
2788                     s->mv_type = MV_TYPE_16X16;
2789                     s->mb_intra= 0;
2790                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2791                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2792                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2793                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2794                 }
2795                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2796                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2797                     s->mv_type = MV_TYPE_16X16;
2798                     s->mb_intra= 0;
2799                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2800                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2801                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2802                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2803                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2804                                  &dmin, &next_block, 0, 0);
2805                 }
2806                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2807                     s->mv_dir = MV_DIR_FORWARD;
2808                     s->mv_type = MV_TYPE_FIELD;
2809                     s->mb_intra= 0;
2810                     for(i=0; i<2; i++){
2811                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2812                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2813                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2814                     }
2815                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2816                                  &dmin, &next_block, 0, 0);
2817                 }
2818                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2819                     s->mv_dir = MV_DIR_BACKWARD;
2820                     s->mv_type = MV_TYPE_FIELD;
2821                     s->mb_intra= 0;
2822                     for(i=0; i<2; i++){
2823                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2824                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2825                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2826                     }
2827                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2828                                  &dmin, &next_block, 0, 0);
2829                 }
2830                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2831                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2832                     s->mv_type = MV_TYPE_FIELD;
2833                     s->mb_intra= 0;
2834                     for(dir=0; dir<2; dir++){
2835                         for(i=0; i<2; i++){
2836                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2837                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2838                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2839                         }
2840                     }
2841                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2842                                  &dmin, &next_block, 0, 0);
2843                 }
2844                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2845                     s->mv_dir = 0;
2846                     s->mv_type = MV_TYPE_16X16;
2847                     s->mb_intra= 1;
2848                     s->mv[0][0][0] = 0;
2849                     s->mv[0][0][1] = 0;
2850                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2851                                  &dmin, &next_block, 0, 0);
2852                     if(s->h263_pred || s->h263_aic){
2853                         if(best_s.mb_intra)
2854                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2855                         else
2856                             ff_clean_intra_table_entries(s); //old mode?
2857                     }
2858                 }
2859
2860                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2861                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2862                         const int last_qp= backup_s.qscale;
2863                         int qpi, qp, dc[6];
2864                         int16_t ac[6][16];
2865                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2866                         static const int dquant_tab[4]={-1,1,-2,2};
2867
2868                         assert(backup_s.dquant == 0);
2869
2870                         //FIXME intra
2871                         s->mv_dir= best_s.mv_dir;
2872                         s->mv_type = MV_TYPE_16X16;
2873                         s->mb_intra= best_s.mb_intra;
2874                         s->mv[0][0][0] = best_s.mv[0][0][0];
2875                         s->mv[0][0][1] = best_s.mv[0][0][1];
2876                         s->mv[1][0][0] = best_s.mv[1][0][0];
2877                         s->mv[1][0][1] = best_s.mv[1][0][1];
2878
2879                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2880                         for(; qpi<4; qpi++){
2881                             int dquant= dquant_tab[qpi];
2882                             qp= last_qp + dquant;
2883                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2884                                 continue;
2885                             backup_s.dquant= dquant;
2886                             if(s->mb_intra && s->dc_val[0]){
2887                                 for(i=0; i<6; i++){
2888                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2889                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2890                                 }
2891                             }
2892
2893                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2894                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2895                             if(best_s.qscale != qp){
2896                                 if(s->mb_intra && s->dc_val[0]){
2897                                     for(i=0; i<6; i++){
2898                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2899                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2900                                     }
2901                                 }
2902                             }
2903                         }
2904                     }
2905                 }
2906                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2907                     int mx= s->b_direct_mv_table[xy][0];
2908                     int my= s->b_direct_mv_table[xy][1];
2909
2910                     backup_s.dquant = 0;
2911                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2912                     s->mb_intra= 0;
2913                     ff_mpeg4_set_direct_mv(s, mx, my);
2914                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2915                                  &dmin, &next_block, mx, my);
2916                 }
2917                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2918                     backup_s.dquant = 0;
2919                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2920                     s->mb_intra= 0;
2921                     ff_mpeg4_set_direct_mv(s, 0, 0);
2922                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2923                                  &dmin, &next_block, 0, 0);
2924                 }
2925                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2926                     int coded=0;
2927                     for(i=0; i<6; i++)
2928                         coded |= s->block_last_index[i];
2929                     if(coded){
2930                         int mx,my;
2931                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2932                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2933                             mx=my=0; //FIXME find the one we actually used
2934                             ff_mpeg4_set_direct_mv(s, mx, my);
2935                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2936                             mx= s->mv[1][0][0];
2937                             my= s->mv[1][0][1];
2938                         }else{
2939                             mx= s->mv[0][0][0];
2940                             my= s->mv[0][0][1];
2941                         }
2942
2943                         s->mv_dir= best_s.mv_dir;
2944                         s->mv_type = best_s.mv_type;
2945                         s->mb_intra= 0;
2946 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2947                         s->mv[0][0][1] = best_s.mv[0][0][1];
2948                         s->mv[1][0][0] = best_s.mv[1][0][0];
2949                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2950                         backup_s.dquant= 0;
2951                         s->skipdct=1;
2952                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2953                                         &dmin, &next_block, mx, my);
2954                         s->skipdct=0;
2955                     }
2956                 }
2957
2958                 s->current_picture.qscale_table[xy] = best_s.qscale;
2959
2960                 copy_context_after_encode(s, &best_s, -1);
2961
2962                 pb_bits_count= put_bits_count(&s->pb);
2963                 flush_put_bits(&s->pb);
2964                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2965                 s->pb= backup_s.pb;
2966
2967                 if(s->data_partitioning){
2968                     pb2_bits_count= put_bits_count(&s->pb2);
2969                     flush_put_bits(&s->pb2);
2970                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2971                     s->pb2= backup_s.pb2;
2972
2973                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2974                     flush_put_bits(&s->tex_pb);
2975                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2976                     s->tex_pb= backup_s.tex_pb;
2977                 }
2978                 s->last_bits= put_bits_count(&s->pb);
2979
2980                 if (CONFIG_H263_ENCODER &&
2981                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2982                     ff_h263_update_motion_val(s);
2983
2984                 if(next_block==0){ //FIXME 16 vs linesize16
2985                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2986                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2987                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2988                 }
2989
2990                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2991                     ff_mpv_decode_mb(s, s->block);
2992             } else {
2993                 int motion_x = 0, motion_y = 0;
2994                 s->mv_type=MV_TYPE_16X16;
2995                 // only one MB-Type possible
2996
2997                 switch(mb_type){
2998                 case CANDIDATE_MB_TYPE_INTRA:
2999                     s->mv_dir = 0;
3000                     s->mb_intra= 1;
3001                     motion_x= s->mv[0][0][0] = 0;
3002                     motion_y= s->mv[0][0][1] = 0;
3003                     break;
3004                 case CANDIDATE_MB_TYPE_INTER:
3005                     s->mv_dir = MV_DIR_FORWARD;
3006                     s->mb_intra= 0;
3007                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3008                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3009                     break;
3010                 case CANDIDATE_MB_TYPE_INTER_I:
3011                     s->mv_dir = MV_DIR_FORWARD;
3012                     s->mv_type = MV_TYPE_FIELD;
3013                     s->mb_intra= 0;
3014                     for(i=0; i<2; i++){
3015                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3016                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3017                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3018                     }
3019                     break;
3020                 case CANDIDATE_MB_TYPE_INTER4V:
3021                     s->mv_dir = MV_DIR_FORWARD;
3022                     s->mv_type = MV_TYPE_8X8;
3023                     s->mb_intra= 0;
3024                     for(i=0; i<4; i++){
3025                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3026                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3027                     }
3028                     break;
3029                 case CANDIDATE_MB_TYPE_DIRECT:
3030                     if (CONFIG_MPEG4_ENCODER) {
3031                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3032                         s->mb_intra= 0;
3033                         motion_x=s->b_direct_mv_table[xy][0];
3034                         motion_y=s->b_direct_mv_table[xy][1];
3035                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3036                     }
3037                     break;
3038                 case CANDIDATE_MB_TYPE_DIRECT0:
3039                     if (CONFIG_MPEG4_ENCODER) {
3040                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3041                         s->mb_intra= 0;
3042                         ff_mpeg4_set_direct_mv(s, 0, 0);
3043                     }
3044                     break;
3045                 case CANDIDATE_MB_TYPE_BIDIR:
3046                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3047                     s->mb_intra= 0;
3048                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3049                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3050                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3051                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3052                     break;
3053                 case CANDIDATE_MB_TYPE_BACKWARD:
3054                     s->mv_dir = MV_DIR_BACKWARD;
3055                     s->mb_intra= 0;
3056                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3057                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3058                     break;
3059                 case CANDIDATE_MB_TYPE_FORWARD:
3060                     s->mv_dir = MV_DIR_FORWARD;
3061                     s->mb_intra= 0;
3062                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3063                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3064                     break;
3065                 case CANDIDATE_MB_TYPE_FORWARD_I:
3066                     s->mv_dir = MV_DIR_FORWARD;
3067                     s->mv_type = MV_TYPE_FIELD;
3068                     s->mb_intra= 0;
3069                     for(i=0; i<2; i++){
3070                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3071                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3072                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3073                     }
3074                     break;
3075                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3076                     s->mv_dir = MV_DIR_BACKWARD;
3077                     s->mv_type = MV_TYPE_FIELD;
3078                     s->mb_intra= 0;
3079                     for(i=0; i<2; i++){
3080                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3081                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3082                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3083                     }
3084                     break;
3085                 case CANDIDATE_MB_TYPE_BIDIR_I:
3086                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3087                     s->mv_type = MV_TYPE_FIELD;
3088                     s->mb_intra= 0;
3089                     for(dir=0; dir<2; dir++){
3090                         for(i=0; i<2; i++){
3091                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3092                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3093                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3094                         }
3095                     }
3096                     break;
3097                 default:
3098                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3099                 }
3100
3101                 encode_mb(s, motion_x, motion_y);
3102
3103                 // RAL: Update last macroblock type
3104                 s->last_mv_dir = s->mv_dir;
3105
3106                 if (CONFIG_H263_ENCODER &&
3107                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3108                     ff_h263_update_motion_val(s);
3109
3110                 ff_mpv_decode_mb(s, s->block);
3111             }
3112
3113             /* clean the MV table in IPS frames for direct mode in B frames */
3114             if(s->mb_intra /* && I,P,S_TYPE */){
3115                 s->p_mv_table[xy][0]=0;
3116                 s->p_mv_table[xy][1]=0;
3117             }
3118
3119             if(s->flags&CODEC_FLAG_PSNR){
3120                 int w= 16;
3121                 int h= 16;
3122
3123                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3124                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3125
3126                 s->current_picture.f->error[0] += sse(
3127                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3128                     s->dest[0], w, h, s->linesize);
3129                 s->current_picture.f->error[1] += sse(
3130                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3131                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3132                 s->current_picture.f->error[2] += sse(
3133                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3134                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3135             }
3136             if(s->loop_filter){
3137                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3138                     ff_h263_loop_filter(s);
3139             }
3140             av_dlog(s->avctx, "MB %d %d bits\n",
3141                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3142         }
3143     }
3144
3145     //not beautiful here but we must write it before flushing so it has to be here
3146     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3147         ff_msmpeg4_encode_ext_header(s);
3148
3149     write_slice_end(s);
3150
3151     /* Send the last GOB if RTP */
3152     if (s->avctx->rtp_callback) {
3153         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3154         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3155         /* Call the RTP callback to send the last GOB */
3156         emms_c();
3157         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3158     }
3159
3160     return 0;
3161 }
3162
3163 #define MERGE(field) dst->field += src->field; src->field=0
3164 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3165     MERGE(me.scene_change_score);
3166     MERGE(me.mc_mb_var_sum_temp);
3167     MERGE(me.mb_var_sum_temp);
3168 }
3169
3170 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3171     int i;
3172
3173     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3174     MERGE(dct_count[1]);
3175     MERGE(mv_bits);
3176     MERGE(i_tex_bits);
3177     MERGE(p_tex_bits);
3178     MERGE(i_count);
3179     MERGE(f_count);
3180     MERGE(b_count);
3181     MERGE(skip_count);
3182     MERGE(misc_bits);
3183     MERGE(er.error_count);
3184     MERGE(padding_bug_score);
3185     MERGE(current_picture.f->error[0]);
3186     MERGE(current_picture.f->error[1]);
3187     MERGE(current_picture.f->error[2]);
3188
3189     if(dst->avctx->noise_reduction){
3190         for(i=0; i<64; i++){
3191             MERGE(dct_error_sum[0][i]);
3192             MERGE(dct_error_sum[1][i]);
3193         }
3194     }
3195
3196     assert(put_bits_count(&src->pb) % 8 ==0);
3197     assert(put_bits_count(&dst->pb) % 8 ==0);
3198     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3199     flush_put_bits(&dst->pb);
3200 }
3201
3202 static int estimate_qp(MpegEncContext *s, int dry_run){
3203     if (s->next_lambda){
3204         s->current_picture_ptr->f->quality =
3205         s->current_picture.f->quality = s->next_lambda;
3206         if(!dry_run) s->next_lambda= 0;
3207     } else if (!s->fixed_qscale) {
3208         s->current_picture_ptr->f->quality =
3209         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3210         if (s->current_picture.f->quality < 0)
3211             return -1;
3212     }
3213
3214     if(s->adaptive_quant){
3215         switch(s->codec_id){
3216         case AV_CODEC_ID_MPEG4:
3217             if (CONFIG_MPEG4_ENCODER)
3218                 ff_clean_mpeg4_qscales(s);
3219             break;
3220         case AV_CODEC_ID_H263:
3221         case AV_CODEC_ID_H263P:
3222         case AV_CODEC_ID_FLV1:
3223             if (CONFIG_H263_ENCODER)
3224                 ff_clean_h263_qscales(s);
3225             break;
3226         default:
3227             ff_init_qscale_tab(s);
3228         }
3229
3230         s->lambda= s->lambda_table[0];
3231         //FIXME broken
3232     }else
3233         s->lambda = s->current_picture.f->quality;
3234     update_qscale(s);
3235     return 0;
3236 }
3237
3238 /* must be called before writing the header */
3239 static void set_frame_distances(MpegEncContext * s){
3240     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3241     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3242
3243     if(s->pict_type==AV_PICTURE_TYPE_B){
3244         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3245         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3246     }else{
3247         s->pp_time= s->time - s->last_non_b_time;
3248         s->last_non_b_time= s->time;
3249         assert(s->picture_number==0 || s->pp_time > 0);
3250     }
3251 }
3252
/**
 * Encode one picture into s->pb.
 *
 * The steps, in order: set up timing, rounding and lambda; copy the main
 * context into the slice contexts; run motion estimation (or, for I-pictures
 * without fixed qscale, the variance analysis) through avctx->execute();
 * merge the statistics; possibly turn a P-picture into an I-picture on a
 * scene change; select f_code/b_code and clip over-long vectors; estimate the
 * final quantizer; write the format-specific picture header; finally run
 * encode_thread over all slice contexts and merge their output.
 *
 * @param s              main encoder context
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success, a negative value on failure (-1, or the code returned
 *         by ff_update_duplicate_context() / ff_rv10_encode_picture_header())
 */
3253 static int encode_picture(MpegEncContext *s, int picture_number)
3254 {
3255     int i, ret;
3256     int bits;
3257     int context_count = s->slice_context_count;
3258
3259     s->picture_number = picture_number;
3260
3261     /* Reset the average MB variance */
3262     s->me.mb_var_sum_temp    =
3263     s->me.mc_mb_var_sum_temp = 0;
3264
3265     /* we need to initialize some time vars before we can encode b-frames */
3266     // RAL: Condition added for MPEG1VIDEO
3267     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3268         set_frame_distances(s);
3269     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3270         ff_set_mpeg4_time(s);
3271
3272     s->me.scene_change_score=0;
3273
3274 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3275
     /* rounding mode: reset on I-pictures, toggled on other non-B pictures
      * when flipflop_rounding is set or the codec is H.263+ / MPEG-4 */
3276     if(s->pict_type==AV_PICTURE_TYPE_I){
3277         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3278         else                        s->no_rounding=0;
3279     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3280         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3281             s->no_rounding ^= 1;
3282     }
3283
     /* lambda for motion estimation: a dry-run estimate in the second pass,
      * otherwise (unless CODEC_FLAG_QSCALE is set) the last lambda stored for
      * this class of picture */
3284     if(s->flags & CODEC_FLAG_PASS2){
3285         if (estimate_qp(s,1) < 0)
3286             return -1;
3287         ff_get_2pass_fcode(s);
3288     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3289         if(s->pict_type==AV_PICTURE_TYPE_B)
3290             s->lambda= s->last_lambda_for[s->pict_type];
3291         else
3292             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3293         update_qscale(s);
3294     }
3295
3296     s->mb_intra=0; //for the rate distortion & bit compare functions
     /* bring slice contexts 1..context_count-1 in sync with the main context */
3297     for(i=1; i<context_count; i++){
3298         ret = ff_update_duplicate_context(s->thread_context[i], s);
3299         if (ret < 0)
3300             return ret;
3301     }
3302
3303     if(ff_init_me(s)<0)
3304         return -1;
3305
3306     /* Estimate motion for every MB */
3307     if(s->pict_type != AV_PICTURE_TYPE_I){
         /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3308         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3309         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3310         if (s->pict_type != AV_PICTURE_TYPE_B) {
             /* optional pre-pass: after an I-picture when pre_me is set, or always when pre_me == 2 */
3311             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3312                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3313             }
3314         }
3315
3316         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3317     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3318         /* I-Frame */
3319         for(i=0; i<s->mb_stride*s->mb_height; i++)
3320             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3321
3322         if(!s->fixed_qscale){
3323             /* finding spatial complexity for I-frame rate control */
3324             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3325         }
3326     }
     /* collect the statistics gathered by the slice contexts */
3327     for(i=1; i<context_count; i++){
3328         merge_context_after_me(s, s->thread_context[i]);
3329     }
3330     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3331     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3332     emms_c();
3333
     /* scene change: a P-picture whose score exceeds the threshold is coded
      * as an I-picture with every macroblock forced to intra */
3334     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3335         s->pict_type= AV_PICTURE_TYPE_I;
3336         for(i=0; i<s->mb_stride*s->mb_height; i++)
3337             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3338         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3339                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3340     }
3341
     /* pick f_code / b_code from the estimated vectors and fix vectors that
      * do not fit; skipped entirely when umvplus is set */
3342     if(!s->umvplus){
3343         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3344             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3345
3346             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3347                 int a,b;
3348                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3349                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3350                 s->f_code= FFMAX3(s->f_code, a, b);
3351             }
3352
3353             ff_fix_long_p_mvs(s);
3354             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3355             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3356                 int j;
3357                 for(i=0; i<2; i++){
3358                     for(j=0; j<2; j++)
3359                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3360                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3361                 }
3362             }
3363         }
3364
3365         if(s->pict_type==AV_PICTURE_TYPE_B){
3366             int a, b;
3367
             /* forward code covers the forward and bidirectional-forward tables */
3368             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3369             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3370             s->f_code = FFMAX(a, b);
3371
             /* backward code covers the backward and bidirectional-backward tables */
3372             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3373             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3374             s->b_code = FFMAX(a, b);
3375
3376             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3377             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3378             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3379             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3380             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3381                 int dir, j;
3382                 for(dir=0; dir<2; dir++){
3383                     for(i=0; i<2; i++){
3384                         for(j=0; j<2; j++){
3385                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3386                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3387                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3388                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3389                         }
3390                     }
3391                 }
3392             }
3393         }
3394     }
3395
     /* final (non dry-run) quantizer decision for this picture */
3396     if (estimate_qp(s, 0) < 0)
3397         return -1;
3398
3399     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3400         s->qscale= 3; //reduce clipping problems
3401
3402     if (s->out_format == FMT_MJPEG) {
3403         /* for mjpeg, we do include qscale in the matrix */
3404         for(i=1;i<64;i++){
3405             int j = s->idsp.idct_permutation[i];
3406
3407             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3408         }
3409         s->y_dc_scale_table=
3410         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3411         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3412         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3413                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
         /* qscale is already folded into the matrix above, so it is pinned to 8 */
3414         s->qscale= 8;
3415     }
3416
3417     //FIXME var duplication
3418     s->current_picture_ptr->f->key_frame =
3419     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3420     s->current_picture_ptr->f->pict_type =
3421     s->current_picture.f->pict_type = s->pict_type;
3422
3423     if (s->current_picture.f->key_frame)
3424         s->picture_in_gop_number=0;
3425
     /* write the picture header of the selected output format and record
      * how many bits it took in s->header_bits */
3426     s->last_bits= put_bits_count(&s->pb);
3427     switch(s->out_format) {
3428     case FMT_MJPEG:
3429         if (CONFIG_MJPEG_ENCODER)
3430             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3431                                            s->intra_matrix);
3432         break;
3433     case FMT_H261:
3434         if (CONFIG_H261_ENCODER)
3435             ff_h261_encode_picture_header(s, picture_number);
3436         break;
3437     case FMT_H263:
3438         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3439             ff_wmv2_encode_picture_header(s, picture_number);
3440         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3441             ff_msmpeg4_encode_picture_header(s, picture_number);
3442         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3443             ff_mpeg4_encode_picture_header(s, picture_number);
3444         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3445             ret = ff_rv10_encode_picture_header(s, picture_number);
3446             if (ret < 0)
3447                 return ret;
3448         }
3449         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3450             ff_rv20_encode_picture_header(s, picture_number);
3451         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3452             ff_flv_encode_picture_header(s, picture_number);
3453         else if (CONFIG_H263_ENCODER)
3454             ff_h263_encode_picture_header(s, picture_number);
3455         break;
3456     case FMT_MPEG1:
3457         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3458             ff_mpeg1_encode_picture_header(s, picture_number);
3459         break;
3460     default:
3461         assert(0);
3462     }
3463     bits= put_bits_count(&s->pb);
3464     s->header_bits= bits - s->last_bits;
3465
     /* hand the post-ME state to the slice contexts, encode all slices, then
      * merge counters and bitstreams back into the main context */
3466     for(i=1; i<context_count; i++){
3467         update_duplicate_context_after_me(s->thread_context[i], s);
3468     }
3469     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3470     for(i=1; i<context_count; i++){
3471         merge_context_after_encode(s, s->thread_context[i]);
3472     }
3473     emms_c();
3474     return 0;
3475 }
3476
3477 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3478     const int intra= s->mb_intra;
3479     int i;
3480
3481     s->dct_count[intra]++;
3482
3483     for(i=0; i<64; i++){
3484         int level= block[i];
3485
3486         if(level){
3487             if(level>0){
3488                 s->dct_error_sum[intra][i] += level;
3489                 level -= s->dct_offset[intra][i];
3490                 if(level<0) level=0;
3491             }else{
3492                 s->dct_error_sum[intra][i] -= level;
3493                 level += s->dct_offset[intra][i];
3494                 if(level>0) level=0;
3495             }
3496             block[i]= level;
3497         }
3498     }
3499 }
3500
/**
 * Quantize one block with a rate-distortion optimized ("trellis") search.
 *
 * The block is transformed with s->fdsp.fdct() and, when s->dct_error_sum is
 * set, passed through s->denoise_dct(). Every scan position up to the last
 * coefficient that quantizes to non-zero gets one or two candidate levels.
 * A pass over the scan positions then selects, per position, the (run, level)
 * pair with the smallest distortion + lambda * code length, and the cheapest
 * chain is written back into block[].
 *
 * @param s        encoder context
 * @param block    64 coefficients, transformed and quantized in place
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale; also the index into s->coded_score[]
 * @param qscale   quantizer; selects the quantization matrix row and the
 *                 reconstruction factors used for the distortion estimate
 * @param overflow set to non-zero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan index of the last coefficient kept; a value below the first
 *         coded position (e.g. -1 for inter blocks) when none is kept
 */
3501 static int dct_quantize_trellis_c(MpegEncContext *s,
3502                                   int16_t *block, int n,
3503                                   int qscale, int *overflow){
3504     const int *qmat;
3505     const uint8_t *scantable= s->intra_scantable.scantable;
3506     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3507     int max=0;
3508     unsigned int threshold1, threshold2;
3509     int bias=0;
     /* tables below are indexed by scan position + 1 when written by the search */
3510     int run_tab[65];
3511     int level_tab[65];
3512     int score_tab[65];
3513     int survivor[65];
3514     int survivor_count;
3515     int last_run=0;
3516     int last_level=0;
3517     int last_score= 0;
3518     int last_i;
     /* coeff[k][i]: k-th candidate level at scan position i; coeff_count[i]: number of candidates */
3519     int coeff[2][64];
3520     int coeff_count[64];
3521     int qmul, qadd, start_i, last_non_zero, i, dc;
3522     const int esc_length= s->ac_esc_length;
3523     uint8_t * length;
3524     uint8_t * last_length;
3525     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3526
3527     s->fdsp.fdct(block);
3528
3529     if(s->dct_error_sum)
3530         s->denoise_dct(s, block);
3531     qmul= qscale*16;
3532     qadd= ((qscale-1)|1)*8;
3533
3534     if (s->mb_intra) {
3535         int q;
3536         if (!s->h263_aic) {
3537             if (n < 4)
3538                 q = s->y_dc_scale;
3539             else
3540                 q = s->c_dc_scale;
3541             q = q << 3;
3542         } else{
3543             /* For AIC we skip quant/dequant of INTRADC */
3544             q = 1 << 3;
3545             qadd=0;
3546         }
3547
3548         /* note: block[0] is assumed to be positive */
3549         block[0] = (block[0] + (q >> 1)) / q;
         /* DC is handled above, the search starts at the first AC position */
3550         start_i = 1;
3551         last_non_zero = 0;
3552         qmat = s->q_intra_matrix[qscale];
3553         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3554             bias= 1<<(QMAT_SHIFT-1);
3555         length     = s->intra_ac_vlc_length;
3556         last_length= s->intra_ac_vlc_last_length;
3557     } else {
3558         start_i = 0;
3559         last_non_zero = -1;
3560         qmat = s->q_inter_matrix[qscale];
3561         length     = s->inter_ac_vlc_length;
3562         last_length= s->inter_ac_vlc_last_length;
3563     }
3564     last_i= start_i;
3565
     /* (unsigned)(level + threshold1) > threshold2 is true exactly when
      * level lies outside [-threshold1, threshold1] */
3566     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3567     threshold2= (threshold1<<1);
3568
     /* scan backwards for the last coefficient that exceeds the threshold */
3569     for(i=63; i>=start_i; i--) {
3570         const int j = scantable[i];
3571         int level = block[j] * qmat[j];
3572
3573         if(((unsigned)(level+threshold1))>threshold2){
3574             last_non_zero = i;
3575             break;
3576         }
3577     }
3578
     /* build the candidate levels for every position up to last_non_zero */
3579     for(i=start_i; i<=last_non_zero; i++) {
3580         const int j = scantable[i];
3581         int level = block[j] * qmat[j];
3582
3583 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3584 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3585         if(((unsigned)(level+threshold1))>threshold2){
3586             if(level>0){
3587                 level= (bias + level)>>QMAT_SHIFT;
3588                 coeff[0][i]= level;
3589                 coeff[1][i]= level-1;
3590 //                coeff[2][k]= level-2;
3591             }else{
3592                 level= (bias - level)>>QMAT_SHIFT;
3593                 coeff[0][i]= -level;
3594                 coeff[1][i]= -level+1;
3595 //                coeff[2][k]= -level+2;
3596             }
             /* a magnitude of 1 has a single candidate: the second one would be 0 */
3597             coeff_count[i]= FFMIN(level, 2);
3598             assert(coeff_count[i]);
             /* max accumulates the bitwise OR of the magnitudes, which is >= the largest one */
3599             max |=level;
3600         }else{
             /* below the threshold: the only candidate is +1 or -1 with the sign of level
              * (relies on an arithmetic right shift of a 32-bit int) */
3601             coeff[0][i]= (level>>31)|1;
3602             coeff_count[i]= 1;
3603         }
3604     }
3605
3606     *overflow= s->max_qcoeff < max; //overflow might have happened
3607
     /* nothing beyond the start position quantizes to non-zero: clear and leave */
3608     if(last_non_zero < start_i){
3609         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3610         return last_non_zero;
3611     }
3612
3613     score_tab[start_i]= 0;
3614     survivor[0]= start_i;
3615     survivor_count= 1;
3616
     /* main search: score_tab[i+1] becomes the best score of a chain whose
      * most recent coded coefficient is at scan position i */
3617     for(i=start_i; i<=last_non_zero; i++){
3618         int level_index, j, zero_distortion;
3619         int dct_coeff= FFABS(block[ scantable[i] ]);
3620         int best_score=256*256*256*120;
3621
3622         if (s->fdsp.fdct == ff_fdct_ifast)
3623             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
         /* distortion if this coefficient were coded as zero; candidates are scored relative to it */
3624         zero_distortion= dct_coeff*dct_coeff;
3625
3626         for(level_index=0; level_index < coeff_count[i]; level_index++){
3627             int distortion;
3628             int level= coeff[level_index][i];
3629             const int alevel= FFABS(level);
3630             int unquant_coeff;
3631
3632             assert(level);
3633
             /* reconstruct the value this candidate would produce */
3634             if(s->out_format == FMT_H263){
3635                 unquant_coeff= alevel*qmul + qadd;
3636             }else{ //MPEG1
3637                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3638                 if(s->mb_intra){
3639                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3640                         unquant_coeff =   (unquant_coeff - 1) | 1;
3641                 }else{
3642                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3643                         unquant_coeff =   (unquant_coeff - 1) | 1;
3644                 }
3645                 unquant_coeff<<= 3;
3646             }
3647
3648             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* level + 64 in 0..127 means the level table can be indexed directly;
              * anything else is charged esc_length instead */
3649             level+=64;
3650             if((level&(~127)) == 0){
                 /* try every surviving predecessor position as the start of the zero run */
3651                 for(j=survivor_count-1; j>=0; j--){
3652                     int run= i - survivor[j];
3653                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3654                     score += score_tab[i-run];
3655
3656                     if(score < best_score){
3657                         best_score= score;
3658                         run_tab[i+1]= run;
3659                         level_tab[i+1]= level-64;
3660                     }
3661                 }
3662
                 /* for FMT_H263 the final coefficient uses last_length, so the
                  * best end point is tracked here as well */
3663                 if(s->out_format == FMT_H263){
3664                     for(j=survivor_count-1; j>=0; j--){
3665                         int run= i - survivor[j];
3666                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3667                         score += score_tab[i-run];
3668                         if(score < last_score){
3669                             last_score= score;
3670                             last_run= run;
3671                             last_level= level-64;
3672                             last_i= i+1;
3673                         }
3674                     }
3675                 }
3676             }else{
3677                 distortion += esc_length*lambda;
3678                 for(j=survivor_count-1; j>=0; j--){
3679                     int run= i - survivor[j];
3680                     int score= distortion + score_tab[i-run];
3681
3682                     if(score < best_score){
3683                         best_score= score;
3684                         run_tab[i+1]= run;
3685                         level_tab[i+1]= level-64;
3686                     }
3687                 }
3688
3689                 if(s->out_format == FMT_H263){
3690                   for(j=survivor_count-1; j>=0; j--){
3691                         int run= i - survivor[j];
3692                         int score= distortion + score_tab[i-run];
3693                         if(score < last_score){
3694                             last_score= score;
3695                             last_run= run;
3696                             last_level= level-64;
3697                             last_i= i+1;
3698                         }
3699                     }
3700                 }
3701             }
3702         }
3703
3704         score_tab[i+1]= best_score;
3705
         /* drop survivors from the end of the list whose score is above the new best
          * (with a slack of lambda when last_non_zero > 27), then append this position */
3706         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3707         if(last_non_zero <= 27){
3708             for(; survivor_count; survivor_count--){
3709                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3710                     break;
3711             }
3712         }else{
3713             for(; survivor_count; survivor_count--){
3714                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3715                     break;
3716             }
3717         }
3718
3719         survivor[ survivor_count++ ]= i+1;
3720     }
3721
     /* for formats other than FMT_H263 the end position is chosen afterwards:
      * the cheapest score_tab entry, with 2*lambda added for every i != 0 */
3722     if(s->out_format != FMT_H263){
3723         last_score= 256*256*256*120;
3724         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3725             int score= score_tab[i];
3726             if(i) score += lambda*2; //FIXME exacter?
3727
3728             if(score < last_score){
3729                 last_score= score;
3730                 last_i= i;
3731                 last_level= level_tab[i];
3732                 last_run= run_tab[i];
3733             }
3734         }
3735     }
3736
3737     s->coded_score[n] = last_score;
3738
     /* remember |block[0]| before the coefficients from start_i on are cleared */
3739     dc= FFABS(block[0]);
3740     last_non_zero= last_i - 1;
3741     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3742
3743     if(last_non_zero < start_i)
3744         return last_non_zero;
3745
     /* only the first coefficient of a block with start_i == 0 is left:
      * compare each candidate against coding nothing (score dc*dc, level 0) */
3746     if(last_non_zero == 0 && start_i == 0){
3747         int best_level= 0;
3748         int best_score= dc * dc;
3749
3750         for(i=0; i<coeff_count[0]; i++){
3751             int level= coeff[i][0];
3752             int alevel= FFABS(level);
3753             int unquant_coeff, score, distortion;
3754
3755             if(s->out_format == FMT_H263){
3756                     unquant_coeff= (alevel*qmul + qadd)>>3;
3757             }else{ //MPEG1
3758                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3759                     unquant_coeff =   (unquant_coeff - 1) | 1;
3760             }
3761             unquant_coeff = (unquant_coeff + 4) >> 3;
3762             unquant_coeff<<= 3 + 3;
3763
3764             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3765             level+=64;
3766             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3767             else                    score= distortion + esc_length*lambda;
3768
3769             if(score < best_score){
3770                 best_score= score;
3771                 best_level= level - 64;
3772             }
3773         }
3774         block[0]= best_level;
3775         s->coded_score[n] = best_score - dc*dc;
3776         if(best_level == 0) return -1;
3777         else                return last_non_zero;
3778     }
3779
     /* write the chosen chain back: the final coefficient first, then walk
      * backwards through the stored runs and levels */
3780     i= last_i;
3781     assert(last_level);
3782
3783     block[ perm_scantable[last_non_zero] ]= last_level;
3784     i -= last_run + 1;
3785
3786     for(; i>start_i; i -= run_tab[i] + 1){
3787         block[ perm_scantable[i-1] ]= level_tab[i];
3788     }
3789
3790     return last_non_zero;
3791 }
3792
3793 //#define REFINE_STATS 1
3794 static int16_t basis[64][64];
3795
3796 static void build_basis(uint8_t *perm){
3797     int i, j, x, y;
3798     emms_c();
3799     for(i=0; i<8; i++){
3800         for(j=0; j<8; j++){
3801             for(y=0; y<8; y++){
3802                 for(x=0; x<8; x++){
3803                     double s= 0.25*(1<<BASIS_SHIFT);
3804                     int index= 8*i + j;
3805                     int perm_index= perm[index];
3806                     if(i==0) s*= sqrt(0.5);
3807                     if(j==0) s*= sqrt(0.5);
3808                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3809                 }
3810             }
3811         }
3812     }
3813 }
3814
3815 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3816                         int16_t *block, int16_t *weight, int16_t *orig,
3817                         int n, int qscale){
3818     int16_t rem[64];
3819     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3820     const uint8_t *scantable= s->intra_scantable.scantable;
3821     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3822 //    unsigned int threshold1, threshold2;
3823 //    int bias=0;
3824     int run_tab[65];
3825     int prev_run=0;
3826     int prev_level=0;
3827     int qmul, qadd, start_i, last_non_zero, i, dc;
3828     uint8_t * length;
3829     uint8_t * last_length;
3830     int lambda;
3831     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3832 #ifdef REFINE_STATS
3833 static int count=0;
3834 static int after_last=0;
3835 static int to_zero=0;
3836 static int from_zero=0;
3837 static int raise=0;
3838 static int lower=0;
3839 static int messed_sign=0;
3840 #endif
3841
3842     if(basis[0][0] == 0)
3843         build_basis(s->idsp.idct_permutation);
3844
3845     qmul= qscale*2;
3846     qadd= (qscale-1)|1;
3847     if (s->mb_intra) {
3848         if (!s->h263_aic) {
3849             if (n < 4)
3850                 q = s->y_dc_scale;
3851             else
3852                 q = s->c_dc_scale;
3853         } else{
3854             /* For AIC we skip quant/dequant of INTRADC */
3855             q = 1;
3856             qadd=0;
3857         }
3858         q <<= RECON_SHIFT-3;
3859         /* note: block[0] is assumed to be positive */
3860         dc= block[0]*q;
3861 //        block[0] = (block[0] + (q >> 1)) / q;
3862         start_i = 1;
3863 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3864 //            bias= 1<<(QMAT_SHIFT-1);
3865         length     = s->intra_ac_vlc_length;
3866         last_length= s->intra_ac_vlc_last_length;
3867     } else {
3868         dc= 0;
3869         start_i = 0;
3870         length     = s->inter_ac_vlc_length;
3871         last_length= s->inter_ac_vlc_last_length;
3872     }
3873     last_non_zero = s->block_last_index[n];
3874
3875 #ifdef REFINE_STATS
3876 {START_TIMER
3877 #endif
3878     dc += (1<<(RECON_SHIFT-1));
3879     for(i=0; i<64; i++){
3880         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3881     }
3882 #ifdef REFINE_STATS
3883 STOP_TIMER("memset rem[]")}
3884 #endif
3885     sum=0;
3886     for(i=0; i<64; i++){
3887         int one= 36;
3888         int qns=4;
3889         int w;
3890
3891         w= FFABS(weight[i]) + qns*one;
3892         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3893
3894         weight[i] = w;
3895 //        w=weight[i] = (63*qns + (w/2)) / w;
3896
3897         assert(w>0);
3898         assert(w<(1<<6));
3899         sum += w*w;
3900     }
3901     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3902 #ifdef REFINE_STATS
3903 {START_TIMER
3904 #endif
3905     run=0;
3906     rle_index=0;
3907     for(i=start_i; i<=last_non_zero; i++){
3908         int j= perm_scantable[i];
3909         const int level= block[j];
3910         int coeff;
3911
3912         if(level){
3913             if(level<0) coeff= qmul*level - qadd;
3914             else        coeff= qmul*level + qadd;
3915             run_tab[rle_index++]=run;
3916             run=0;
3917
3918             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3919         }else{
3920             run++;
3921         }
3922     }
3923 #ifdef REFINE_STATS
3924 if(last_non_zero>0){
3925 STOP_TIMER("init rem[]")
3926 }
3927 }
3928
3929 {START_TIMER
3930 #endif
3931     for(;;){
3932         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3933         int best_coeff=0;
3934         int best_change=0;
3935         int run2, best_unquant_change=0, analyze_gradient;
3936 #ifdef REFINE_STATS
3937 {START_TIMER
3938 #endif
3939         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3940
3941         if(analyze_gradient){
3942 #ifdef REFINE_STATS
3943 {START_TIMER
3944 #endif
3945             for(i=0; i<64; i++){
3946                 int w= weight[i];
3947
3948                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3949             }
3950 #ifdef REFINE_STATS
3951 STOP_TIMER("rem*w*w")}
3952 {START_TIMER
3953 #endif
3954             s->fdsp.fdct(d1);
3955 #ifdef REFINE_STATS
3956 STOP_TIMER("dct")}
3957 #endif
3958         }
3959
3960         if(start_i){
3961             const int level= block[0];
3962             int change, old_coeff;
3963
3964             assert(s->mb_intra);
3965
3966             old_coeff= q*level;
3967
3968             for(change=-1; change<=1; change+=2){
3969                 int new_level= level + change;
3970                 int score, new_coeff;
3971
3972                 new_coeff= q*new_level;
3973                 if(new_coeff >= 2048 || new_coeff < 0)
3974                     continue;
3975
3976                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3977                                                   new_coeff - old_coeff);
3978                 if(score<best_score){
3979                     best_score= score;
3980                     best_coeff= 0;
3981                     best_change= change;
3982                     best_unquant_change= new_coeff - old_coeff;
3983                 }
3984             }
3985         }
3986
3987         run=0;
3988         rle_index=0;
3989         run2= run_tab[rle_index++];
3990         prev_level=0;
3991         prev_run=0;
3992
3993         for(i=start_i; i<64; i++){
3994             int j= perm_scantable[i];
3995             const int level= block[j];
3996             int change, old_coeff;
3997
3998             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3999                 break;
4000
4001             if(level){
4002                 if(level<0) old_coeff= qmul*level - qadd;
4003                 else        old_coeff= qmul*level + qadd;
4004                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4005             }else{
4006                 old_coeff=0;
4007                 run2--;
4008                 assert(run2>=0 || i >= last_non_zero );
4009             }
4010
4011             for(change=-1; change<=1; change+=2){
4012                 int new_level= level + change;
4013                 int score, new_coeff, unquant_change;
4014
4015                 score=0;
4016                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4017                    continue;
4018
4019                 if(new_level){
4020                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4021                     else            new_coeff= qmul*new_level + qadd;
4022                     if(new_coeff >= 2048 || new_coeff <= -2048)
4023                         continue;
4024                     //FIXME check for overflow
4025
4026                     if(level){
4027                         if(level < 63 && level > -63){
4028                             if(i < last_non_zero)
4029                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4030                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4031                             else
4032                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4033                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4034                         }
4035                     }else{
4036                         assert(FFABS(new_level)==1);
4037
4038                         if(analyze_gradient){
4039                             int g= d1[ scantable[i] ];
4040                             if(g && (g^new_level) >= 0)
4041                                 continue;
4042                         }
4043
4044                         if(i < last_non_zero){
4045                             int next_i= i + run2 + 1;
4046                             int next_level= block[ perm_scantable[next_i] ] + 64;
4047
4048                             if(next_level&(~127))
4049                                 next_level= 0;
4050
4051                             if(next_i < last_non_zero)
4052                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4053                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4054                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4055                             else
4056                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4057                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4058                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4059                         }else{
4060                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4061                             if(prev_level){
4062                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4063                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4064                             }
4065                         }
4066                     }
4067                 }else{
4068                     new_coeff=0;
4069                     assert(FFABS(level)==1);
4070
4071                     if(i < last_non_zero){
4072                         int next_i= i + run2 + 1;
4073                         int next_level= block[ perm_scantable[next_i] ] + 64;
4074
4075                         if(next_level&(~127))
4076                             next_level= 0;
4077
4078                         if(next_i < last_non_zero)
4079                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4080                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4081                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4082                         else
4083                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4084                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4085                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4086                     }else{
4087                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4088                         if(prev_level){
4089                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4090                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4091                         }
4092                     }
4093                 }
4094
4095                 score *= lambda;
4096
4097                 unquant_change= new_coeff - old_coeff;
4098                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4099
4100                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4101                                                    unquant_change);
4102                 if(score<best_score){
4103                     best_score= score;
4104                     best_coeff= i;
4105                     best_change= change;
4106                     best_unquant_change= unquant_change;
4107                 }
4108             }
4109             if(level){
4110                 prev_level= level + 64;
4111                 if(prev_level&(~127))
4112                     prev_level= 0;
4113                 prev_run= run;
4114                 run=0;
4115             }else{
4116                 run++;
4117             }
4118         }
4119 #ifdef REFINE_STATS
4120 STOP_TIMER("iterative step")}
4121 #endif
4122
4123         if(best_change){
4124             int j= perm_scantable[ best_coeff ];
4125
4126             block[j] += best_change;
4127
4128             if(best_coeff > last_non_zero){
4129                 last_non_zero= best_coeff;
4130                 assert(block[j]);
4131 #ifdef REFINE_STATS
4132 after_last++;
4133 #endif
4134             }else{
4135 #ifdef REFINE_STATS
4136 if(block[j]){
4137     if(block[j] - best_change){
4138         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4139             raise++;
4140         }else{
4141             lower++;
4142         }
4143     }else{
4144         from_zero++;
4145     }
4146 }else{
4147     to_zero++;
4148 }
4149 #endif
4150                 for(; last_non_zero>=start_i; last_non_zero--){
4151                     if(block[perm_scantable[last_non_zero]])
4152                         break;
4153                 }
4154             }
4155 #ifdef REFINE_STATS
4156 count++;
4157 if(256*256*256*64 % count == 0){
4158     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4159 }
4160 #endif
4161             run=0;
4162             rle_index=0;
4163             for(i=start_i; i<=last_non_zero; i++){
4164                 int j= perm_scantable[i];
4165                 const int level= block[j];
4166
4167                  if(level){
4168                      run_tab[rle_index++]=run;
4169                      run=0;
4170                  }else{
4171                      run++;
4172                  }
4173             }
4174
4175             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4176         }else{
4177             break;
4178         }
4179     }
4180 #ifdef REFINE_STATS
4181 if(last_non_zero>0){
4182 STOP_TIMER("iterative search")
4183 }
4184 }
4185 #endif
4186
4187     return last_non_zero;
4188 }
4189
4190 int ff_dct_quantize_c(MpegEncContext *s,
4191                         int16_t *block, int n,
4192                         int qscale, int *overflow)
4193 {
4194     int i, j, level, last_non_zero, q, start_i;
4195     const int *qmat;
4196     const uint8_t *scantable= s->intra_scantable.scantable;
4197     int bias;
4198     int max=0;
4199     unsigned int threshold1, threshold2;
4200
4201     s->fdsp.fdct(block);
4202
4203     if(s->dct_error_sum)
4204         s->denoise_dct(s, block);
4205
4206     if (s->mb_intra) {
4207         if (!s->h263_aic) {
4208             if (n < 4)
4209                 q = s->y_dc_scale;
4210             else
4211                 q = s->c_dc_scale;
4212             q = q << 3;
4213         } else
4214             /* For AIC we skip quant/dequant of INTRADC */
4215             q = 1 << 3;
4216
4217         /* note: block[0] is assumed to be positive */
4218         block[0] = (block[0] + (q >> 1)) / q;
4219         start_i = 1;
4220         last_non_zero = 0;
4221         qmat = s->q_intra_matrix[qscale];
4222         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4223     } else {
4224         start_i = 0;
4225         last_non_zero = -1;
4226         qmat = s->q_inter_matrix[qscale];
4227         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4228     }
4229     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4230     threshold2= (threshold1<<1);
4231     for(i=63;i>=start_i;i--) {
4232         j = scantable[i];
4233         level = block[j] * qmat[j];
4234
4235         if(((unsigned)(level+threshold1))>threshold2){
4236             last_non_zero = i;
4237             break;
4238         }else{
4239             block[j]=0;
4240         }
4241     }
4242     for(i=start_i; i<=last_non_zero; i++) {
4243         j = scantable[i];
4244         level = block[j] * qmat[j];
4245
4246 //        if(   bias+level >= (1<<QMAT_SHIFT)
4247 //           || bias-level >= (1<<QMAT_SHIFT)){
4248         if(((unsigned)(level+threshold1))>threshold2){
4249             if(level>0){
4250                 level= (bias + level)>>QMAT_SHIFT;
4251                 block[j]= level;
4252             }else{
4253                 level= (bias - level)>>QMAT_SHIFT;
4254                 block[j]= -level;
4255             }
4256             max |=level;
4257         }else{
4258             block[j]=0;
4259         }
4260     }
4261     *overflow= s->max_qcoeff < max; //overflow might have happened
4262
4263     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4264     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4265         ff_block_permute(block, s->idsp.idct_permutation,
4266                          scantable, last_non_zero);
4267
4268     return last_non_zero;
4269 }
4270
4271 #define OFFSET(x) offsetof(MpegEncContext, x)
4272 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4273 static const AVOption h263_options[] = {
4274     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4275     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4276     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4277     FF_MPV_COMMON_OPTS
4278     { NULL },
4279 };
4280
4281 static const AVClass h263_class = {
4282     .class_name = "H.263 encoder",
4283     .item_name  = av_default_item_name,
4284     .option     = h263_options,
4285     .version    = LIBAVUTIL_VERSION_INT,
4286 };
4287
4288 AVCodec ff_h263_encoder = {
4289     .name           = "h263",
4290     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4291     .type           = AVMEDIA_TYPE_VIDEO,
4292     .id             = AV_CODEC_ID_H263,
4293     .priv_data_size = sizeof(MpegEncContext),
4294     .init           = ff_mpv_encode_init,
4295     .encode2        = ff_mpv_encode_picture,
4296     .close          = ff_mpv_encode_end,
4297     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4298     .priv_class     = &h263_class,
4299 };
4300
4301 static const AVOption h263p_options[] = {
4302     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4303     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4304     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4305     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4306     FF_MPV_COMMON_OPTS
4307     { NULL },
4308 };
4309 static const AVClass h263p_class = {
4310     .class_name = "H.263p encoder",
4311     .item_name  = av_default_item_name,
4312     .option     = h263p_options,
4313     .version    = LIBAVUTIL_VERSION_INT,
4314 };
4315
4316 AVCodec ff_h263p_encoder = {
4317     .name           = "h263p",
4318     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4319     .type           = AVMEDIA_TYPE_VIDEO,
4320     .id             = AV_CODEC_ID_H263P,
4321     .priv_data_size = sizeof(MpegEncContext),
4322     .init           = ff_mpv_encode_init,
4323     .encode2        = ff_mpv_encode_picture,
4324     .close          = ff_mpv_encode_end,
4325     .capabilities   = CODEC_CAP_SLICE_THREADS,
4326     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4327     .priv_class     = &h263p_class,
4328 };
4329
4330 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4331
4332 AVCodec ff_msmpeg4v2_encoder = {
4333     .name           = "msmpeg4v2",
4334     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4335     .type           = AVMEDIA_TYPE_VIDEO,
4336     .id             = AV_CODEC_ID_MSMPEG4V2,
4337     .priv_data_size = sizeof(MpegEncContext),
4338     .init           = ff_mpv_encode_init,
4339     .encode2        = ff_mpv_encode_picture,
4340     .close          = ff_mpv_encode_end,
4341     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4342     .priv_class     = &msmpeg4v2_class,
4343 };
4344
4345 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4346
4347 AVCodec ff_msmpeg4v3_encoder = {
4348     .name           = "msmpeg4",
4349     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4350     .type           = AVMEDIA_TYPE_VIDEO,
4351     .id             = AV_CODEC_ID_MSMPEG4V3,
4352     .priv_data_size = sizeof(MpegEncContext),
4353     .init           = ff_mpv_encode_init,
4354     .encode2        = ff_mpv_encode_picture,
4355     .close          = ff_mpv_encode_end,
4356     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4357     .priv_class     = &msmpeg4v3_class,
4358 };
4359
4360 FF_MPV_GENERIC_CLASS(wmv1)
4361
4362 AVCodec ff_wmv1_encoder = {
4363     .name           = "wmv1",
4364     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4365     .type           = AVMEDIA_TYPE_VIDEO,
4366     .id             = AV_CODEC_ID_WMV1,
4367     .priv_data_size = sizeof(MpegEncContext),
4368     .init           = ff_mpv_encode_init,
4369     .encode2        = ff_mpv_encode_picture,
4370     .close          = ff_mpv_encode_end,
4371     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4372     .priv_class     = &wmv1_class,
4373 };