]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
mpegvideo_enc: Draw edges on input for non-multiple of 16 resolutions
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60
61 #define QUANT_BIAS_SHIFT 8
62
63 #define QMAT_SHIFT_MMX 16
64 #define QMAT_SHIFT 22
65
66 static int encode_picture(MpegEncContext *s, int picture_number);
67 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
68 static int sse_mb(MpegEncContext *s);
69 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
70 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
71
72 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
73 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
74
75 const AVOption ff_mpv_generic_options[] = {
76     FF_MPV_COMMON_OPTS
77     { NULL },
78 };
79
80 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
81                        uint16_t (*qmat16)[2][64],
82                        const uint16_t *quant_matrix,
83                        int bias, int qmin, int qmax, int intra)
84 {
85     FDCTDSPContext *fdsp = &s->fdsp;
86     int qscale;
87     int shift = 0;
88
89     for (qscale = qmin; qscale <= qmax; qscale++) {
90         int i;
91         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
92 #if CONFIG_FAANDCT
93             fdsp->fdct == ff_faandct            ||
94 #endif /* CONFIG_FAANDCT */
95             fdsp->fdct == ff_jpeg_fdct_islow_10) {
96             for (i = 0; i < 64; i++) {
97                 const int j = s->idsp.idct_permutation[i];
98                 int64_t den = (int64_t) qscale * quant_matrix[j];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
106             }
107         } else if (fdsp->fdct == ff_fdct_ifast) {
108             for (i = 0; i < 64; i++) {
109                 const int j = s->idsp.idct_permutation[i];
110                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
111                 /* 16 <= qscale * quant_matrix[i] <= 7905
112                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
113                  *             19952 <=              x  <= 249205026
114                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
115                  *           3444240 >= (1 << 36) / (x) >= 275 */
116
117                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
118             }
119         } else {
120             for (i = 0; i < 64; i++) {
121                 const int j = s->idsp.idct_permutation[i];
122                 int64_t den = (int64_t) qscale * quant_matrix[j];
123                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
124                  * Assume x = qscale * quant_matrix[i]
125                  * So             16 <=              x  <= 7905
126                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
127                  * so          32768 >= (1 << 19) / (x) >= 67 */
128                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
129                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
130                 //                    (qscale * quant_matrix[i]);
131                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
132
133                 if (qmat16[qscale][0][i] == 0 ||
134                     qmat16[qscale][0][i] == 128 * 256)
135                     qmat16[qscale][0][i] = 128 * 256 - 1;
136                 qmat16[qscale][1][i] =
137                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
138                                 qmat16[qscale][0][i]);
139             }
140         }
141
142         for (i = intra; i < 64; i++) {
143             int64_t max = 8191;
144             if (fdsp->fdct == ff_fdct_ifast) {
145                 max = (8191LL * ff_aanscales[i]) >> 14;
146             }
147             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
148                 shift++;
149             }
150         }
151     }
152     if (shift) {
153         av_log(NULL, AV_LOG_INFO,
154                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
155                QMAT_SHIFT - shift);
156     }
157 }
158
159 static inline void update_qscale(MpegEncContext *s)
160 {
161     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
162                 (FF_LAMBDA_SHIFT + 7);
163     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
164
165     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
166                  FF_LAMBDA_SHIFT;
167 }
168
169 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
170 {
171     int i;
172
173     if (matrix) {
174         put_bits(pb, 1, 1);
175         for (i = 0; i < 64; i++) {
176             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
177         }
178     } else
179         put_bits(pb, 1, 0);
180 }
181
182 /**
183  * init s->current_picture.qscale_table from s->lambda_table
184  */
185 void ff_init_qscale_tab(MpegEncContext *s)
186 {
187     int8_t * const qscale_table = s->current_picture.qscale_table;
188     int i;
189
190     for (i = 0; i < s->mb_num; i++) {
191         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
192         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
193         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
194                                                   s->avctx->qmax);
195     }
196 }
197
198 static void update_duplicate_context_after_me(MpegEncContext *dst,
199                                               MpegEncContext *src)
200 {
201 #define COPY(a) dst->a= src->a
202     COPY(pict_type);
203     COPY(current_picture);
204     COPY(f_code);
205     COPY(b_code);
206     COPY(qscale);
207     COPY(lambda);
208     COPY(lambda2);
209     COPY(picture_in_gop_number);
210     COPY(gop_picture_number);
211     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
212     COPY(progressive_frame);    // FIXME don't set in encode_header
213     COPY(partitioned_frame);    // FIXME don't set in encode_header
214 #undef COPY
215 }
216
217 /**
218  * Set the given MpegEncContext to defaults for encoding.
219  * the changed fields will not depend upon the prior state of the MpegEncContext.
220  */
221 static void mpv_encode_defaults(MpegEncContext *s)
222 {
223     int i;
224     ff_mpv_common_defaults(s);
225
226     for (i = -16; i < 16; i++) {
227         default_fcode_tab[i + MAX_MV] = 1;
228     }
229     s->me.mv_penalty = default_mv_penalty;
230     s->fcode_tab     = default_fcode_tab;
231
232     s->input_picture_number  = 0;
233     s->picture_in_gop_number = 0;
234 }
235
236 /* init video encoder */
237 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
238 {
239     MpegEncContext *s = avctx->priv_data;
240     int i, ret, format_supported;
241
242     mpv_encode_defaults(s);
243
244     switch (avctx->codec_id) {
245     case AV_CODEC_ID_MPEG2VIDEO:
246         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
247             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
248             av_log(avctx, AV_LOG_ERROR,
249                    "only YUV420 and YUV422 are supported\n");
250             return -1;
251         }
252         break;
253     case AV_CODEC_ID_MJPEG:
254         format_supported = 0;
255         /* JPEG color space */
256         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
257             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
258             (avctx->color_range == AVCOL_RANGE_JPEG &&
259              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
260               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
261             format_supported = 1;
262         /* MPEG color space */
263         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
264                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
265                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
266             format_supported = 1;
267
268         if (!format_supported) {
269             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
270             return -1;
271         }
272         break;
273     default:
274         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
275             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
276             return -1;
277         }
278     }
279
280     switch (avctx->pix_fmt) {
281     case AV_PIX_FMT_YUVJ422P:
282     case AV_PIX_FMT_YUV422P:
283         s->chroma_format = CHROMA_422;
284         break;
285     case AV_PIX_FMT_YUVJ420P:
286     case AV_PIX_FMT_YUV420P:
287     default:
288         s->chroma_format = CHROMA_420;
289         break;
290     }
291
292     s->bit_rate = avctx->bit_rate;
293     s->width    = avctx->width;
294     s->height   = avctx->height;
295     if (avctx->gop_size > 600 &&
296         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
297         av_log(avctx, AV_LOG_ERROR,
298                "Warning keyframe interval too large! reducing it ...\n");
299         avctx->gop_size = 600;
300     }
301     s->gop_size     = avctx->gop_size;
302     s->avctx        = avctx;
303     s->flags        = avctx->flags;
304     s->flags2       = avctx->flags2;
305     if (avctx->max_b_frames > MAX_B_FRAMES) {
306         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
307                "is %d.\n", MAX_B_FRAMES);
308     }
309     s->max_b_frames = avctx->max_b_frames;
310     s->codec_id     = avctx->codec->id;
311     s->strict_std_compliance = avctx->strict_std_compliance;
312     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
313     s->mpeg_quant         = avctx->mpeg_quant;
314     s->rtp_mode           = !!avctx->rtp_payload_size;
315     s->intra_dc_precision = avctx->intra_dc_precision;
316     s->user_specified_pts = AV_NOPTS_VALUE;
317
318     if (s->gop_size <= 1) {
319         s->intra_only = 1;
320         s->gop_size   = 12;
321     } else {
322         s->intra_only = 0;
323     }
324
325     s->me_method = avctx->me_method;
326
327     /* Fixed QSCALE */
328     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
329
330 #if FF_API_MPV_OPT
331     FF_DISABLE_DEPRECATION_WARNINGS
332     if (avctx->border_masking != 0.0)
333         s->border_masking = avctx->border_masking;
334     FF_ENABLE_DEPRECATION_WARNINGS
335 #endif
336
337     s->adaptive_quant = (s->avctx->lumi_masking ||
338                          s->avctx->dark_masking ||
339                          s->avctx->temporal_cplx_masking ||
340                          s->avctx->spatial_cplx_masking  ||
341                          s->avctx->p_masking      ||
342                          s->border_masking ||
343                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
344                         !s->fixed_qscale;
345
346     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
347
348     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
349         av_log(avctx, AV_LOG_ERROR,
350                "a vbv buffer size is needed, "
351                "for encoding with a maximum bitrate\n");
352         return -1;
353     }
354
355     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
356         av_log(avctx, AV_LOG_INFO,
357                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
358     }
359
360     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
361         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
362         return -1;
363     }
364
365     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
366         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
367         return -1;
368     }
369
370     if (avctx->rc_max_rate &&
371         avctx->rc_max_rate == avctx->bit_rate &&
372         avctx->rc_max_rate != avctx->rc_min_rate) {
373         av_log(avctx, AV_LOG_INFO,
374                "impossible bitrate constraints, this will fail\n");
375     }
376
377     if (avctx->rc_buffer_size &&
378         avctx->bit_rate * (int64_t)avctx->time_base.num >
379             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
380         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
381         return -1;
382     }
383
384     if (!s->fixed_qscale &&
385         avctx->bit_rate * av_q2d(avctx->time_base) >
386             avctx->bit_rate_tolerance) {
387         av_log(avctx, AV_LOG_ERROR,
388                "bitrate tolerance too small for bitrate\n");
389         return -1;
390     }
391
392     if (s->avctx->rc_max_rate &&
393         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
394         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
395          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
396         90000LL * (avctx->rc_buffer_size - 1) >
397             s->avctx->rc_max_rate * 0xFFFFLL) {
398         av_log(avctx, AV_LOG_INFO,
399                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
400                "specified vbv buffer is too large for the given bitrate!\n");
401     }
402
403     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
404         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
405         s->codec_id != AV_CODEC_ID_FLV1) {
406         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
407         return -1;
408     }
409
410     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
411         av_log(avctx, AV_LOG_ERROR,
412                "OBMC is only supported with simple mb decision\n");
413         return -1;
414     }
415
416     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
417         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
418         return -1;
419     }
420
421     if (s->max_b_frames                    &&
422         s->codec_id != AV_CODEC_ID_MPEG4      &&
423         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
424         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
425         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
426         return -1;
427     }
428
429     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
430          s->codec_id == AV_CODEC_ID_H263  ||
431          s->codec_id == AV_CODEC_ID_H263P) &&
432         (avctx->sample_aspect_ratio.num > 255 ||
433          avctx->sample_aspect_ratio.den > 255)) {
434         av_log(avctx, AV_LOG_ERROR,
435                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
436                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
437         return -1;
438     }
439
440     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
441         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
442         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
443         return -1;
444     }
445
446     // FIXME mpeg2 uses that too
447     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
448         av_log(avctx, AV_LOG_ERROR,
449                "mpeg2 style quantization not supported by codec\n");
450         return -1;
451     }
452
453     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
454         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
455         return -1;
456     }
457
458     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
459         s->avctx->mb_decision != FF_MB_DECISION_RD) {
460         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
461         return -1;
462     }
463
464     if (s->avctx->scenechange_threshold < 1000000000 &&
465         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
466         av_log(avctx, AV_LOG_ERROR,
467                "closed gop with scene change detection are not supported yet, "
468                "set threshold to 1000000000\n");
469         return -1;
470     }
471
472     if (s->flags & CODEC_FLAG_LOW_DELAY) {
473         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
474             av_log(avctx, AV_LOG_ERROR,
475                   "low delay forcing is only available for mpeg2\n");
476             return -1;
477         }
478         if (s->max_b_frames != 0) {
479             av_log(avctx, AV_LOG_ERROR,
480                    "b frames cannot be used with low delay\n");
481             return -1;
482         }
483     }
484
485     if (s->q_scale_type == 1) {
486         if (avctx->qmax > 12) {
487             av_log(avctx, AV_LOG_ERROR,
488                    "non linear quant only supports qmax <= 12 currently\n");
489             return -1;
490         }
491     }
492
493     if (s->avctx->thread_count > 1         &&
494         s->codec_id != AV_CODEC_ID_MPEG4      &&
495         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
496         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
497         (s->codec_id != AV_CODEC_ID_H263P)) {
498         av_log(avctx, AV_LOG_ERROR,
499                "multi threaded encoding not supported by codec\n");
500         return -1;
501     }
502
503     if (s->avctx->thread_count < 1) {
504         av_log(avctx, AV_LOG_ERROR,
505                "automatic thread number detection not supported by codec,"
506                "patch welcome\n");
507         return -1;
508     }
509
510     if (s->avctx->thread_count > 1)
511         s->rtp_mode = 1;
512
513     if (!avctx->time_base.den || !avctx->time_base.num) {
514         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
515         return -1;
516     }
517
518     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
519         av_log(avctx, AV_LOG_INFO,
520                "notice: b_frame_strategy only affects the first pass\n");
521         avctx->b_frame_strategy = 0;
522     }
523
524     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
525     if (i > 1) {
526         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
527         avctx->time_base.den /= i;
528         avctx->time_base.num /= i;
529         //return -1;
530     }
531
532     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
533         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
534         // (a + x * 3 / 8) / x
535         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
536         s->inter_quant_bias = 0;
537     } else {
538         s->intra_quant_bias = 0;
539         // (a - x / 4) / x
540         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
541     }
542
543     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
544         s->intra_quant_bias = avctx->intra_quant_bias;
545     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
546         s->inter_quant_bias = avctx->inter_quant_bias;
547
548     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
549         s->avctx->time_base.den > (1 << 16) - 1) {
550         av_log(avctx, AV_LOG_ERROR,
551                "timebase %d/%d not supported by MPEG 4 standard, "
552                "the maximum admitted value for the timebase denominator "
553                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
554                (1 << 16) - 1);
555         return -1;
556     }
557     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
558
559     switch (avctx->codec->id) {
560     case AV_CODEC_ID_MPEG1VIDEO:
561         s->out_format = FMT_MPEG1;
562         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
563         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
564         break;
565     case AV_CODEC_ID_MPEG2VIDEO:
566         s->out_format = FMT_MPEG1;
567         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
568         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
569         s->rtp_mode   = 1;
570         break;
571     case AV_CODEC_ID_MJPEG:
572         s->out_format = FMT_MJPEG;
573         s->intra_only = 1; /* force intra only for jpeg */
574         if (!CONFIG_MJPEG_ENCODER ||
575             ff_mjpeg_encode_init(s) < 0)
576             return -1;
577         avctx->delay = 0;
578         s->low_delay = 1;
579         break;
580     case AV_CODEC_ID_H261:
581         if (!CONFIG_H261_ENCODER)
582             return -1;
583         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "The specified picture size of %dx%d is not valid for the "
586                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
587                     s->width, s->height);
588             return -1;
589         }
590         s->out_format = FMT_H261;
591         avctx->delay  = 0;
592         s->low_delay  = 1;
593         s->rtp_mode   = 0; /* Sliced encoding not supported */
594         break;
595     case AV_CODEC_ID_H263:
596         if (!CONFIG_H263_ENCODER)
597         return -1;
598         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
599                              s->width, s->height) == 8) {
600             av_log(avctx, AV_LOG_INFO,
601                    "The specified picture size of %dx%d is not valid for "
602                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
603                    "352x288, 704x576, and 1408x1152."
604                    "Try H.263+.\n", s->width, s->height);
605             return -1;
606         }
607         s->out_format = FMT_H263;
608         avctx->delay  = 0;
609         s->low_delay  = 1;
610         break;
611     case AV_CODEC_ID_H263P:
612         s->out_format = FMT_H263;
613         s->h263_plus  = 1;
614         /* Fx */
615         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
616         s->modified_quant  = s->h263_aic;
617         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
618         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
619
620         /* /Fx */
621         /* These are just to be sure */
622         avctx->delay = 0;
623         s->low_delay = 1;
624         break;
625     case AV_CODEC_ID_FLV1:
626         s->out_format      = FMT_H263;
627         s->h263_flv        = 2; /* format = 1; 11-bit codes */
628         s->unrestricted_mv = 1;
629         s->rtp_mode  = 0; /* don't allow GOB */
630         avctx->delay = 0;
631         s->low_delay = 1;
632         break;
633     case AV_CODEC_ID_RV10:
634         s->out_format = FMT_H263;
635         avctx->delay  = 0;
636         s->low_delay  = 1;
637         break;
638     case AV_CODEC_ID_RV20:
639         s->out_format      = FMT_H263;
640         avctx->delay       = 0;
641         s->low_delay       = 1;
642         s->modified_quant  = 1;
643         s->h263_aic        = 1;
644         s->h263_plus       = 1;
645         s->loop_filter     = 1;
646         s->unrestricted_mv = 0;
647         break;
648     case AV_CODEC_ID_MPEG4:
649         s->out_format      = FMT_H263;
650         s->h263_pred       = 1;
651         s->unrestricted_mv = 1;
652         s->low_delay       = s->max_b_frames ? 0 : 1;
653         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
654         break;
655     case AV_CODEC_ID_MSMPEG4V2:
656         s->out_format      = FMT_H263;
657         s->h263_pred       = 1;
658         s->unrestricted_mv = 1;
659         s->msmpeg4_version = 2;
660         avctx->delay       = 0;
661         s->low_delay       = 1;
662         break;
663     case AV_CODEC_ID_MSMPEG4V3:
664         s->out_format        = FMT_H263;
665         s->h263_pred         = 1;
666         s->unrestricted_mv   = 1;
667         s->msmpeg4_version   = 3;
668         s->flipflop_rounding = 1;
669         avctx->delay         = 0;
670         s->low_delay         = 1;
671         break;
672     case AV_CODEC_ID_WMV1:
673         s->out_format        = FMT_H263;
674         s->h263_pred         = 1;
675         s->unrestricted_mv   = 1;
676         s->msmpeg4_version   = 4;
677         s->flipflop_rounding = 1;
678         avctx->delay         = 0;
679         s->low_delay         = 1;
680         break;
681     case AV_CODEC_ID_WMV2:
682         s->out_format        = FMT_H263;
683         s->h263_pred         = 1;
684         s->unrestricted_mv   = 1;
685         s->msmpeg4_version   = 5;
686         s->flipflop_rounding = 1;
687         avctx->delay         = 0;
688         s->low_delay         = 1;
689         break;
690     default:
691         return -1;
692     }
693
694     avctx->has_b_frames = !s->low_delay;
695
696     s->encoding = 1;
697
698     s->progressive_frame    =
699     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
700                                                 CODEC_FLAG_INTERLACED_ME) ||
701                                 s->alternate_scan);
702
703     /* init */
704     ff_mpv_idct_init(s);
705     if (ff_mpv_common_init(s) < 0)
706         return -1;
707
708     if (ARCH_X86)
709         ff_mpv_encode_init_x86(s);
710
711     ff_fdctdsp_init(&s->fdsp, avctx);
712     ff_me_cmp_init(&s->mecc, avctx);
713     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
714     ff_pixblockdsp_init(&s->pdsp, avctx);
715     ff_qpeldsp_init(&s->qdsp);
716
717     s->avctx->coded_frame = s->current_picture.f;
718
719     if (s->msmpeg4_version) {
720         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
721                           2 * 2 * (MAX_LEVEL + 1) *
722                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
723     }
724     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
725
726     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
727     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
728     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
729     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
730     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
731                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
732     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
733                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
734
735     if (s->avctx->noise_reduction) {
736         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
737                           2 * 64 * sizeof(uint16_t), fail);
738     }
739
740     if (CONFIG_H263_ENCODER)
741         ff_h263dsp_init(&s->h263dsp);
742     if (!s->dct_quantize)
743         s->dct_quantize = ff_dct_quantize_c;
744     if (!s->denoise_dct)
745         s->denoise_dct  = denoise_dct_c;
746     s->fast_dct_quantize = s->dct_quantize;
747     if (avctx->trellis)
748         s->dct_quantize  = dct_quantize_trellis_c;
749
750     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
751         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
752
753     s->quant_precision = 5;
754
755     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
756     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
757
758     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
759         ff_h261_encode_init(s);
760     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
761         ff_h263_encode_init(s);
762     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
763         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
764             return ret;
765     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
766         && s->out_format == FMT_MPEG1)
767         ff_mpeg1_encode_init(s);
768
769     /* init q matrix */
770     for (i = 0; i < 64; i++) {
771         int j = s->idsp.idct_permutation[i];
772         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
773             s->mpeg_quant) {
774             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
775             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
776         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
777             s->intra_matrix[j] =
778             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
779         } else {
780             /* mpeg1/2 */
781             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
782             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
783         }
784         if (s->avctx->intra_matrix)
785             s->intra_matrix[j] = s->avctx->intra_matrix[i];
786         if (s->avctx->inter_matrix)
787             s->inter_matrix[j] = s->avctx->inter_matrix[i];
788     }
789
790     /* precompute matrix */
791     /* for mjpeg, we do include qscale in the matrix */
792     if (s->out_format != FMT_MJPEG) {
793         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
794                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
795                           31, 1);
796         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
797                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
798                           31, 0);
799     }
800
801     if (ff_rate_control_init(s) < 0)
802         return -1;
803
804 #if FF_API_ERROR_RATE
805     FF_DISABLE_DEPRECATION_WARNINGS
806     if (avctx->error_rate)
807         s->error_rate = avctx->error_rate;
808     FF_ENABLE_DEPRECATION_WARNINGS;
809 #endif
810
811 #if FF_API_NORMALIZE_AQP
812     FF_DISABLE_DEPRECATION_WARNINGS
813     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
814         s->mpv_flags |= FF_MPV_FLAG_NAQ;
815     FF_ENABLE_DEPRECATION_WARNINGS;
816 #endif
817
818 #if FF_API_MV0
819     FF_DISABLE_DEPRECATION_WARNINGS
820     if (avctx->flags & CODEC_FLAG_MV0)
821         s->mpv_flags |= FF_MPV_FLAG_MV0;
822     FF_ENABLE_DEPRECATION_WARNINGS
823 #endif
824
825 #if FF_API_MPV_OPT
826     FF_DISABLE_DEPRECATION_WARNINGS
827     if (avctx->rc_qsquish != 0.0)
828         s->rc_qsquish = avctx->rc_qsquish;
829     if (avctx->rc_qmod_amp != 0.0)
830         s->rc_qmod_amp = avctx->rc_qmod_amp;
831     if (avctx->rc_qmod_freq)
832         s->rc_qmod_freq = avctx->rc_qmod_freq;
833     if (avctx->rc_buffer_aggressivity != 1.0)
834         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
835     if (avctx->rc_initial_cplx != 0.0)
836         s->rc_initial_cplx = avctx->rc_initial_cplx;
837     if (avctx->lmin)
838         s->lmin = avctx->lmin;
839     if (avctx->lmax)
840         s->lmax = avctx->lmax;
841
842     if (avctx->rc_eq) {
843         av_freep(&s->rc_eq);
844         s->rc_eq = av_strdup(avctx->rc_eq);
845         if (!s->rc_eq)
846             return AVERROR(ENOMEM);
847     }
848     FF_ENABLE_DEPRECATION_WARNINGS
849 #endif
850
851     if (avctx->b_frame_strategy == 2) {
852         for (i = 0; i < s->max_b_frames + 2; i++) {
853             s->tmp_frames[i] = av_frame_alloc();
854             if (!s->tmp_frames[i])
855                 return AVERROR(ENOMEM);
856
857             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
858             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
859             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
860
861             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
862             if (ret < 0)
863                 return ret;
864         }
865     }
866
867     return 0;
868 fail:
869     ff_mpv_encode_end(avctx);
870     return AVERROR_UNKNOWN;
871 }
872
873 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
874 {
875     MpegEncContext *s = avctx->priv_data;
876     int i;
877
878     ff_rate_control_uninit(s);
879
880     ff_mpv_common_end(s);
881     if (CONFIG_MJPEG_ENCODER &&
882         s->out_format == FMT_MJPEG)
883         ff_mjpeg_encode_close(s);
884
885     av_freep(&avctx->extradata);
886
887     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
888         av_frame_free(&s->tmp_frames[i]);
889
890     ff_free_picture_tables(&s->new_picture);
891     ff_mpeg_unref_picture(s, &s->new_picture);
892
893     av_freep(&s->avctx->stats_out);
894     av_freep(&s->ac_stats);
895
896     av_freep(&s->q_intra_matrix);
897     av_freep(&s->q_inter_matrix);
898     av_freep(&s->q_intra_matrix16);
899     av_freep(&s->q_inter_matrix16);
900     av_freep(&s->input_picture);
901     av_freep(&s->reordered_input_picture);
902     av_freep(&s->dct_offset);
903
904     return 0;
905 }
906
/**
 * Sum of absolute differences between a 16x16 block of samples and a
 * single reference value.
 *
 * The block is only read, so it is taken through a pointer to const.
 *
 * @param src    top-left sample of the 16x16 block
 * @param ref    value each sample is compared against
 * @param stride distance, in bytes, between vertically adjacent samples
 * @return sum of |sample - ref| over all 256 samples
 */
static int get_sae(const uint8_t *src, int ref, int stride)
{
    int x, y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        const uint8_t *row = src + y * stride;

        for (x = 0; x < 16; x++) {
            /* samples are promoted to int, so the difference cannot wrap */
            const int diff = row[x] - ref;

            acc += diff < 0 ? -diff : diff;
        }
    }

    return acc;
}
920
921 static int get_intra_count(MpegEncContext *s, uint8_t *src,
922                            uint8_t *ref, int stride)
923 {
924     int x, y, w, h;
925     int acc = 0;
926
927     w = s->width  & ~15;
928     h = s->height & ~15;
929
930     for (y = 0; y < h; y += 16) {
931         for (x = 0; x < w; x += 16) {
932             int offset = x + y * stride;
933             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
934                                       stride, 16);
935             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
936             int sae  = get_sae(src + offset, mean, stride);
937
938             acc += sae + 500 < sad;
939         }
940     }
941     return acc;
942 }
943
944
945 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
946 {
947     Picture *pic = NULL;
948     int64_t pts;
949     int i, display_picture_number = 0, ret;
950     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
951                                                  (s->low_delay ? 0 : 1);
952     int direct = 1;
953
954     if (pic_arg) {
955         pts = pic_arg->pts;
956         display_picture_number = s->input_picture_number++;
957
958         if (pts != AV_NOPTS_VALUE) {
959             if (s->user_specified_pts != AV_NOPTS_VALUE) {
960                 int64_t time = pts;
961                 int64_t last = s->user_specified_pts;
962
963                 if (time <= last) {
964                     av_log(s->avctx, AV_LOG_ERROR,
965                            "Error, Invalid timestamp=%"PRId64", "
966                            "last=%"PRId64"\n", pts, s->user_specified_pts);
967                     return -1;
968                 }
969
970                 if (!s->low_delay && display_picture_number == 1)
971                     s->dts_delta = time - last;
972             }
973             s->user_specified_pts = pts;
974         } else {
975             if (s->user_specified_pts != AV_NOPTS_VALUE) {
976                 s->user_specified_pts =
977                 pts = s->user_specified_pts + 1;
978                 av_log(s->avctx, AV_LOG_INFO,
979                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
980                        pts);
981             } else {
982                 pts = display_picture_number;
983             }
984         }
985     }
986
987     if (pic_arg) {
988         if (!pic_arg->buf[0] ||
989             pic_arg->linesize[0] != s->linesize ||
990             pic_arg->linesize[1] != s->uvlinesize ||
991             pic_arg->linesize[2] != s->uvlinesize)
992             direct = 0;
993         if ((s->width & 15) || (s->height & 15))
994             direct = 0;
995
996         av_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
997                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
998
999         i = ff_find_unused_picture(s, direct);
1000         if (i < 0)
1001             return i;
1002
1003         pic = &s->picture[i];
1004         pic->reference = 3;
1005
1006         if (direct) {
1007             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1008                 return ret;
1009             if (ff_alloc_picture(s, pic, 1) < 0) {
1010                 return -1;
1011             }
1012         } else {
1013             if (ff_alloc_picture(s, pic, 0) < 0) {
1014                 return -1;
1015             }
1016
1017             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1018                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1019                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1020                 // empty
1021             } else {
1022                 int h_chroma_shift, v_chroma_shift;
1023                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1024                                                  &h_chroma_shift,
1025                                                  &v_chroma_shift);
1026
1027                 for (i = 0; i < 3; i++) {
1028                     int src_stride = pic_arg->linesize[i];
1029                     int dst_stride = i ? s->uvlinesize : s->linesize;
1030                     int h_shift = i ? h_chroma_shift : 0;
1031                     int v_shift = i ? v_chroma_shift : 0;
1032                     int w = s->width  >> h_shift;
1033                     int h = s->height >> v_shift;
1034                     uint8_t *src = pic_arg->data[i];
1035                     uint8_t *dst = pic->f->data[i];
1036
1037                     if (!s->avctx->rc_buffer_size)
1038                         dst += INPLACE_OFFSET;
1039
1040                     if (src_stride == dst_stride)
1041                         memcpy(dst, src, src_stride * h);
1042                     else {
1043                         int h2 = h;
1044                         uint8_t *dst2 = dst;
1045                         while (h2--) {
1046                             memcpy(dst2, src, w);
1047                             dst2 += dst_stride;
1048                             src += src_stride;
1049                         }
1050                     }
1051                     if ((s->width & 15) || (s->height & 15)) {
1052                         s->mpvencdsp.draw_edges(dst, dst_stride,
1053                                                 w, h,
1054                                                 16 >> h_shift,
1055                                                 16 >> v_shift,
1056                                                 EDGE_BOTTOM);
1057                     }
1058                 }
1059             }
1060         }
1061         ret = av_frame_copy_props(pic->f, pic_arg);
1062         if (ret < 0)
1063             return ret;
1064
1065         pic->f->display_picture_number = display_picture_number;
1066         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1067     }
1068
1069     /* shift buffer entries */
1070     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1071         s->input_picture[i - 1] = s->input_picture[i];
1072
1073     s->input_picture[encoding_delay] = (Picture*) pic;
1074
1075     return 0;
1076 }
1077
1078 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1079 {
1080     int x, y, plane;
1081     int score = 0;
1082     int64_t score64 = 0;
1083
1084     for (plane = 0; plane < 3; plane++) {
1085         const int stride = p->f->linesize[plane];
1086         const int bw = plane ? 1 : 2;
1087         for (y = 0; y < s->mb_height * bw; y++) {
1088             for (x = 0; x < s->mb_width * bw; x++) {
1089                 int off = p->shared ? 0 : 16;
1090                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1091                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1092                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1093
1094                 switch (s->avctx->frame_skip_exp) {
1095                 case 0: score    =  FFMAX(score, v);          break;
1096                 case 1: score   += FFABS(v);                  break;
1097                 case 2: score   += v * v;                     break;
1098                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1099                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1100                 }
1101             }
1102         }
1103     }
1104
1105     if (score)
1106         score64 = score;
1107
1108     if (score64 < s->avctx->frame_skip_threshold)
1109         return 1;
1110     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1111         return 1;
1112     return 0;
1113 }
1114
1115 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1116 {
1117     AVPacket pkt = { 0 };
1118     int ret, got_output;
1119
1120     av_init_packet(&pkt);
1121     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1122     if (ret < 0)
1123         return ret;
1124
1125     ret = pkt.size;
1126     av_free_packet(&pkt);
1127     return ret;
1128 }
1129
1130 static int estimate_best_b_count(MpegEncContext *s)
1131 {
1132     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1133     AVCodecContext *c = avcodec_alloc_context3(NULL);
1134     const int scale = s->avctx->brd_scale;
1135     int i, j, out_size, p_lambda, b_lambda, lambda2;
1136     int64_t best_rd  = INT64_MAX;
1137     int best_b_count = -1;
1138
1139     assert(scale >= 0 && scale <= 3);
1140
1141     //emms_c();
1142     //s->next_picture_ptr->quality;
1143     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1144     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1145     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1146     if (!b_lambda) // FIXME we should do this somewhere else
1147         b_lambda = p_lambda;
1148     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1149                FF_LAMBDA_SHIFT;
1150
1151     c->width        = s->width  >> scale;
1152     c->height       = s->height >> scale;
1153     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1154     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1155     c->mb_decision  = s->avctx->mb_decision;
1156     c->me_cmp       = s->avctx->me_cmp;
1157     c->mb_cmp       = s->avctx->mb_cmp;
1158     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1159     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1160     c->time_base    = s->avctx->time_base;
1161     c->max_b_frames = s->max_b_frames;
1162
1163     if (avcodec_open2(c, codec, NULL) < 0)
1164         return -1;
1165
1166     for (i = 0; i < s->max_b_frames + 2; i++) {
1167         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1168                                                 s->next_picture_ptr;
1169
1170         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1171             pre_input = *pre_input_ptr;
1172
1173             if (!pre_input.shared && i) {
1174                 pre_input.f->data[0] += INPLACE_OFFSET;
1175                 pre_input.f->data[1] += INPLACE_OFFSET;
1176                 pre_input.f->data[2] += INPLACE_OFFSET;
1177             }
1178
1179             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1180                                        s->tmp_frames[i]->linesize[0],
1181                                        pre_input.f->data[0],
1182                                        pre_input.f->linesize[0],
1183                                        c->width, c->height);
1184             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1185                                        s->tmp_frames[i]->linesize[1],
1186                                        pre_input.f->data[1],
1187                                        pre_input.f->linesize[1],
1188                                        c->width >> 1, c->height >> 1);
1189             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1190                                        s->tmp_frames[i]->linesize[2],
1191                                        pre_input.f->data[2],
1192                                        pre_input.f->linesize[2],
1193                                        c->width >> 1, c->height >> 1);
1194         }
1195     }
1196
1197     for (j = 0; j < s->max_b_frames + 1; j++) {
1198         int64_t rd = 0;
1199
1200         if (!s->input_picture[j])
1201             break;
1202
1203         c->error[0] = c->error[1] = c->error[2] = 0;
1204
1205         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1206         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1207
1208         out_size = encode_frame(c, s->tmp_frames[0]);
1209
1210         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1211
1212         for (i = 0; i < s->max_b_frames + 1; i++) {
1213             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1214
1215             s->tmp_frames[i + 1]->pict_type = is_p ?
1216                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1217             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1218
1219             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1220
1221             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1222         }
1223
1224         /* get the delayed frames */
1225         while (out_size) {
1226             out_size = encode_frame(c, NULL);
1227             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1228         }
1229
1230         rd += c->error[0] + c->error[1] + c->error[2];
1231
1232         if (rd < best_rd) {
1233             best_rd = rd;
1234             best_b_count = j;
1235         }
1236     }
1237
1238     avcodec_close(c);
1239     av_freep(&c);
1240
1241     return best_b_count;
1242 }
1243
1244 static int select_input_picture(MpegEncContext *s)
1245 {
1246     int i, ret;
1247
1248     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1249         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1250     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1251
1252     /* set next picture type & ordering */
1253     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1254         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1255             !s->next_picture_ptr || s->intra_only) {
1256             s->reordered_input_picture[0] = s->input_picture[0];
1257             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1258             s->reordered_input_picture[0]->f->coded_picture_number =
1259                 s->coded_picture_number++;
1260         } else {
1261             int b_frames;
1262
1263             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1264                 if (s->picture_in_gop_number < s->gop_size &&
1265                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1266                     // FIXME check that te gop check above is +-1 correct
1267                     av_frame_unref(s->input_picture[0]->f);
1268
1269                     emms_c();
1270                     ff_vbv_update(s, 0);
1271
1272                     goto no_output_pic;
1273                 }
1274             }
1275
1276             if (s->flags & CODEC_FLAG_PASS2) {
1277                 for (i = 0; i < s->max_b_frames + 1; i++) {
1278                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1279
1280                     if (pict_num >= s->rc_context.num_entries)
1281                         break;
1282                     if (!s->input_picture[i]) {
1283                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1284                         break;
1285                     }
1286
1287                     s->input_picture[i]->f->pict_type =
1288                         s->rc_context.entry[pict_num].new_pict_type;
1289                 }
1290             }
1291
1292             if (s->avctx->b_frame_strategy == 0) {
1293                 b_frames = s->max_b_frames;
1294                 while (b_frames && !s->input_picture[b_frames])
1295                     b_frames--;
1296             } else if (s->avctx->b_frame_strategy == 1) {
1297                 for (i = 1; i < s->max_b_frames + 1; i++) {
1298                     if (s->input_picture[i] &&
1299                         s->input_picture[i]->b_frame_score == 0) {
1300                         s->input_picture[i]->b_frame_score =
1301                             get_intra_count(s,
1302                                             s->input_picture[i    ]->f->data[0],
1303                                             s->input_picture[i - 1]->f->data[0],
1304                                             s->linesize) + 1;
1305                     }
1306                 }
1307                 for (i = 0; i < s->max_b_frames + 1; i++) {
1308                     if (!s->input_picture[i] ||
1309                         s->input_picture[i]->b_frame_score - 1 >
1310                             s->mb_num / s->avctx->b_sensitivity)
1311                         break;
1312                 }
1313
1314                 b_frames = FFMAX(0, i - 1);
1315
1316                 /* reset scores */
1317                 for (i = 0; i < b_frames + 1; i++) {
1318                     s->input_picture[i]->b_frame_score = 0;
1319                 }
1320             } else if (s->avctx->b_frame_strategy == 2) {
1321                 b_frames = estimate_best_b_count(s);
1322             } else {
1323                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1324                 b_frames = 0;
1325             }
1326
1327             emms_c();
1328
1329             for (i = b_frames - 1; i >= 0; i--) {
1330                 int type = s->input_picture[i]->f->pict_type;
1331                 if (type && type != AV_PICTURE_TYPE_B)
1332                     b_frames = i;
1333             }
1334             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1335                 b_frames == s->max_b_frames) {
1336                 av_log(s->avctx, AV_LOG_ERROR,
1337                        "warning, too many b frames in a row\n");
1338             }
1339
1340             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1341                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1342                     s->gop_size > s->picture_in_gop_number) {
1343                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1344                 } else {
1345                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1346                         b_frames = 0;
1347                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1348                 }
1349             }
1350
1351             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1352                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1353                 b_frames--;
1354
1355             s->reordered_input_picture[0] = s->input_picture[b_frames];
1356             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1357                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1358             s->reordered_input_picture[0]->f->coded_picture_number =
1359                 s->coded_picture_number++;
1360             for (i = 0; i < b_frames; i++) {
1361                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1362                 s->reordered_input_picture[i + 1]->f->pict_type =
1363                     AV_PICTURE_TYPE_B;
1364                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1365                     s->coded_picture_number++;
1366             }
1367         }
1368     }
1369 no_output_pic:
1370     if (s->reordered_input_picture[0]) {
1371         s->reordered_input_picture[0]->reference =
1372            s->reordered_input_picture[0]->f->pict_type !=
1373                AV_PICTURE_TYPE_B ? 3 : 0;
1374
1375         ff_mpeg_unref_picture(s, &s->new_picture);
1376         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1377             return ret;
1378
1379         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1380             // input is a shared pix, so we can't modifiy it -> alloc a new
1381             // one & ensure that the shared one is reuseable
1382
1383             Picture *pic;
1384             int i = ff_find_unused_picture(s, 0);
1385             if (i < 0)
1386                 return i;
1387             pic = &s->picture[i];
1388
1389             pic->reference = s->reordered_input_picture[0]->reference;
1390             if (ff_alloc_picture(s, pic, 0) < 0) {
1391                 return -1;
1392             }
1393
1394             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1395             if (ret < 0)
1396                 return ret;
1397
1398             /* mark us unused / free shared pic */
1399             av_frame_unref(s->reordered_input_picture[0]->f);
1400             s->reordered_input_picture[0]->shared = 0;
1401
1402             s->current_picture_ptr = pic;
1403         } else {
1404             // input is not a shared pix -> reuse buffer for current_pix
1405             s->current_picture_ptr = s->reordered_input_picture[0];
1406             for (i = 0; i < 4; i++) {
1407                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1408             }
1409         }
1410         ff_mpeg_unref_picture(s, &s->current_picture);
1411         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1412                                        s->current_picture_ptr)) < 0)
1413             return ret;
1414
1415         s->picture_number = s->new_picture.f->display_picture_number;
1416     } else {
1417         ff_mpeg_unref_picture(s, &s->new_picture);
1418     }
1419     return 0;
1420 }
1421
1422 static void frame_end(MpegEncContext *s)
1423 {
1424     int i;
1425
1426     if (s->unrestricted_mv &&
1427         s->current_picture.reference &&
1428         !s->intra_only) {
1429         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1430         int hshift = desc->log2_chroma_w;
1431         int vshift = desc->log2_chroma_h;
1432         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1433                                 s->h_edge_pos, s->v_edge_pos,
1434                                 EDGE_WIDTH, EDGE_WIDTH,
1435                                 EDGE_TOP | EDGE_BOTTOM);
1436         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1437                                 s->h_edge_pos >> hshift,
1438                                 s->v_edge_pos >> vshift,
1439                                 EDGE_WIDTH >> hshift,
1440                                 EDGE_WIDTH >> vshift,
1441                                 EDGE_TOP | EDGE_BOTTOM);
1442         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1443                                 s->h_edge_pos >> hshift,
1444                                 s->v_edge_pos >> vshift,
1445                                 EDGE_WIDTH >> hshift,
1446                                 EDGE_WIDTH >> vshift,
1447                                 EDGE_TOP | EDGE_BOTTOM);
1448     }
1449
1450     emms_c();
1451
1452     s->last_pict_type                 = s->pict_type;
1453     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1454     if (s->pict_type!= AV_PICTURE_TYPE_B)
1455         s->last_non_b_pict_type = s->pict_type;
1456
1457     if (s->encoding) {
1458         /* release non-reference frames */
1459         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1460             if (!s->picture[i].reference)
1461                 ff_mpeg_unref_picture(s, &s->picture[i]);
1462         }
1463     }
1464
1465     s->avctx->coded_frame = s->current_picture_ptr->f;
1466
1467 }
1468
1469 static void update_noise_reduction(MpegEncContext *s)
1470 {
1471     int intra, i;
1472
1473     for (intra = 0; intra < 2; intra++) {
1474         if (s->dct_count[intra] > (1 << 16)) {
1475             for (i = 0; i < 64; i++) {
1476                 s->dct_error_sum[intra][i] >>= 1;
1477             }
1478             s->dct_count[intra] >>= 1;
1479         }
1480
1481         for (i = 0; i < 64; i++) {
1482             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1483                                        s->dct_count[intra] +
1484                                        s->dct_error_sum[intra][i] / 2) /
1485                                       (s->dct_error_sum[intra][i] + 1);
1486         }
1487     }
1488 }
1489
1490 static int frame_start(MpegEncContext *s)
1491 {
1492     int ret;
1493
1494     /* mark & release old frames */
1495     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1496         s->last_picture_ptr != s->next_picture_ptr &&
1497         s->last_picture_ptr->f->buf[0]) {
1498         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1499     }
1500
1501     s->current_picture_ptr->f->pict_type = s->pict_type;
1502     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1503
1504     ff_mpeg_unref_picture(s, &s->current_picture);
1505     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1506                                    s->current_picture_ptr)) < 0)
1507         return ret;
1508
1509     if (s->pict_type != AV_PICTURE_TYPE_B) {
1510         s->last_picture_ptr = s->next_picture_ptr;
1511         if (!s->droppable)
1512             s->next_picture_ptr = s->current_picture_ptr;
1513     }
1514
1515     if (s->last_picture_ptr) {
1516         ff_mpeg_unref_picture(s, &s->last_picture);
1517         if (s->last_picture_ptr->f->buf[0] &&
1518             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1519                                        s->last_picture_ptr)) < 0)
1520             return ret;
1521     }
1522     if (s->next_picture_ptr) {
1523         ff_mpeg_unref_picture(s, &s->next_picture);
1524         if (s->next_picture_ptr->f->buf[0] &&
1525             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1526                                        s->next_picture_ptr)) < 0)
1527             return ret;
1528     }
1529
1530     if (s->picture_structure!= PICT_FRAME) {
1531         int i;
1532         for (i = 0; i < 4; i++) {
1533             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1534                 s->current_picture.f->data[i] +=
1535                     s->current_picture.f->linesize[i];
1536             }
1537             s->current_picture.f->linesize[i] *= 2;
1538             s->last_picture.f->linesize[i]    *= 2;
1539             s->next_picture.f->linesize[i]    *= 2;
1540         }
1541     }
1542
1543     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1544         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1545         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1546     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1547         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1548         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1549     } else {
1550         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1551         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1552     }
1553
1554     if (s->dct_error_sum) {
1555         assert(s->avctx->noise_reduction && s->encoding);
1556         update_noise_reduction(s);
1557     }
1558
1559     return 0;
1560 }
1561
1562 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1563                           const AVFrame *pic_arg, int *got_packet)
1564 {
1565     MpegEncContext *s = avctx->priv_data;
1566     int i, stuffing_count, ret;
1567     int context_count = s->slice_context_count;
1568
1569     s->picture_in_gop_number++;
1570
1571     if (load_input_picture(s, pic_arg) < 0)
1572         return -1;
1573
1574     if (select_input_picture(s) < 0) {
1575         return -1;
1576     }
1577
1578     /* output? */
1579     if (s->new_picture.f->data[0]) {
1580         if (!pkt->data &&
1581             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1582             return ret;
1583         if (s->mb_info) {
1584             s->mb_info_ptr = av_packet_new_side_data(pkt,
1585                                  AV_PKT_DATA_H263_MB_INFO,
1586                                  s->mb_width*s->mb_height*12);
1587             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1588         }
1589
1590         for (i = 0; i < context_count; i++) {
1591             int start_y = s->thread_context[i]->start_mb_y;
1592             int   end_y = s->thread_context[i]->  end_mb_y;
1593             int h       = s->mb_height;
1594             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1595             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1596
1597             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1598         }
1599
1600         s->pict_type = s->new_picture.f->pict_type;
1601         //emms_c();
1602         ret = frame_start(s);
1603         if (ret < 0)
1604             return ret;
1605 vbv_retry:
1606         if (encode_picture(s, s->picture_number) < 0)
1607             return -1;
1608
1609         avctx->header_bits = s->header_bits;
1610         avctx->mv_bits     = s->mv_bits;
1611         avctx->misc_bits   = s->misc_bits;
1612         avctx->i_tex_bits  = s->i_tex_bits;
1613         avctx->p_tex_bits  = s->p_tex_bits;
1614         avctx->i_count     = s->i_count;
1615         // FIXME f/b_count in avctx
1616         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1617         avctx->skip_count  = s->skip_count;
1618
1619         frame_end(s);
1620
1621         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1622             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1623
1624         if (avctx->rc_buffer_size) {
1625             RateControlContext *rcc = &s->rc_context;
1626             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1627
1628             if (put_bits_count(&s->pb) > max_size &&
1629                 s->lambda < s->lmax) {
1630                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1631                                        (s->qscale + 1) / s->qscale);
1632                 if (s->adaptive_quant) {
1633                     int i;
1634                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1635                         s->lambda_table[i] =
1636                             FFMAX(s->lambda_table[i] + 1,
1637                                   s->lambda_table[i] * (s->qscale + 1) /
1638                                   s->qscale);
1639                 }
1640                 s->mb_skipped = 0;        // done in frame_start()
1641                 // done in encode_picture() so we must undo it
1642                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1643                     if (s->flipflop_rounding          ||
1644                         s->codec_id == AV_CODEC_ID_H263P ||
1645                         s->codec_id == AV_CODEC_ID_MPEG4)
1646                         s->no_rounding ^= 1;
1647                 }
1648                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1649                     s->time_base       = s->last_time_base;
1650                     s->last_non_b_time = s->time - s->pp_time;
1651                 }
1652                 for (i = 0; i < context_count; i++) {
1653                     PutBitContext *pb = &s->thread_context[i]->pb;
1654                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1655                 }
1656                 goto vbv_retry;
1657             }
1658
1659             assert(s->avctx->rc_max_rate);
1660         }
1661
1662         if (s->flags & CODEC_FLAG_PASS1)
1663             ff_write_pass1_stats(s);
1664
1665         for (i = 0; i < 4; i++) {
1666             s->current_picture_ptr->f->error[i] = s->current_picture.f->error[i];
1667             avctx->error[i] += s->current_picture_ptr->f->error[i];
1668         }
1669
1670         if (s->flags & CODEC_FLAG_PASS1)
1671             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1672                    avctx->i_tex_bits + avctx->p_tex_bits ==
1673                        put_bits_count(&s->pb));
1674         flush_put_bits(&s->pb);
1675         s->frame_bits  = put_bits_count(&s->pb);
1676
1677         stuffing_count = ff_vbv_update(s, s->frame_bits);
1678         if (stuffing_count) {
1679             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1680                     stuffing_count + 50) {
1681                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1682                 return -1;
1683             }
1684
1685             switch (s->codec_id) {
1686             case AV_CODEC_ID_MPEG1VIDEO:
1687             case AV_CODEC_ID_MPEG2VIDEO:
1688                 while (stuffing_count--) {
1689                     put_bits(&s->pb, 8, 0);
1690                 }
1691             break;
1692             case AV_CODEC_ID_MPEG4:
1693                 put_bits(&s->pb, 16, 0);
1694                 put_bits(&s->pb, 16, 0x1C3);
1695                 stuffing_count -= 4;
1696                 while (stuffing_count--) {
1697                     put_bits(&s->pb, 8, 0xFF);
1698                 }
1699             break;
1700             default:
1701                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1702             }
1703             flush_put_bits(&s->pb);
1704             s->frame_bits  = put_bits_count(&s->pb);
1705         }
1706
1707         /* update mpeg1/2 vbv_delay for CBR */
1708         if (s->avctx->rc_max_rate                          &&
1709             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1710             s->out_format == FMT_MPEG1                     &&
1711             90000LL * (avctx->rc_buffer_size - 1) <=
1712                 s->avctx->rc_max_rate * 0xFFFFLL) {
1713             int vbv_delay, min_delay;
1714             double inbits  = s->avctx->rc_max_rate *
1715                              av_q2d(s->avctx->time_base);
1716             int    minbits = s->frame_bits - 8 *
1717                              (s->vbv_delay_ptr - s->pb.buf - 1);
1718             double bits    = s->rc_context.buffer_index + minbits - inbits;
1719
1720             if (bits < 0)
1721                 av_log(s->avctx, AV_LOG_ERROR,
1722                        "Internal error, negative bits\n");
1723
1724             assert(s->repeat_first_field == 0);
1725
1726             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1727             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1728                         s->avctx->rc_max_rate;
1729
1730             vbv_delay = FFMAX(vbv_delay, min_delay);
1731
1732             assert(vbv_delay < 0xFFFF);
1733
1734             s->vbv_delay_ptr[0] &= 0xF8;
1735             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1736             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1737             s->vbv_delay_ptr[2] &= 0x07;
1738             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1739             avctx->vbv_delay     = vbv_delay * 300;
1740         }
1741         s->total_bits     += s->frame_bits;
1742         avctx->frame_bits  = s->frame_bits;
1743
1744         pkt->pts = s->current_picture.f->pts;
1745         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1746             if (!s->current_picture.f->coded_picture_number)
1747                 pkt->dts = pkt->pts - s->dts_delta;
1748             else
1749                 pkt->dts = s->reordered_pts;
1750             s->reordered_pts = pkt->pts;
1751         } else
1752             pkt->dts = pkt->pts;
1753         if (s->current_picture.f->key_frame)
1754             pkt->flags |= AV_PKT_FLAG_KEY;
1755         if (s->mb_info)
1756             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1757     } else {
1758         s->frame_bits = 0;
1759     }
1760     assert((s->frame_bits & 7) == 0);
1761
1762     pkt->size = s->frame_bits / 8;
1763     *got_packet = !!pkt->size;
1764     return 0;
1765 }
1766
1767 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1768                                                 int n, int threshold)
1769 {
1770     static const char tab[64] = {
1771         3, 2, 2, 1, 1, 1, 1, 1,
1772         1, 1, 1, 1, 1, 1, 1, 1,
1773         1, 1, 1, 1, 1, 1, 1, 1,
1774         0, 0, 0, 0, 0, 0, 0, 0,
1775         0, 0, 0, 0, 0, 0, 0, 0,
1776         0, 0, 0, 0, 0, 0, 0, 0,
1777         0, 0, 0, 0, 0, 0, 0, 0,
1778         0, 0, 0, 0, 0, 0, 0, 0
1779     };
1780     int score = 0;
1781     int run = 0;
1782     int i;
1783     int16_t *block = s->block[n];
1784     const int last_index = s->block_last_index[n];
1785     int skip_dc;
1786
1787     if (threshold < 0) {
1788         skip_dc = 0;
1789         threshold = -threshold;
1790     } else
1791         skip_dc = 1;
1792
1793     /* Are all we could set to zero already zero? */
1794     if (last_index <= skip_dc - 1)
1795         return;
1796
1797     for (i = 0; i <= last_index; i++) {
1798         const int j = s->intra_scantable.permutated[i];
1799         const int level = FFABS(block[j]);
1800         if (level == 1) {
1801             if (skip_dc && i == 0)
1802                 continue;
1803             score += tab[run];
1804             run = 0;
1805         } else if (level > 1) {
1806             return;
1807         } else {
1808             run++;
1809         }
1810     }
1811     if (score >= threshold)
1812         return;
1813     for (i = skip_dc; i <= last_index; i++) {
1814         const int j = s->intra_scantable.permutated[i];
1815         block[j] = 0;
1816     }
1817     if (block[0])
1818         s->block_last_index[n] = 0;
1819     else
1820         s->block_last_index[n] = -1;
1821 }
1822
1823 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1824                                int last_index)
1825 {
1826     int i;
1827     const int maxlevel = s->max_qcoeff;
1828     const int minlevel = s->min_qcoeff;
1829     int overflow = 0;
1830
1831     if (s->mb_intra) {
1832         i = 1; // skip clipping of intra dc
1833     } else
1834         i = 0;
1835
1836     for (; i <= last_index; i++) {
1837         const int j = s->intra_scantable.permutated[i];
1838         int level = block[j];
1839
1840         if (level > maxlevel) {
1841             level = maxlevel;
1842             overflow++;
1843         } else if (level < minlevel) {
1844             level = minlevel;
1845             overflow++;
1846         }
1847
1848         block[j] = level;
1849     }
1850
1851     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1852         av_log(s->avctx, AV_LOG_INFO,
1853                "warning, clipping %d dct coefficients to %d..%d\n",
1854                overflow, minlevel, maxlevel);
1855 }
1856
/**
 * Compute a per-pixel weight for an 8x8 block of samples.
 *
 * For every pixel the neighbourhood of up to 3x3 samples (clipped to the
 * 8x8 block) is examined; the weight is
 * 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count, where count is
 * the number of samples in that neighbourhood.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int sample = line[nx];

                    total    += sample;
                    total_sq += sample * sample;
                    samples++;
                }
            }
            weight[8 * row + col] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1880
1881 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1882                                                 int motion_x, int motion_y,
1883                                                 int mb_block_height,
1884                                                 int mb_block_count)
1885 {
1886     int16_t weight[8][64];
1887     int16_t orig[8][64];
1888     const int mb_x = s->mb_x;
1889     const int mb_y = s->mb_y;
1890     int i;
1891     int skip_dct[8];
1892     int dct_offset = s->linesize * 8; // default for progressive frames
1893     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1894     ptrdiff_t wrap_y, wrap_c;
1895
1896     for (i = 0; i < mb_block_count; i++)
1897         skip_dct[i] = s->skipdct;
1898
1899     if (s->adaptive_quant) {
1900         const int last_qp = s->qscale;
1901         const int mb_xy = mb_x + mb_y * s->mb_stride;
1902
1903         s->lambda = s->lambda_table[mb_xy];
1904         update_qscale(s);
1905
1906         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1907             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1908             s->dquant = s->qscale - last_qp;
1909
1910             if (s->out_format == FMT_H263) {
1911                 s->dquant = av_clip(s->dquant, -2, 2);
1912
1913                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1914                     if (!s->mb_intra) {
1915                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1916                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1917                                 s->dquant = 0;
1918                         }
1919                         if (s->mv_type == MV_TYPE_8X8)
1920                             s->dquant = 0;
1921                     }
1922                 }
1923             }
1924         }
1925         ff_set_qscale(s, last_qp + s->dquant);
1926     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1927         ff_set_qscale(s, s->qscale + s->dquant);
1928
1929     wrap_y = s->linesize;
1930     wrap_c = s->uvlinesize;
1931     ptr_y  = s->new_picture.f->data[0] +
1932              (mb_y * 16 * wrap_y)              + mb_x * 16;
1933     ptr_cb = s->new_picture.f->data[1] +
1934              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1935     ptr_cr = s->new_picture.f->data[2] +
1936              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1937
1938     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1939         uint8_t *ebuf = s->edge_emu_buffer + 32;
1940         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
1941                                  wrap_y, wrap_y,
1942                                  16, 16, mb_x * 16, mb_y * 16,
1943                                  s->width, s->height);
1944         ptr_y = ebuf;
1945         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
1946                                  wrap_c, wrap_c,
1947                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1948                                  s->width >> 1, s->height >> 1);
1949         ptr_cb = ebuf + 18 * wrap_y;
1950         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
1951                                  wrap_c, wrap_c,
1952                                  8, mb_block_height, mb_x * 8, mb_y * 8,
1953                                  s->width >> 1, s->height >> 1);
1954         ptr_cr = ebuf + 18 * wrap_y + 8;
1955     }
1956
1957     if (s->mb_intra) {
1958         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1959             int progressive_score, interlaced_score;
1960
1961             s->interlaced_dct = 0;
1962             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
1963                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1964                                                      NULL, wrap_y, 8) - 400;
1965
1966             if (progressive_score > 0) {
1967                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
1968                                                         NULL, wrap_y * 2, 8) +
1969                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
1970                                                         NULL, wrap_y * 2, 8);
1971                 if (progressive_score > interlaced_score) {
1972                     s->interlaced_dct = 1;
1973
1974                     dct_offset = wrap_y;
1975                     wrap_y <<= 1;
1976                     if (s->chroma_format == CHROMA_422)
1977                         wrap_c <<= 1;
1978                 }
1979             }
1980         }
1981
1982         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
1983         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
1984         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
1985         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
1986
1987         if (s->flags & CODEC_FLAG_GRAY) {
1988             skip_dct[4] = 1;
1989             skip_dct[5] = 1;
1990         } else {
1991             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1992             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1993             if (!s->chroma_y_shift) { /* 422 */
1994                 s->pdsp.get_pixels(s->block[6],
1995                                    ptr_cb + (dct_offset >> 1), wrap_c);
1996                 s->pdsp.get_pixels(s->block[7],
1997                                    ptr_cr + (dct_offset >> 1), wrap_c);
1998             }
1999         }
2000     } else {
2001         op_pixels_func (*op_pix)[4];
2002         qpel_mc_func (*op_qpix)[16];
2003         uint8_t *dest_y, *dest_cb, *dest_cr;
2004
2005         dest_y  = s->dest[0];
2006         dest_cb = s->dest[1];
2007         dest_cr = s->dest[2];
2008
2009         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2010             op_pix  = s->hdsp.put_pixels_tab;
2011             op_qpix = s->qdsp.put_qpel_pixels_tab;
2012         } else {
2013             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2014             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2015         }
2016
2017         if (s->mv_dir & MV_DIR_FORWARD) {
2018             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2019                           s->last_picture.f->data,
2020                           op_pix, op_qpix);
2021             op_pix  = s->hdsp.avg_pixels_tab;
2022             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2023         }
2024         if (s->mv_dir & MV_DIR_BACKWARD) {
2025             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2026                           s->next_picture.f->data,
2027                           op_pix, op_qpix);
2028         }
2029
2030         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2031             int progressive_score, interlaced_score;
2032
2033             s->interlaced_dct = 0;
2034             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2035                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2036                                                      ptr_y + wrap_y * 8,
2037                                                      wrap_y, 8) - 400;
2038
2039             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2040                 progressive_score -= 400;
2041
2042             if (progressive_score > 0) {
2043                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2044                                                         wrap_y * 2, 8) +
2045                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2046                                                         ptr_y + wrap_y,
2047                                                         wrap_y * 2, 8);
2048
2049                 if (progressive_score > interlaced_score) {
2050                     s->interlaced_dct = 1;
2051
2052                     dct_offset = wrap_y;
2053                     wrap_y <<= 1;
2054                     if (s->chroma_format == CHROMA_422)
2055                         wrap_c <<= 1;
2056                 }
2057             }
2058         }
2059
2060         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2061         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2062         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2063                             dest_y + dct_offset, wrap_y);
2064         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2065                             dest_y + dct_offset + 8, wrap_y);
2066
2067         if (s->flags & CODEC_FLAG_GRAY) {
2068             skip_dct[4] = 1;
2069             skip_dct[5] = 1;
2070         } else {
2071             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2072             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2073             if (!s->chroma_y_shift) { /* 422 */
2074                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2075                                     dest_cb + (dct_offset >> 1), wrap_c);
2076                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2077                                     dest_cr + (dct_offset >> 1), wrap_c);
2078             }
2079         }
2080         /* pre quantization */
2081         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2082                 2 * s->qscale * s->qscale) {
2083             // FIXME optimize
2084             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2085                 skip_dct[0] = 1;
2086             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2087                 skip_dct[1] = 1;
2088             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2089                                wrap_y, 8) < 20 * s->qscale)
2090                 skip_dct[2] = 1;
2091             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2092                                wrap_y, 8) < 20 * s->qscale)
2093                 skip_dct[3] = 1;
2094             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2095                 skip_dct[4] = 1;
2096             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2097                 skip_dct[5] = 1;
2098             if (!s->chroma_y_shift) { /* 422 */
2099                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2100                                    dest_cb + (dct_offset >> 1),
2101                                    wrap_c, 8) < 20 * s->qscale)
2102                     skip_dct[6] = 1;
2103                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2104                                    dest_cr + (dct_offset >> 1),
2105                                    wrap_c, 8) < 20 * s->qscale)
2106                     skip_dct[7] = 1;
2107             }
2108         }
2109     }
2110
2111     if (s->quantizer_noise_shaping) {
2112         if (!skip_dct[0])
2113             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2114         if (!skip_dct[1])
2115             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2116         if (!skip_dct[2])
2117             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2118         if (!skip_dct[3])
2119             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2120         if (!skip_dct[4])
2121             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2122         if (!skip_dct[5])
2123             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2124         if (!s->chroma_y_shift) { /* 422 */
2125             if (!skip_dct[6])
2126                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2127                                   wrap_c);
2128             if (!skip_dct[7])
2129                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2130                                   wrap_c);
2131         }
2132         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2133     }
2134
2135     /* DCT & quantize */
2136     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2137     {
2138         for (i = 0; i < mb_block_count; i++) {
2139             if (!skip_dct[i]) {
2140                 int overflow;
2141                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2142                 // FIXME we could decide to change to quantizer instead of
2143                 // clipping
2144                 // JS: I don't think that would be a good idea it could lower
2145                 //     quality instead of improve it. Just INTRADC clipping
2146                 //     deserves changes in quantizer
2147                 if (overflow)
2148                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2149             } else
2150                 s->block_last_index[i] = -1;
2151         }
2152         if (s->quantizer_noise_shaping) {
2153             for (i = 0; i < mb_block_count; i++) {
2154                 if (!skip_dct[i]) {
2155                     s->block_last_index[i] =
2156                         dct_quantize_refine(s, s->block[i], weight[i],
2157                                             orig[i], i, s->qscale);
2158                 }
2159             }
2160         }
2161
2162         if (s->luma_elim_threshold && !s->mb_intra)
2163             for (i = 0; i < 4; i++)
2164                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2165         if (s->chroma_elim_threshold && !s->mb_intra)
2166             for (i = 4; i < mb_block_count; i++)
2167                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2168
2169         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2170             for (i = 0; i < mb_block_count; i++) {
2171                 if (s->block_last_index[i] == -1)
2172                     s->coded_score[i] = INT_MAX / 256;
2173             }
2174         }
2175     }
2176
2177     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2178         s->block_last_index[4] =
2179         s->block_last_index[5] = 0;
2180         s->block[4][0] =
2181         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2182     }
2183
2184     // non c quantize code returns incorrect block_last_index FIXME
2185     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2186         for (i = 0; i < mb_block_count; i++) {
2187             int j;
2188             if (s->block_last_index[i] > 0) {
2189                 for (j = 63; j > 0; j--) {
2190                     if (s->block[i][s->intra_scantable.permutated[j]])
2191                         break;
2192                 }
2193                 s->block_last_index[i] = j;
2194             }
2195         }
2196     }
2197
2198     /* huffman encode */
2199     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2200     case AV_CODEC_ID_MPEG1VIDEO:
2201     case AV_CODEC_ID_MPEG2VIDEO:
2202         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2203             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2204         break;
2205     case AV_CODEC_ID_MPEG4:
2206         if (CONFIG_MPEG4_ENCODER)
2207             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2208         break;
2209     case AV_CODEC_ID_MSMPEG4V2:
2210     case AV_CODEC_ID_MSMPEG4V3:
2211     case AV_CODEC_ID_WMV1:
2212         if (CONFIG_MSMPEG4_ENCODER)
2213             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2214         break;
2215     case AV_CODEC_ID_WMV2:
2216         if (CONFIG_WMV2_ENCODER)
2217             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2218         break;
2219     case AV_CODEC_ID_H261:
2220         if (CONFIG_H261_ENCODER)
2221             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2222         break;
2223     case AV_CODEC_ID_H263:
2224     case AV_CODEC_ID_H263P:
2225     case AV_CODEC_ID_FLV1:
2226     case AV_CODEC_ID_RV10:
2227     case AV_CODEC_ID_RV20:
2228         if (CONFIG_H263_ENCODER)
2229             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2230         break;
2231     case AV_CODEC_ID_MJPEG:
2232         if (CONFIG_MJPEG_ENCODER)
2233             ff_mjpeg_encode_mb(s, s->block);
2234         break;
2235     default:
2236         assert(0);
2237     }
2238 }
2239
2240 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2241 {
2242     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2243     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2244 }
2245
2246 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2247     int i;
2248
2249     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2250
2251     /* mpeg1 */
2252     d->mb_skip_run= s->mb_skip_run;
2253     for(i=0; i<3; i++)
2254         d->last_dc[i] = s->last_dc[i];
2255
2256     /* statistics */
2257     d->mv_bits= s->mv_bits;
2258     d->i_tex_bits= s->i_tex_bits;
2259     d->p_tex_bits= s->p_tex_bits;
2260     d->i_count= s->i_count;
2261     d->f_count= s->f_count;
2262     d->b_count= s->b_count;
2263     d->skip_count= s->skip_count;
2264     d->misc_bits= s->misc_bits;
2265     d->last_bits= 0;
2266
2267     d->mb_skipped= 0;
2268     d->qscale= s->qscale;
2269     d->dquant= s->dquant;
2270
2271     d->esc3_level_length= s->esc3_level_length;
2272 }
2273
2274 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2275     int i;
2276
2277     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2278     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2279
2280     /* mpeg1 */
2281     d->mb_skip_run= s->mb_skip_run;
2282     for(i=0; i<3; i++)
2283         d->last_dc[i] = s->last_dc[i];
2284
2285     /* statistics */
2286     d->mv_bits= s->mv_bits;
2287     d->i_tex_bits= s->i_tex_bits;
2288     d->p_tex_bits= s->p_tex_bits;
2289     d->i_count= s->i_count;
2290     d->f_count= s->f_count;
2291     d->b_count= s->b_count;
2292     d->skip_count= s->skip_count;
2293     d->misc_bits= s->misc_bits;
2294
2295     d->mb_intra= s->mb_intra;
2296     d->mb_skipped= s->mb_skipped;
2297     d->mv_type= s->mv_type;
2298     d->mv_dir= s->mv_dir;
2299     d->pb= s->pb;
2300     if(s->data_partitioning){
2301         d->pb2= s->pb2;
2302         d->tex_pb= s->tex_pb;
2303     }
2304     d->block= s->block;
2305     for(i=0; i<8; i++)
2306         d->block_last_index[i]= s->block_last_index[i];
2307     d->interlaced_dct= s->interlaced_dct;
2308     d->qscale= s->qscale;
2309
2310     d->esc3_level_length= s->esc3_level_length;
2311 }
2312
2313 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2314                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2315                            int *dmin, int *next_block, int motion_x, int motion_y)
2316 {
2317     int score;
2318     uint8_t *dest_backup[3];
2319
2320     copy_context_before_encode(s, backup, type);
2321
2322     s->block= s->blocks[*next_block];
2323     s->pb= pb[*next_block];
2324     if(s->data_partitioning){
2325         s->pb2   = pb2   [*next_block];
2326         s->tex_pb= tex_pb[*next_block];
2327     }
2328
2329     if(*next_block){
2330         memcpy(dest_backup, s->dest, sizeof(s->dest));
2331         s->dest[0] = s->rd_scratchpad;
2332         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2333         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2334         assert(s->linesize >= 32); //FIXME
2335     }
2336
2337     encode_mb(s, motion_x, motion_y);
2338
2339     score= put_bits_count(&s->pb);
2340     if(s->data_partitioning){
2341         score+= put_bits_count(&s->pb2);
2342         score+= put_bits_count(&s->tex_pb);
2343     }
2344
2345     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2346         ff_mpv_decode_mb(s, s->block);
2347
2348         score *= s->lambda2;
2349         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2350     }
2351
2352     if(*next_block){
2353         memcpy(s->dest, dest_backup, sizeof(s->dest));
2354     }
2355
2356     if(score<*dmin){
2357         *dmin= score;
2358         *next_block^=1;
2359
2360         copy_context_after_encode(best, s, type);
2361     }
2362 }
2363
2364 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2365     uint32_t *sq = ff_square_tab + 256;
2366     int acc=0;
2367     int x,y;
2368
2369     if(w==16 && h==16)
2370         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2371     else if(w==8 && h==8)
2372         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2373
2374     for(y=0; y<h; y++){
2375         for(x=0; x<w; x++){
2376             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2377         }
2378     }
2379
2380     assert(acc>=0);
2381
2382     return acc;
2383 }
2384
2385 static int sse_mb(MpegEncContext *s){
2386     int w= 16;
2387     int h= 16;
2388
2389     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2390     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2391
2392     if(w==16 && h==16)
2393       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2394         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2395                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2396                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2397       }else{
2398         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2399                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2400                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2401       }
2402     else
2403         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2404                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2405                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2406 }
2407
2408 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2409     MpegEncContext *s= *(void**)arg;
2410
2411
2412     s->me.pre_pass=1;
2413     s->me.dia_size= s->avctx->pre_dia_size;
2414     s->first_slice_line=1;
2415     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2416         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2417             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2418         }
2419         s->first_slice_line=0;
2420     }
2421
2422     s->me.pre_pass=0;
2423
2424     return 0;
2425 }
2426
2427 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2428     MpegEncContext *s= *(void**)arg;
2429
2430     s->me.dia_size= s->avctx->dia_size;
2431     s->first_slice_line=1;
2432     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2433         s->mb_x=0; //for block init below
2434         ff_init_block_index(s);
2435         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2436             s->block_index[0]+=2;
2437             s->block_index[1]+=2;
2438             s->block_index[2]+=2;
2439             s->block_index[3]+=2;
2440
2441             /* compute motion vector & mb_type and store in context */
2442             if(s->pict_type==AV_PICTURE_TYPE_B)
2443                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2444             else
2445                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2446         }
2447         s->first_slice_line=0;
2448     }
2449     return 0;
2450 }
2451
2452 static int mb_var_thread(AVCodecContext *c, void *arg){
2453     MpegEncContext *s= *(void**)arg;
2454     int mb_x, mb_y;
2455
2456     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2457         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2458             int xx = mb_x * 16;
2459             int yy = mb_y * 16;
2460             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2461             int varc;
2462             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2463
2464             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2465                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2466
2467             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2468             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2469             s->me.mb_var_sum_temp    += varc;
2470         }
2471     }
2472     return 0;
2473 }
2474
2475 static void write_slice_end(MpegEncContext *s){
2476     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2477         if(s->partitioned_frame){
2478             ff_mpeg4_merge_partitions(s);
2479         }
2480
2481         ff_mpeg4_stuffing(&s->pb);
2482     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2483         ff_mjpeg_encode_stuffing(&s->pb);
2484     }
2485
2486     avpriv_align_put_bits(&s->pb);
2487     flush_put_bits(&s->pb);
2488
2489     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2490         s->misc_bits+= get_bits_diff(s);
2491 }
2492
2493 static void write_mb_info(MpegEncContext *s)
2494 {
2495     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2496     int offset = put_bits_count(&s->pb);
2497     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2498     int gobn = s->mb_y / s->gob_index;
2499     int pred_x, pred_y;
2500     if (CONFIG_H263_ENCODER)
2501         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2502     bytestream_put_le32(&ptr, offset);
2503     bytestream_put_byte(&ptr, s->qscale);
2504     bytestream_put_byte(&ptr, gobn);
2505     bytestream_put_le16(&ptr, mba);
2506     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2507     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2508     /* 4MV not implemented */
2509     bytestream_put_byte(&ptr, 0); /* hmv2 */
2510     bytestream_put_byte(&ptr, 0); /* vmv2 */
2511 }
2512
2513 static void update_mb_info(MpegEncContext *s, int startcode)
2514 {
2515     if (!s->mb_info)
2516         return;
2517     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2518         s->mb_info_size += 12;
2519         s->prev_mb_info = s->last_mb_info;
2520     }
2521     if (startcode) {
2522         s->prev_mb_info = put_bits_count(&s->pb)/8;
2523         /* This might have incremented mb_info_size above, and we return without
2524          * actually writing any info into that slot yet. But in that case,
2525          * this will be called again at the start of the after writing the
2526          * start code, actually writing the mb info. */
2527         return;
2528     }
2529
2530     s->last_mb_info = put_bits_count(&s->pb)/8;
2531     if (!s->mb_info_size)
2532         s->mb_info_size += 12;
2533     write_mb_info(s);
2534 }
2535
2536 static int encode_thread(AVCodecContext *c, void *arg){
2537     MpegEncContext *s= *(void**)arg;
2538     int mb_x, mb_y, pdif = 0;
2539     int chr_h= 16>>s->chroma_y_shift;
2540     int i, j;
2541     MpegEncContext best_s, backup_s;
2542     uint8_t bit_buf[2][MAX_MB_BYTES];
2543     uint8_t bit_buf2[2][MAX_MB_BYTES];
2544     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2545     PutBitContext pb[2], pb2[2], tex_pb[2];
2546
2547     for(i=0; i<2; i++){
2548         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2549         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2550         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2551     }
2552
2553     s->last_bits= put_bits_count(&s->pb);
2554     s->mv_bits=0;
2555     s->misc_bits=0;
2556     s->i_tex_bits=0;
2557     s->p_tex_bits=0;
2558     s->i_count=0;
2559     s->f_count=0;
2560     s->b_count=0;
2561     s->skip_count=0;
2562
2563     for(i=0; i<3; i++){
2564         /* init last dc values */
2565         /* note: quant matrix value (8) is implied here */
2566         s->last_dc[i] = 128 << s->intra_dc_precision;
2567
2568         s->current_picture.f->error[i] = 0;
2569     }
2570     s->mb_skip_run = 0;
2571     memset(s->last_mv, 0, sizeof(s->last_mv));
2572
2573     s->last_mv_dir = 0;
2574
2575     switch(s->codec_id){
2576     case AV_CODEC_ID_H263:
2577     case AV_CODEC_ID_H263P:
2578     case AV_CODEC_ID_FLV1:
2579         if (CONFIG_H263_ENCODER)
2580             s->gob_index = ff_h263_get_gob_height(s);
2581         break;
2582     case AV_CODEC_ID_MPEG4:
2583         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2584             ff_mpeg4_init_partitions(s);
2585         break;
2586     }
2587
2588     s->resync_mb_x=0;
2589     s->resync_mb_y=0;
2590     s->first_slice_line = 1;
2591     s->ptr_lastgob = s->pb.buf;
2592     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2593         s->mb_x=0;
2594         s->mb_y= mb_y;
2595
2596         ff_set_qscale(s, s->qscale);
2597         ff_init_block_index(s);
2598
2599         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2600             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2601             int mb_type= s->mb_type[xy];
2602 //            int d;
2603             int dmin= INT_MAX;
2604             int dir;
2605
2606             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2607                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2608                 return -1;
2609             }
2610             if(s->data_partitioning){
2611                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2612                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2613                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2614                     return -1;
2615                 }
2616             }
2617
2618             s->mb_x = mb_x;
2619             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2620             ff_update_block_index(s);
2621
2622             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2623                 ff_h261_reorder_mb_index(s);
2624                 xy= s->mb_y*s->mb_stride + s->mb_x;
2625                 mb_type= s->mb_type[xy];
2626             }
2627
2628             /* write gob / video packet header  */
2629             if(s->rtp_mode){
2630                 int current_packet_size, is_gob_start;
2631
2632                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2633
2634                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2635
2636                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2637
2638                 switch(s->codec_id){
2639                 case AV_CODEC_ID_H263:
2640                 case AV_CODEC_ID_H263P:
2641                     if(!s->h263_slice_structured)
2642                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2643                     break;
2644                 case AV_CODEC_ID_MPEG2VIDEO:
2645                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2646                 case AV_CODEC_ID_MPEG1VIDEO:
2647                     if(s->mb_skip_run) is_gob_start=0;
2648                     break;
2649                 }
2650
2651                 if(is_gob_start){
2652                     if(s->start_mb_y != mb_y || mb_x!=0){
2653                         write_slice_end(s);
2654
2655                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2656                             ff_mpeg4_init_partitions(s);
2657                         }
2658                     }
2659
2660                     assert((put_bits_count(&s->pb)&7) == 0);
2661                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2662
2663                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2664                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2665                         int d = 100 / s->error_rate;
2666                         if(r % d == 0){
2667                             current_packet_size=0;
2668                             s->pb.buf_ptr= s->ptr_lastgob;
2669                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2670                         }
2671                     }
2672
2673                     if (s->avctx->rtp_callback){
2674                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2675                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2676                     }
2677                     update_mb_info(s, 1);
2678
2679                     switch(s->codec_id){
2680                     case AV_CODEC_ID_MPEG4:
2681                         if (CONFIG_MPEG4_ENCODER) {
2682                             ff_mpeg4_encode_video_packet_header(s);
2683                             ff_mpeg4_clean_buffers(s);
2684                         }
2685                     break;
2686                     case AV_CODEC_ID_MPEG1VIDEO:
2687                     case AV_CODEC_ID_MPEG2VIDEO:
2688                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2689                             ff_mpeg1_encode_slice_header(s);
2690                             ff_mpeg1_clean_buffers(s);
2691                         }
2692                     break;
2693                     case AV_CODEC_ID_H263:
2694                     case AV_CODEC_ID_H263P:
2695                         if (CONFIG_H263_ENCODER)
2696                             ff_h263_encode_gob_header(s, mb_y);
2697                     break;
2698                     }
2699
2700                     if(s->flags&CODEC_FLAG_PASS1){
2701                         int bits= put_bits_count(&s->pb);
2702                         s->misc_bits+= bits - s->last_bits;
2703                         s->last_bits= bits;
2704                     }
2705
2706                     s->ptr_lastgob += current_packet_size;
2707                     s->first_slice_line=1;
2708                     s->resync_mb_x=mb_x;
2709                     s->resync_mb_y=mb_y;
2710                 }
2711             }
2712
2713             if(  (s->resync_mb_x   == s->mb_x)
2714                && s->resync_mb_y+1 == s->mb_y){
2715                 s->first_slice_line=0;
2716             }
2717
2718             s->mb_skipped=0;
2719             s->dquant=0; //only for QP_RD
2720
2721             update_mb_info(s, 0);
2722
2723             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2724                 int next_block=0;
2725                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2726
2727                 copy_context_before_encode(&backup_s, s, -1);
2728                 backup_s.pb= s->pb;
2729                 best_s.data_partitioning= s->data_partitioning;
2730                 best_s.partitioned_frame= s->partitioned_frame;
2731                 if(s->data_partitioning){
2732                     backup_s.pb2= s->pb2;
2733                     backup_s.tex_pb= s->tex_pb;
2734                 }
2735
2736                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2737                     s->mv_dir = MV_DIR_FORWARD;
2738                     s->mv_type = MV_TYPE_16X16;
2739                     s->mb_intra= 0;
2740                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2741                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2742                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2743                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2744                 }
2745                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2746                     s->mv_dir = MV_DIR_FORWARD;
2747                     s->mv_type = MV_TYPE_FIELD;
2748                     s->mb_intra= 0;
2749                     for(i=0; i<2; i++){
2750                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2751                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2752                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2753                     }
2754                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2755                                  &dmin, &next_block, 0, 0);
2756                 }
2757                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2758                     s->mv_dir = MV_DIR_FORWARD;
2759                     s->mv_type = MV_TYPE_16X16;
2760                     s->mb_intra= 0;
2761                     s->mv[0][0][0] = 0;
2762                     s->mv[0][0][1] = 0;
2763                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2764                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2765                 }
2766                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2767                     s->mv_dir = MV_DIR_FORWARD;
2768                     s->mv_type = MV_TYPE_8X8;
2769                     s->mb_intra= 0;
2770                     for(i=0; i<4; i++){
2771                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2772                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2773                     }
2774                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2775                                  &dmin, &next_block, 0, 0);
2776                 }
2777                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2778                     s->mv_dir = MV_DIR_FORWARD;
2779                     s->mv_type = MV_TYPE_16X16;
2780                     s->mb_intra= 0;
2781                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2782                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2783                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2784                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2785                 }
2786                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2787                     s->mv_dir = MV_DIR_BACKWARD;
2788                     s->mv_type = MV_TYPE_16X16;
2789                     s->mb_intra= 0;
2790                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2791                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2792                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2793                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2794                 }
2795                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2796                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2797                     s->mv_type = MV_TYPE_16X16;
2798                     s->mb_intra= 0;
2799                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2800                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2801                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2802                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2803                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2804                                  &dmin, &next_block, 0, 0);
2805                 }
2806                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2807                     s->mv_dir = MV_DIR_FORWARD;
2808                     s->mv_type = MV_TYPE_FIELD;
2809                     s->mb_intra= 0;
2810                     for(i=0; i<2; i++){
2811                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2812                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2813                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2814                     }
2815                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2816                                  &dmin, &next_block, 0, 0);
2817                 }
2818                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2819                     s->mv_dir = MV_DIR_BACKWARD;
2820                     s->mv_type = MV_TYPE_FIELD;
2821                     s->mb_intra= 0;
2822                     for(i=0; i<2; i++){
2823                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2824                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2825                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2826                     }
2827                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2828                                  &dmin, &next_block, 0, 0);
2829                 }
2830                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2831                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2832                     s->mv_type = MV_TYPE_FIELD;
2833                     s->mb_intra= 0;
2834                     for(dir=0; dir<2; dir++){
2835                         for(i=0; i<2; i++){
2836                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2837                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2838                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2839                         }
2840                     }
2841                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2842                                  &dmin, &next_block, 0, 0);
2843                 }
2844                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2845                     s->mv_dir = 0;
2846                     s->mv_type = MV_TYPE_16X16;
2847                     s->mb_intra= 1;
2848                     s->mv[0][0][0] = 0;
2849                     s->mv[0][0][1] = 0;
2850                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2851                                  &dmin, &next_block, 0, 0);
2852                     if(s->h263_pred || s->h263_aic){
2853                         if(best_s.mb_intra)
2854                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2855                         else
2856                             ff_clean_intra_table_entries(s); //old mode?
2857                     }
2858                 }
2859
2860                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2861                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2862                         const int last_qp= backup_s.qscale;
2863                         int qpi, qp, dc[6];
2864                         int16_t ac[6][16];
2865                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2866                         static const int dquant_tab[4]={-1,1,-2,2};
2867
2868                         assert(backup_s.dquant == 0);
2869
2870                         //FIXME intra
2871                         s->mv_dir= best_s.mv_dir;
2872                         s->mv_type = MV_TYPE_16X16;
2873                         s->mb_intra= best_s.mb_intra;
2874                         s->mv[0][0][0] = best_s.mv[0][0][0];
2875                         s->mv[0][0][1] = best_s.mv[0][0][1];
2876                         s->mv[1][0][0] = best_s.mv[1][0][0];
2877                         s->mv[1][0][1] = best_s.mv[1][0][1];
2878
2879                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2880                         for(; qpi<4; qpi++){
2881                             int dquant= dquant_tab[qpi];
2882                             qp= last_qp + dquant;
2883                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2884                                 continue;
2885                             backup_s.dquant= dquant;
2886                             if(s->mb_intra && s->dc_val[0]){
2887                                 for(i=0; i<6; i++){
2888                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2889                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2890                                 }
2891                             }
2892
2893                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2894                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2895                             if(best_s.qscale != qp){
2896                                 if(s->mb_intra && s->dc_val[0]){
2897                                     for(i=0; i<6; i++){
2898                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2899                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2900                                     }
2901                                 }
2902                             }
2903                         }
2904                     }
2905                 }
2906                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2907                     int mx= s->b_direct_mv_table[xy][0];
2908                     int my= s->b_direct_mv_table[xy][1];
2909
2910                     backup_s.dquant = 0;
2911                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2912                     s->mb_intra= 0;
2913                     ff_mpeg4_set_direct_mv(s, mx, my);
2914                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2915                                  &dmin, &next_block, mx, my);
2916                 }
2917                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2918                     backup_s.dquant = 0;
2919                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2920                     s->mb_intra= 0;
2921                     ff_mpeg4_set_direct_mv(s, 0, 0);
2922                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2923                                  &dmin, &next_block, 0, 0);
2924                 }
2925                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2926                     int coded=0;
2927                     for(i=0; i<6; i++)
2928                         coded |= s->block_last_index[i];
2929                     if(coded){
2930                         int mx,my;
2931                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2932                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2933                             mx=my=0; //FIXME find the one we actually used
2934                             ff_mpeg4_set_direct_mv(s, mx, my);
2935                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2936                             mx= s->mv[1][0][0];
2937                             my= s->mv[1][0][1];
2938                         }else{
2939                             mx= s->mv[0][0][0];
2940                             my= s->mv[0][0][1];
2941                         }
2942
2943                         s->mv_dir= best_s.mv_dir;
2944                         s->mv_type = best_s.mv_type;
2945                         s->mb_intra= 0;
2946 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2947                         s->mv[0][0][1] = best_s.mv[0][0][1];
2948                         s->mv[1][0][0] = best_s.mv[1][0][0];
2949                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2950                         backup_s.dquant= 0;
2951                         s->skipdct=1;
2952                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2953                                         &dmin, &next_block, mx, my);
2954                         s->skipdct=0;
2955                     }
2956                 }
2957
2958                 s->current_picture.qscale_table[xy] = best_s.qscale;
2959
2960                 copy_context_after_encode(s, &best_s, -1);
2961
2962                 pb_bits_count= put_bits_count(&s->pb);
2963                 flush_put_bits(&s->pb);
2964                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2965                 s->pb= backup_s.pb;
2966
2967                 if(s->data_partitioning){
2968                     pb2_bits_count= put_bits_count(&s->pb2);
2969                     flush_put_bits(&s->pb2);
2970                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2971                     s->pb2= backup_s.pb2;
2972
2973                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2974                     flush_put_bits(&s->tex_pb);
2975                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2976                     s->tex_pb= backup_s.tex_pb;
2977                 }
2978                 s->last_bits= put_bits_count(&s->pb);
2979
2980                 if (CONFIG_H263_ENCODER &&
2981                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2982                     ff_h263_update_motion_val(s);
2983
2984                 if(next_block==0){ //FIXME 16 vs linesize16
2985                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2986                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2987                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2988                 }
2989
2990                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2991                     ff_mpv_decode_mb(s, s->block);
2992             } else {
2993                 int motion_x = 0, motion_y = 0;
2994                 s->mv_type=MV_TYPE_16X16;
2995                 // only one MB-Type possible
2996
2997                 switch(mb_type){
2998                 case CANDIDATE_MB_TYPE_INTRA:
2999                     s->mv_dir = 0;
3000                     s->mb_intra= 1;
3001                     motion_x= s->mv[0][0][0] = 0;
3002                     motion_y= s->mv[0][0][1] = 0;
3003                     break;
3004                 case CANDIDATE_MB_TYPE_INTER:
3005                     s->mv_dir = MV_DIR_FORWARD;
3006                     s->mb_intra= 0;
3007                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3008                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3009                     break;
3010                 case CANDIDATE_MB_TYPE_INTER_I:
3011                     s->mv_dir = MV_DIR_FORWARD;
3012                     s->mv_type = MV_TYPE_FIELD;
3013                     s->mb_intra= 0;
3014                     for(i=0; i<2; i++){
3015                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3016                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3017                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3018                     }
3019                     break;
3020                 case CANDIDATE_MB_TYPE_INTER4V:
3021                     s->mv_dir = MV_DIR_FORWARD;
3022                     s->mv_type = MV_TYPE_8X8;
3023                     s->mb_intra= 0;
3024                     for(i=0; i<4; i++){
3025                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3026                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3027                     }
3028                     break;
3029                 case CANDIDATE_MB_TYPE_DIRECT:
3030                     if (CONFIG_MPEG4_ENCODER) {
3031                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3032                         s->mb_intra= 0;
3033                         motion_x=s->b_direct_mv_table[xy][0];
3034                         motion_y=s->b_direct_mv_table[xy][1];
3035                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3036                     }
3037                     break;
3038                 case CANDIDATE_MB_TYPE_DIRECT0:
3039                     if (CONFIG_MPEG4_ENCODER) {
3040                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3041                         s->mb_intra= 0;
3042                         ff_mpeg4_set_direct_mv(s, 0, 0);
3043                     }
3044                     break;
3045                 case CANDIDATE_MB_TYPE_BIDIR:
3046                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3047                     s->mb_intra= 0;
3048                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3049                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3050                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3051                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3052                     break;
3053                 case CANDIDATE_MB_TYPE_BACKWARD:
3054                     s->mv_dir = MV_DIR_BACKWARD;
3055                     s->mb_intra= 0;
3056                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3057                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3058                     break;
3059                 case CANDIDATE_MB_TYPE_FORWARD:
3060                     s->mv_dir = MV_DIR_FORWARD;
3061                     s->mb_intra= 0;
3062                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3063                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3064                     break;
3065                 case CANDIDATE_MB_TYPE_FORWARD_I:
3066                     s->mv_dir = MV_DIR_FORWARD;
3067                     s->mv_type = MV_TYPE_FIELD;
3068                     s->mb_intra= 0;
3069                     for(i=0; i<2; i++){
3070                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3071                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3072                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3073                     }
3074                     break;
3075                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3076                     s->mv_dir = MV_DIR_BACKWARD;
3077                     s->mv_type = MV_TYPE_FIELD;
3078                     s->mb_intra= 0;
3079                     for(i=0; i<2; i++){
3080                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3081                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3082                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3083                     }
3084                     break;
3085                 case CANDIDATE_MB_TYPE_BIDIR_I:
3086                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3087                     s->mv_type = MV_TYPE_FIELD;
3088                     s->mb_intra= 0;
3089                     for(dir=0; dir<2; dir++){
3090                         for(i=0; i<2; i++){
3091                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3092                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3093                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3094                         }
3095                     }
3096                     break;
3097                 default:
3098                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3099                 }
3100
3101                 encode_mb(s, motion_x, motion_y);
3102
3103                 // RAL: Update last macroblock type
3104                 s->last_mv_dir = s->mv_dir;
3105
3106                 if (CONFIG_H263_ENCODER &&
3107                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3108                     ff_h263_update_motion_val(s);
3109
3110                 ff_mpv_decode_mb(s, s->block);
3111             }
3112
3113             /* clean the MV table in IPS frames for direct mode in B frames */
3114             if(s->mb_intra /* && I,P,S_TYPE */){
3115                 s->p_mv_table[xy][0]=0;
3116                 s->p_mv_table[xy][1]=0;
3117             }
3118
3119             if(s->flags&CODEC_FLAG_PSNR){
3120                 int w= 16;
3121                 int h= 16;
3122
3123                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3124                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3125
3126                 s->current_picture.f->error[0] += sse(
3127                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3128                     s->dest[0], w, h, s->linesize);
3129                 s->current_picture.f->error[1] += sse(
3130                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3131                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3132                 s->current_picture.f->error[2] += sse(
3133                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3134                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3135             }
3136             if(s->loop_filter){
3137                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3138                     ff_h263_loop_filter(s);
3139             }
3140             av_dlog(s->avctx, "MB %d %d bits\n",
3141                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3142         }
3143     }
3144
3145     //not beautiful here but we must write it before flushing so it has to be here
3146     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3147         ff_msmpeg4_encode_ext_header(s);
3148
3149     write_slice_end(s);
3150
3151     /* Send the last GOB if RTP */
3152     if (s->avctx->rtp_callback) {
3153         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3154         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3155         /* Call the RTP callback to send the last GOB */
3156         emms_c();
3157         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3158     }
3159
3160     return 0;
3161 }
3162
3163 #define MERGE(field) dst->field += src->field; src->field=0
3164 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3165     MERGE(me.scene_change_score);
3166     MERGE(me.mc_mb_var_sum_temp);
3167     MERGE(me.mb_var_sum_temp);
3168 }
3169
3170 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3171     int i;
3172
3173     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3174     MERGE(dct_count[1]);
3175     MERGE(mv_bits);
3176     MERGE(i_tex_bits);
3177     MERGE(p_tex_bits);
3178     MERGE(i_count);
3179     MERGE(f_count);
3180     MERGE(b_count);
3181     MERGE(skip_count);
3182     MERGE(misc_bits);
3183     MERGE(er.error_count);
3184     MERGE(padding_bug_score);
3185     MERGE(current_picture.f->error[0]);
3186     MERGE(current_picture.f->error[1]);
3187     MERGE(current_picture.f->error[2]);
3188
3189     if(dst->avctx->noise_reduction){
3190         for(i=0; i<64; i++){
3191             MERGE(dct_error_sum[0][i]);
3192             MERGE(dct_error_sum[1][i]);
3193         }
3194     }
3195
3196     assert(put_bits_count(&src->pb) % 8 ==0);
3197     assert(put_bits_count(&dst->pb) % 8 ==0);
3198     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3199     flush_put_bits(&dst->pb);
3200 }
3201
3202 static int estimate_qp(MpegEncContext *s, int dry_run){
3203     if (s->next_lambda){
3204         s->current_picture_ptr->f->quality =
3205         s->current_picture.f->quality = s->next_lambda;
3206         if(!dry_run) s->next_lambda= 0;
3207     } else if (!s->fixed_qscale) {
3208         s->current_picture_ptr->f->quality =
3209         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3210         if (s->current_picture.f->quality < 0)
3211             return -1;
3212     }
3213
3214     if(s->adaptive_quant){
3215         switch(s->codec_id){
3216         case AV_CODEC_ID_MPEG4:
3217             if (CONFIG_MPEG4_ENCODER)
3218                 ff_clean_mpeg4_qscales(s);
3219             break;
3220         case AV_CODEC_ID_H263:
3221         case AV_CODEC_ID_H263P:
3222         case AV_CODEC_ID_FLV1:
3223             if (CONFIG_H263_ENCODER)
3224                 ff_clean_h263_qscales(s);
3225             break;
3226         default:
3227             ff_init_qscale_tab(s);
3228         }
3229
3230         s->lambda= s->lambda_table[0];
3231         //FIXME broken
3232     }else
3233         s->lambda = s->current_picture.f->quality;
3234     update_qscale(s);
3235     return 0;
3236 }
3237
3238 /* must be called before writing the header */
3239 static void set_frame_distances(MpegEncContext * s){
3240     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3241     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3242
3243     if(s->pict_type==AV_PICTURE_TYPE_B){
3244         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3245         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3246     }else{
3247         s->pp_time= s->time - s->last_non_b_time;
3248         s->last_non_b_time= s->time;
3249         assert(s->picture_number==0 || s->pp_time > 0);
3250     }
3251 }
3252
3253 static int encode_picture(MpegEncContext *s, int picture_number)
3254 {
3255     int i, ret;
3256     int bits;
3257     int context_count = s->slice_context_count;
3258
3259     s->picture_number = picture_number;
3260
3261     /* Reset the average MB variance */
3262     s->me.mb_var_sum_temp    =
3263     s->me.mc_mb_var_sum_temp = 0;
3264
3265     /* we need to initialize some time vars before we can encode b-frames */
3266     // RAL: Condition added for MPEG1VIDEO
3267     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3268         set_frame_distances(s);
3269     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3270         ff_set_mpeg4_time(s);
3271
3272     s->me.scene_change_score=0;
3273
3274 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3275
3276     if(s->pict_type==AV_PICTURE_TYPE_I){
3277         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3278         else                        s->no_rounding=0;
3279     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3280         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3281             s->no_rounding ^= 1;
3282     }
3283
3284     if(s->flags & CODEC_FLAG_PASS2){
3285         if (estimate_qp(s,1) < 0)
3286             return -1;
3287         ff_get_2pass_fcode(s);
3288     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3289         if(s->pict_type==AV_PICTURE_TYPE_B)
3290             s->lambda= s->last_lambda_for[s->pict_type];
3291         else
3292             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3293         update_qscale(s);
3294     }
3295
3296     s->mb_intra=0; //for the rate distortion & bit compare functions
3297     for(i=1; i<context_count; i++){
3298         ret = ff_update_duplicate_context(s->thread_context[i], s);
3299         if (ret < 0)
3300             return ret;
3301     }
3302
3303     if(ff_init_me(s)<0)
3304         return -1;
3305
3306     /* Estimate motion for every MB */
3307     if(s->pict_type != AV_PICTURE_TYPE_I){
3308         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3309         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3310         if (s->pict_type != AV_PICTURE_TYPE_B) {
3311             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3312                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3313             }
3314         }
3315
3316         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3317     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3318         /* I-Frame */
3319         for(i=0; i<s->mb_stride*s->mb_height; i++)
3320             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3321
3322         if(!s->fixed_qscale){
3323             /* finding spatial complexity for I-frame rate control */
3324             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3325         }
3326     }
3327     for(i=1; i<context_count; i++){
3328         merge_context_after_me(s, s->thread_context[i]);
3329     }
3330     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3331     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3332     emms_c();
3333
3334     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3335         s->pict_type= AV_PICTURE_TYPE_I;
3336         for(i=0; i<s->mb_stride*s->mb_height; i++)
3337             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3338         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3339                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3340     }
3341
3342     if(!s->umvplus){
3343         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3344             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3345
3346             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3347                 int a,b;
3348                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3349                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3350                 s->f_code= FFMAX3(s->f_code, a, b);
3351             }
3352
3353             ff_fix_long_p_mvs(s);
3354             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3355             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3356                 int j;
3357                 for(i=0; i<2; i++){
3358                     for(j=0; j<2; j++)
3359                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3360                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3361                 }
3362             }
3363         }
3364
3365         if(s->pict_type==AV_PICTURE_TYPE_B){
3366             int a, b;
3367
3368             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3369             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3370             s->f_code = FFMAX(a, b);
3371
3372             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3373             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3374             s->b_code = FFMAX(a, b);
3375
3376             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3377             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3378             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3379             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3380             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3381                 int dir, j;
3382                 for(dir=0; dir<2; dir++){
3383                     for(i=0; i<2; i++){
3384                         for(j=0; j<2; j++){
3385                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3386                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3387                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3388                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3389                         }
3390                     }
3391                 }
3392             }
3393         }
3394     }
3395
3396     if (estimate_qp(s, 0) < 0)
3397         return -1;
3398
3399     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3400         s->qscale= 3; //reduce clipping problems
3401
3402     if (s->out_format == FMT_MJPEG) {
3403         /* for mjpeg, we do include qscale in the matrix */
3404         for(i=1;i<64;i++){
3405             int j = s->idsp.idct_permutation[i];
3406
3407             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3408         }
3409         s->y_dc_scale_table=
3410         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3411         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3412         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3413                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3414         s->qscale= 8;
3415     }
3416
3417     //FIXME var duplication
3418     s->current_picture_ptr->f->key_frame =
3419     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3420     s->current_picture_ptr->f->pict_type =
3421     s->current_picture.f->pict_type = s->pict_type;
3422
3423     if (s->current_picture.f->key_frame)
3424         s->picture_in_gop_number=0;
3425
3426     s->last_bits= put_bits_count(&s->pb);
3427     switch(s->out_format) {
3428     case FMT_MJPEG:
3429         if (CONFIG_MJPEG_ENCODER)
3430             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3431                                            s->intra_matrix);
3432         break;
3433     case FMT_H261:
3434         if (CONFIG_H261_ENCODER)
3435             ff_h261_encode_picture_header(s, picture_number);
3436         break;
3437     case FMT_H263:
3438         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3439             ff_wmv2_encode_picture_header(s, picture_number);
3440         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3441             ff_msmpeg4_encode_picture_header(s, picture_number);
3442         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3443             ff_mpeg4_encode_picture_header(s, picture_number);
3444         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3445             ff_rv10_encode_picture_header(s, picture_number);
3446         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3447             ff_rv20_encode_picture_header(s, picture_number);
3448         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3449             ff_flv_encode_picture_header(s, picture_number);
3450         else if (CONFIG_H263_ENCODER)
3451             ff_h263_encode_picture_header(s, picture_number);
3452         break;
3453     case FMT_MPEG1:
3454         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3455             ff_mpeg1_encode_picture_header(s, picture_number);
3456         break;
3457     default:
3458         assert(0);
3459     }
3460     bits= put_bits_count(&s->pb);
3461     s->header_bits= bits - s->last_bits;
3462
3463     for(i=1; i<context_count; i++){
3464         update_duplicate_context_after_me(s->thread_context[i], s);
3465     }
3466     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3467     for(i=1; i<context_count; i++){
3468         merge_context_after_encode(s, s->thread_context[i]);
3469     }
3470     emms_c();
3471     return 0;
3472 }
3473
3474 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3475     const int intra= s->mb_intra;
3476     int i;
3477
3478     s->dct_count[intra]++;
3479
3480     for(i=0; i<64; i++){
3481         int level= block[i];
3482
3483         if(level){
3484             if(level>0){
3485                 s->dct_error_sum[intra][i] += level;
3486                 level -= s->dct_offset[intra][i];
3487                 if(level<0) level=0;
3488             }else{
3489                 s->dct_error_sum[intra][i] -= level;
3490                 level += s->dct_offset[intra][i];
3491                 if(level>0) level=0;
3492             }
3493             block[i]= level;
3494         }
3495     }
3496 }
3497
/**
 * Forward-transform and quantize one 8x8 block with a rate-distortion search.
 *
 * For every coefficient up to the last one that quantizes to non-zero, up to
 * two candidate levels are tried and a dynamic-programming pass picks the
 * run/level sequence with the smallest (distortion + lambda * bits) score.
 * The chosen levels are written back into block[]; s->coded_score[n] receives
 * the score of the chosen coding.
 *
 * @param s        encoder context
 * @param block    64 samples on input, quantized coefficients on output
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale
 * @param qscale   quantizer used to select the quantization matrix row
 * @param overflow set to non-zero when a quantized magnitude may exceed
 *                 s->max_qcoeff
 * @return index (in scan order) of the last non-zero coefficient; a value
 *         below the first coded index (0 for intra, -1 for inter) when no
 *         coefficient is coded
 */
3498 static int dct_quantize_trellis_c(MpegEncContext *s,
3499                                   int16_t *block, int n,
3500                                   int qscale, int *overflow){
3501     const int *qmat;
3502     const uint8_t *scantable= s->intra_scantable.scantable;
3503     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3504     int max=0;
3505     unsigned int threshold1, threshold2;
3506     int bias=0;
    /* the tables below are indexed by scan position (+1 for the *_tab ones) */
3507     int run_tab[65];
3508     int level_tab[65];
3509     int score_tab[65];
3510     int survivor[65];
3511     int survivor_count;
3512     int last_run=0;
3513     int last_level=0;
3514     int last_score= 0;
3515     int last_i;
    /* coeff[k][i]: k-th candidate level for scan position i */
3516     int coeff[2][64];
3517     int coeff_count[64];
3518     int qmul, qadd, start_i, last_non_zero, i, dc;
3519     const int esc_length= s->ac_esc_length;
3520     uint8_t * length;
3521     uint8_t * last_length;
3522     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3523
    /* forward DCT in place, followed by the optional denoise hook */
3524     s->fdsp.fdct(block);
3525
3526     if(s->dct_error_sum)
3527         s->denoise_dct(s, block);
    /* dequantization constants used by the H.263 style reconstruction below */
3528     qmul= qscale*16;
3529     qadd= ((qscale-1)|1)*8;
3530
3531     if (s->mb_intra) {
3532         int q;
3533         if (!s->h263_aic) {
3534             if (n < 4)
3535                 q = s->y_dc_scale;
3536             else
3537                 q = s->c_dc_scale;
3538             q = q << 3;
3539         } else{
3540             /* For AIC we skip quant/dequant of INTRADC */
3541             q = 1 << 3;
3542             qadd=0;
3543         }
3544
        /* the DC coefficient is quantized on its own; the search covers AC only */
3545         /* note: block[0] is assumed to be positive */
3546         block[0] = (block[0] + (q >> 1)) / q;
3547         start_i = 1;
3548         last_non_zero = 0;
3549         qmat = s->q_intra_matrix[qscale];
3550         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3551             bias= 1<<(QMAT_SHIFT-1);
3552         length     = s->intra_ac_vlc_length;
3553         last_length= s->intra_ac_vlc_last_length;
3554     } else {
3555         start_i = 0;
3556         last_non_zero = -1;
3557         qmat = s->q_inter_matrix[qscale];
3558         length     = s->inter_ac_vlc_length;
3559         last_length= s->inter_ac_vlc_last_length;
3560     }
3561     last_i= start_i;
3562
    /* (unsigned)(level+threshold1) > threshold2 is a single-compare test for
     * level < -threshold1 || level > threshold1 */
3563     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3564     threshold2= (threshold1<<1);
3565
    /* scan backwards for the last coefficient that passes the threshold */
3566     for(i=63; i>=start_i; i--) {
3567         const int j = scantable[i];
3568         int level = block[j] * qmat[j];
3569
3570         if(((unsigned)(level+threshold1))>threshold2){
3571             last_non_zero = i;
3572             break;
3573         }
3574     }
3575
    /* build the candidate levels: the quantized value and the value one step
     * closer to zero; coefficients under the threshold get a single +-1 */
3576     for(i=start_i; i<=last_non_zero; i++) {
3577         const int j = scantable[i];
3578         int level = block[j] * qmat[j];
3579
3580 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3581 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3582         if(((unsigned)(level+threshold1))>threshold2){
3583             if(level>0){
3584                 level= (bias + level)>>QMAT_SHIFT;
3585                 coeff[0][i]= level;
3586                 coeff[1][i]= level-1;
3587 //                coeff[2][k]= level-2;
3588             }else{
3589                 level= (bias - level)>>QMAT_SHIFT;
3590                 coeff[0][i]= -level;
3591                 coeff[1][i]= -level+1;
3592 //                coeff[2][k]= -level+2;
3593             }
            /* level holds the magnitude here; a magnitude of 1 has no second
             * candidate because that one would be zero */
3594             coeff_count[i]= FFMIN(level, 2);
3595             assert(coeff_count[i]);
3596             max |=level;
3597         }else{
            /* NOTE(review): yields -1 for negative and +1 otherwise, assuming
             * a 32-bit int and an arithmetic right shift - confirm */
3598             coeff[0][i]= (level>>31)|1;
3599             coeff_count[i]= 1;
3600         }
3601     }
3602
3603     *overflow= s->max_qcoeff < max; //overflow might have happened
3604
    /* nothing passed the threshold: clear the searched coefficients and stop */
3605     if(last_non_zero < start_i){
3606         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3607         return last_non_zero;
3608     }
3609
    /* score_tab[i] is the best score found for coding everything before scan
     * position i; survivor[] lists the positions still worth continuing from */
3610     score_tab[start_i]= 0;
3611     survivor[0]= start_i;
3612     survivor_count= 1;
3613
3614     for(i=start_i; i<=last_non_zero; i++){
3615         int level_index, j, zero_distortion;
3616         int dct_coeff= FFABS(block[ scantable[i] ]);
3617         int best_score=256*256*256*120;
3618
        /* NOTE(review): presumably undoes the per-coefficient scaling left in
         * the output of ff_fdct_ifast - confirm */
3619         if (s->fdsp.fdct == ff_fdct_ifast)
3620             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
        /* distortions are stored relative to coding this coefficient as zero */
3621         zero_distortion= dct_coeff*dct_coeff;
3622
3623         for(level_index=0; level_index < coeff_count[i]; level_index++){
3624             int distortion;
3625             int level= coeff[level_index][i];
3626             const int alevel= FFABS(level);
3627             int unquant_coeff;
3628
3629             assert(level);
3630
            /* reconstruct the value the candidate level would produce */
3631             if(s->out_format == FMT_H263){
3632                 unquant_coeff= alevel*qmul + qadd;
3633             }else{ //MPEG1
3634                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3635                 if(s->mb_intra){
3636                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3637                         unquant_coeff =   (unquant_coeff - 1) | 1;
3638                 }else{
3639                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3640                         unquant_coeff =   (unquant_coeff - 1) | 1;
3641                 }
3642                 unquant_coeff<<= 3;
3643             }
3644
3645             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
            /* after the +64 offset, levels in -64..63 use the length tables;
             * everything else is charged the escape length */
3646             level+=64;
3647             if((level&(~127)) == 0){
3648                 for(j=survivor_count-1; j>=0; j--){
3649                     int run= i - survivor[j];
3650                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3651                     score += score_tab[i-run];
3652
3653                     if(score < best_score){
3654                         best_score= score;
3655                         run_tab[i+1]= run;
3656                         level_tab[i+1]= level-64;
3657                     }
3658                 }
3659
                /* for H.263 the final coefficient is scored with last_length[],
                 * so the best "last" candidate is tracked inside the loop */
3660                 if(s->out_format == FMT_H263){
3661                     for(j=survivor_count-1; j>=0; j--){
3662                         int run= i - survivor[j];
3663                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3664                         score += score_tab[i-run];
3665                         if(score < last_score){
3666                             last_score= score;
3667                             last_run= run;
3668                             last_level= level-64;
3669                             last_i= i+1;
3670                         }
3671                     }
3672                 }
3673             }else{
3674                 distortion += esc_length*lambda;
3675                 for(j=survivor_count-1; j>=0; j--){
3676                     int run= i - survivor[j];
3677                     int score= distortion + score_tab[i-run];
3678
3679                     if(score < best_score){
3680                         best_score= score;
3681                         run_tab[i+1]= run;
3682                         level_tab[i+1]= level-64;
3683                     }
3684                 }
3685
3686                 if(s->out_format == FMT_H263){
3687                   for(j=survivor_count-1; j>=0; j--){
3688                         int run= i - survivor[j];
3689                         int score= distortion + score_tab[i-run];
3690                         if(score < last_score){
3691                             last_score= score;
3692                             last_run= run;
3693                             last_level= level-64;
3694                             last_i= i+1;
3695                         }
3696                     }
3697                 }
3698             }
3699         }
3700
3701         score_tab[i+1]= best_score;
3702
        /* drop trailing survivors that score worse than the new best; for
         * longer blocks a margin of lambda is allowed (see the note below) */
3703         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
3704         if(last_non_zero <= 27){
3705             for(; survivor_count; survivor_count--){
3706                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3707                     break;
3708             }
3709         }else{
3710             for(; survivor_count; survivor_count--){
3711                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3712                     break;
3713             }
3714         }
3715
3716         survivor[ survivor_count++ ]= i+1;
3717     }
3718
    /* formats other than H.263 choose the end position after the search;
     * every position except 0 is charged an extra lambda*2 */
3719     if(s->out_format != FMT_H263){
3720         last_score= 256*256*256*120;
3721         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3722             int score= score_tab[i];
3723             if(i) score += lambda*2; //FIXME exacter?
3724
3725             if(score < last_score){
3726                 last_score= score;
3727                 last_i= i;
3728                 last_level= level_tab[i];
3729                 last_run= run_tab[i];
3730             }
3731         }
3732     }
3733
3734     s->coded_score[n] = last_score;
3735
    /* remember |block[0]| before the searched coefficients are cleared */
3736     dc= FFABS(block[0]);
3737     last_non_zero= last_i - 1;
3738     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3739
3740     if(last_non_zero < start_i)
3741         return last_non_zero;
3742
    /* special case: an inter block whose only coefficient is at position 0;
     * re-decide its level, including the option of dropping it entirely */
3743     if(last_non_zero == 0 && start_i == 0){
3744         int best_level= 0;
3745         int best_score= dc * dc;
3746
3747         for(i=0; i<coeff_count[0]; i++){
3748             int level= coeff[i][0];
3749             int alevel= FFABS(level);
3750             int unquant_coeff, score, distortion;
3751
3752             if(s->out_format == FMT_H263){
3753                     unquant_coeff= (alevel*qmul + qadd)>>3;
3754             }else{ //MPEG1
3755                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3756                     unquant_coeff =   (unquant_coeff - 1) | 1;
3757             }
3758             unquant_coeff = (unquant_coeff + 4) >> 3;
3759             unquant_coeff<<= 3 + 3;
3760
3761             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3762             level+=64;
3763             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3764             else                    score= distortion + esc_length*lambda;
3765
3766             if(score < best_score){
3767                 best_score= score;
3768                 best_level= level - 64;
3769             }
3770         }
3771         block[0]= best_level;
3772         s->coded_score[n] = best_score - dc*dc;
3773         if(best_level == 0) return -1;
3774         else                return last_non_zero;
3775     }
3776
    /* backtrack: store the last level, then follow run_tab[] towards the
     * start, writing each chosen level at its permuted scan position */
3777     i= last_i;
3778     assert(last_level);
3779
3780     block[ perm_scantable[last_non_zero] ]= last_level;
3781     i -= last_run + 1;
3782
3783     for(; i>start_i; i -= run_tab[i] + 1){
3784         block[ perm_scantable[i-1] ]= level_tab[i];
3785     }
3786
3787     return last_non_zero;
3788 }
3789
3790 //#define REFINE_STATS 1
3791 static int16_t basis[64][64];
3792
3793 static void build_basis(uint8_t *perm){
3794     int i, j, x, y;
3795     emms_c();
3796     for(i=0; i<8; i++){
3797         for(j=0; j<8; j++){
3798             for(y=0; y<8; y++){
3799                 for(x=0; x<8; x++){
3800                     double s= 0.25*(1<<BASIS_SHIFT);
3801                     int index= 8*i + j;
3802                     int perm_index= perm[index];
3803                     if(i==0) s*= sqrt(0.5);
3804                     if(j==0) s*= sqrt(0.5);
3805                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3806                 }
3807             }
3808         }
3809     }
3810 }
3811
3812 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3813                         int16_t *block, int16_t *weight, int16_t *orig,
3814                         int n, int qscale){
3815     int16_t rem[64];
3816     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3817     const uint8_t *scantable= s->intra_scantable.scantable;
3818     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3819 //    unsigned int threshold1, threshold2;
3820 //    int bias=0;
3821     int run_tab[65];
3822     int prev_run=0;
3823     int prev_level=0;
3824     int qmul, qadd, start_i, last_non_zero, i, dc;
3825     uint8_t * length;
3826     uint8_t * last_length;
3827     int lambda;
3828     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3829 #ifdef REFINE_STATS
3830 static int count=0;
3831 static int after_last=0;
3832 static int to_zero=0;
3833 static int from_zero=0;
3834 static int raise=0;
3835 static int lower=0;
3836 static int messed_sign=0;
3837 #endif
3838
3839     if(basis[0][0] == 0)
3840         build_basis(s->idsp.idct_permutation);
3841
3842     qmul= qscale*2;
3843     qadd= (qscale-1)|1;
3844     if (s->mb_intra) {
3845         if (!s->h263_aic) {
3846             if (n < 4)
3847                 q = s->y_dc_scale;
3848             else
3849                 q = s->c_dc_scale;
3850         } else{
3851             /* For AIC we skip quant/dequant of INTRADC */
3852             q = 1;
3853             qadd=0;
3854         }
3855         q <<= RECON_SHIFT-3;
3856         /* note: block[0] is assumed to be positive */
3857         dc= block[0]*q;
3858 //        block[0] = (block[0] + (q >> 1)) / q;
3859         start_i = 1;
3860 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3861 //            bias= 1<<(QMAT_SHIFT-1);
3862         length     = s->intra_ac_vlc_length;
3863         last_length= s->intra_ac_vlc_last_length;
3864     } else {
3865         dc= 0;
3866         start_i = 0;
3867         length     = s->inter_ac_vlc_length;
3868         last_length= s->inter_ac_vlc_last_length;
3869     }
3870     last_non_zero = s->block_last_index[n];
3871
3872 #ifdef REFINE_STATS
3873 {START_TIMER
3874 #endif
3875     dc += (1<<(RECON_SHIFT-1));
3876     for(i=0; i<64; i++){
3877         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3878     }
3879 #ifdef REFINE_STATS
3880 STOP_TIMER("memset rem[]")}
3881 #endif
3882     sum=0;
3883     for(i=0; i<64; i++){
3884         int one= 36;
3885         int qns=4;
3886         int w;
3887
3888         w= FFABS(weight[i]) + qns*one;
3889         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3890
3891         weight[i] = w;
3892 //        w=weight[i] = (63*qns + (w/2)) / w;
3893
3894         assert(w>0);
3895         assert(w<(1<<6));
3896         sum += w*w;
3897     }
3898     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3899 #ifdef REFINE_STATS
3900 {START_TIMER
3901 #endif
3902     run=0;
3903     rle_index=0;
3904     for(i=start_i; i<=last_non_zero; i++){
3905         int j= perm_scantable[i];
3906         const int level= block[j];
3907         int coeff;
3908
3909         if(level){
3910             if(level<0) coeff= qmul*level - qadd;
3911             else        coeff= qmul*level + qadd;
3912             run_tab[rle_index++]=run;
3913             run=0;
3914
3915             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
3916         }else{
3917             run++;
3918         }
3919     }
3920 #ifdef REFINE_STATS
3921 if(last_non_zero>0){
3922 STOP_TIMER("init rem[]")
3923 }
3924 }
3925
3926 {START_TIMER
3927 #endif
3928     for(;;){
3929         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
3930         int best_coeff=0;
3931         int best_change=0;
3932         int run2, best_unquant_change=0, analyze_gradient;
3933 #ifdef REFINE_STATS
3934 {START_TIMER
3935 #endif
3936         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3937
3938         if(analyze_gradient){
3939 #ifdef REFINE_STATS
3940 {START_TIMER
3941 #endif
3942             for(i=0; i<64; i++){
3943                 int w= weight[i];
3944
3945                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3946             }
3947 #ifdef REFINE_STATS
3948 STOP_TIMER("rem*w*w")}
3949 {START_TIMER
3950 #endif
3951             s->fdsp.fdct(d1);
3952 #ifdef REFINE_STATS
3953 STOP_TIMER("dct")}
3954 #endif
3955         }
3956
3957         if(start_i){
3958             const int level= block[0];
3959             int change, old_coeff;
3960
3961             assert(s->mb_intra);
3962
3963             old_coeff= q*level;
3964
3965             for(change=-1; change<=1; change+=2){
3966                 int new_level= level + change;
3967                 int score, new_coeff;
3968
3969                 new_coeff= q*new_level;
3970                 if(new_coeff >= 2048 || new_coeff < 0)
3971                     continue;
3972
3973                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
3974                                                   new_coeff - old_coeff);
3975                 if(score<best_score){
3976                     best_score= score;
3977                     best_coeff= 0;
3978                     best_change= change;
3979                     best_unquant_change= new_coeff - old_coeff;
3980                 }
3981             }
3982         }
3983
3984         run=0;
3985         rle_index=0;
3986         run2= run_tab[rle_index++];
3987         prev_level=0;
3988         prev_run=0;
3989
3990         for(i=start_i; i<64; i++){
3991             int j= perm_scantable[i];
3992             const int level= block[j];
3993             int change, old_coeff;
3994
3995             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3996                 break;
3997
3998             if(level){
3999                 if(level<0) old_coeff= qmul*level - qadd;
4000                 else        old_coeff= qmul*level + qadd;
4001                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4002             }else{
4003                 old_coeff=0;
4004                 run2--;
4005                 assert(run2>=0 || i >= last_non_zero );
4006             }
4007
4008             for(change=-1; change<=1; change+=2){
4009                 int new_level= level + change;
4010                 int score, new_coeff, unquant_change;
4011
4012                 score=0;
4013                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4014                    continue;
4015
4016                 if(new_level){
4017                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4018                     else            new_coeff= qmul*new_level + qadd;
4019                     if(new_coeff >= 2048 || new_coeff <= -2048)
4020                         continue;
4021                     //FIXME check for overflow
4022
4023                     if(level){
4024                         if(level < 63 && level > -63){
4025                             if(i < last_non_zero)
4026                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4027                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4028                             else
4029                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4030                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4031                         }
4032                     }else{
4033                         assert(FFABS(new_level)==1);
4034
4035                         if(analyze_gradient){
4036                             int g= d1[ scantable[i] ];
4037                             if(g && (g^new_level) >= 0)
4038                                 continue;
4039                         }
4040
4041                         if(i < last_non_zero){
4042                             int next_i= i + run2 + 1;
4043                             int next_level= block[ perm_scantable[next_i] ] + 64;
4044
4045                             if(next_level&(~127))
4046                                 next_level= 0;
4047
4048                             if(next_i < last_non_zero)
4049                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4050                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4051                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4052                             else
4053                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4054                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4055                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4056                         }else{
4057                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4058                             if(prev_level){
4059                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4060                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4061                             }
4062                         }
4063                     }
4064                 }else{
4065                     new_coeff=0;
4066                     assert(FFABS(level)==1);
4067
4068                     if(i < last_non_zero){
4069                         int next_i= i + run2 + 1;
4070                         int next_level= block[ perm_scantable[next_i] ] + 64;
4071
4072                         if(next_level&(~127))
4073                             next_level= 0;
4074
4075                         if(next_i < last_non_zero)
4076                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4077                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4078                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4079                         else
4080                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4081                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4082                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4083                     }else{
4084                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4085                         if(prev_level){
4086                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4087                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4088                         }
4089                     }
4090                 }
4091
4092                 score *= lambda;
4093
4094                 unquant_change= new_coeff - old_coeff;
4095                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4096
4097                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4098                                                    unquant_change);
4099                 if(score<best_score){
4100                     best_score= score;
4101                     best_coeff= i;
4102                     best_change= change;
4103                     best_unquant_change= unquant_change;
4104                 }
4105             }
4106             if(level){
4107                 prev_level= level + 64;
4108                 if(prev_level&(~127))
4109                     prev_level= 0;
4110                 prev_run= run;
4111                 run=0;
4112             }else{
4113                 run++;
4114             }
4115         }
4116 #ifdef REFINE_STATS
4117 STOP_TIMER("iterative step")}
4118 #endif
4119
4120         if(best_change){
4121             int j= perm_scantable[ best_coeff ];
4122
4123             block[j] += best_change;
4124
4125             if(best_coeff > last_non_zero){
4126                 last_non_zero= best_coeff;
4127                 assert(block[j]);
4128 #ifdef REFINE_STATS
4129 after_last++;
4130 #endif
4131             }else{
4132 #ifdef REFINE_STATS
4133 if(block[j]){
4134     if(block[j] - best_change){
4135         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4136             raise++;
4137         }else{
4138             lower++;
4139         }
4140     }else{
4141         from_zero++;
4142     }
4143 }else{
4144     to_zero++;
4145 }
4146 #endif
4147                 for(; last_non_zero>=start_i; last_non_zero--){
4148                     if(block[perm_scantable[last_non_zero]])
4149                         break;
4150                 }
4151             }
4152 #ifdef REFINE_STATS
4153 count++;
4154 if(256*256*256*64 % count == 0){
4155     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4156 }
4157 #endif
4158             run=0;
4159             rle_index=0;
4160             for(i=start_i; i<=last_non_zero; i++){
4161                 int j= perm_scantable[i];
4162                 const int level= block[j];
4163
4164                  if(level){
4165                      run_tab[rle_index++]=run;
4166                      run=0;
4167                  }else{
4168                      run++;
4169                  }
4170             }
4171
4172             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4173         }else{
4174             break;
4175         }
4176     }
4177 #ifdef REFINE_STATS
4178 if(last_non_zero>0){
4179 STOP_TIMER("iterative search")
4180 }
4181 }
4182 #endif
4183
4184     return last_non_zero;
4185 }
4186
4187 int ff_dct_quantize_c(MpegEncContext *s,
4188                         int16_t *block, int n,
4189                         int qscale, int *overflow)
4190 {
4191     int i, j, level, last_non_zero, q, start_i;
4192     const int *qmat;
4193     const uint8_t *scantable= s->intra_scantable.scantable;
4194     int bias;
4195     int max=0;
4196     unsigned int threshold1, threshold2;
4197
4198     s->fdsp.fdct(block);
4199
4200     if(s->dct_error_sum)
4201         s->denoise_dct(s, block);
4202
4203     if (s->mb_intra) {
4204         if (!s->h263_aic) {
4205             if (n < 4)
4206                 q = s->y_dc_scale;
4207             else
4208                 q = s->c_dc_scale;
4209             q = q << 3;
4210         } else
4211             /* For AIC we skip quant/dequant of INTRADC */
4212             q = 1 << 3;
4213
4214         /* note: block[0] is assumed to be positive */
4215         block[0] = (block[0] + (q >> 1)) / q;
4216         start_i = 1;
4217         last_non_zero = 0;
4218         qmat = s->q_intra_matrix[qscale];
4219         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4220     } else {
4221         start_i = 0;
4222         last_non_zero = -1;
4223         qmat = s->q_inter_matrix[qscale];
4224         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4225     }
4226     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4227     threshold2= (threshold1<<1);
4228     for(i=63;i>=start_i;i--) {
4229         j = scantable[i];
4230         level = block[j] * qmat[j];
4231
4232         if(((unsigned)(level+threshold1))>threshold2){
4233             last_non_zero = i;
4234             break;
4235         }else{
4236             block[j]=0;
4237         }
4238     }
4239     for(i=start_i; i<=last_non_zero; i++) {
4240         j = scantable[i];
4241         level = block[j] * qmat[j];
4242
4243 //        if(   bias+level >= (1<<QMAT_SHIFT)
4244 //           || bias-level >= (1<<QMAT_SHIFT)){
4245         if(((unsigned)(level+threshold1))>threshold2){
4246             if(level>0){
4247                 level= (bias + level)>>QMAT_SHIFT;
4248                 block[j]= level;
4249             }else{
4250                 level= (bias - level)>>QMAT_SHIFT;
4251                 block[j]= -level;
4252             }
4253             max |=level;
4254         }else{
4255             block[j]=0;
4256         }
4257     }
4258     *overflow= s->max_qcoeff < max; //overflow might have happened
4259
4260     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4261     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4262         ff_block_permute(block, s->idsp.idct_permutation,
4263                          scantable, last_non_zero);
4264
4265     return last_non_zero;
4266 }
4267
4268 #define OFFSET(x) offsetof(MpegEncContext, x)
4269 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4270 static const AVOption h263_options[] = {
4271     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4272     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4273     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4274     FF_MPV_COMMON_OPTS
4275     { NULL },
4276 };
4277
4278 static const AVClass h263_class = {
4279     .class_name = "H.263 encoder",
4280     .item_name  = av_default_item_name,
4281     .option     = h263_options,
4282     .version    = LIBAVUTIL_VERSION_INT,
4283 };
4284
4285 AVCodec ff_h263_encoder = {
4286     .name           = "h263",
4287     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4288     .type           = AVMEDIA_TYPE_VIDEO,
4289     .id             = AV_CODEC_ID_H263,
4290     .priv_data_size = sizeof(MpegEncContext),
4291     .init           = ff_mpv_encode_init,
4292     .encode2        = ff_mpv_encode_picture,
4293     .close          = ff_mpv_encode_end,
4294     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4295     .priv_class     = &h263_class,
4296 };
4297
4298 static const AVOption h263p_options[] = {
4299     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4300     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4301     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4302     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4303     FF_MPV_COMMON_OPTS
4304     { NULL },
4305 };
4306 static const AVClass h263p_class = {
4307     .class_name = "H.263p encoder",
4308     .item_name  = av_default_item_name,
4309     .option     = h263p_options,
4310     .version    = LIBAVUTIL_VERSION_INT,
4311 };
4312
4313 AVCodec ff_h263p_encoder = {
4314     .name           = "h263p",
4315     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4316     .type           = AVMEDIA_TYPE_VIDEO,
4317     .id             = AV_CODEC_ID_H263P,
4318     .priv_data_size = sizeof(MpegEncContext),
4319     .init           = ff_mpv_encode_init,
4320     .encode2        = ff_mpv_encode_picture,
4321     .close          = ff_mpv_encode_end,
4322     .capabilities   = CODEC_CAP_SLICE_THREADS,
4323     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4324     .priv_class     = &h263p_class,
4325 };
4326
4327 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4328
4329 AVCodec ff_msmpeg4v2_encoder = {
4330     .name           = "msmpeg4v2",
4331     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4332     .type           = AVMEDIA_TYPE_VIDEO,
4333     .id             = AV_CODEC_ID_MSMPEG4V2,
4334     .priv_data_size = sizeof(MpegEncContext),
4335     .init           = ff_mpv_encode_init,
4336     .encode2        = ff_mpv_encode_picture,
4337     .close          = ff_mpv_encode_end,
4338     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4339     .priv_class     = &msmpeg4v2_class,
4340 };
4341
4342 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4343
4344 AVCodec ff_msmpeg4v3_encoder = {
4345     .name           = "msmpeg4",
4346     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4347     .type           = AVMEDIA_TYPE_VIDEO,
4348     .id             = AV_CODEC_ID_MSMPEG4V3,
4349     .priv_data_size = sizeof(MpegEncContext),
4350     .init           = ff_mpv_encode_init,
4351     .encode2        = ff_mpv_encode_picture,
4352     .close          = ff_mpv_encode_end,
4353     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4354     .priv_class     = &msmpeg4v3_class,
4355 };
4356
4357 FF_MPV_GENERIC_CLASS(wmv1)
4358
4359 AVCodec ff_wmv1_encoder = {
4360     .name           = "wmv1",
4361     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4362     .type           = AVMEDIA_TYPE_VIDEO,
4363     .id             = AV_CODEC_ID_WMV1,
4364     .priv_data_size = sizeof(MpegEncContext),
4365     .init           = ff_mpv_encode_init,
4366     .encode2        = ff_mpv_encode_picture,
4367     .close          = ff_mpv_encode_end,
4368     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4369     .priv_class     = &wmv1_class,
4370 };