]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
6a7c9e500fb7ead2c47219ff6b643a38533cdf27
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "mpegvideodata.h"
44 #include "h261.h"
45 #include "h263.h"
46 #include "h263data.h"
47 #include "mjpegenc_common.h"
48 #include "mathops.h"
49 #include "mpegutils.h"
50 #include "mjpegenc.h"
51 #include "msmpeg4.h"
52 #include "pixblockdsp.h"
53 #include "qpeldsp.h"
54 #include "faandct.h"
55 #include "thread.h"
56 #include "aandcttab.h"
57 #include "flv.h"
58 #include "mpeg4video.h"
59 #include "internal.h"
60 #include "bytestream.h"
61 #include "wmv2.h"
62 #include "rv10.h"
63 #include "libxvid.h"
64 #include <limits.h>
65
66 #define QUANT_BIAS_SHIFT 8
67
68 #define QMAT_SHIFT_MMX 16
69 #define QMAT_SHIFT 22
70
71 static int encode_picture(MpegEncContext *s, int picture_number);
72 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
73 static int sse_mb(MpegEncContext *s);
74 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
75 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
76
77 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
78 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
79
/* Option table made of the entries expanded from FF_MPV_COMMON_OPTS,
 * followed by a NULL sentinel entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
84
85 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
86                        uint16_t (*qmat16)[2][64],
87                        const uint16_t *quant_matrix,
88                        int bias, int qmin, int qmax, int intra)
89 {
90     FDCTDSPContext *fdsp = &s->fdsp;
91     int qscale;
92     int shift = 0;
93
94     for (qscale = qmin; qscale <= qmax; qscale++) {
95         int i;
96         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
97 #if CONFIG_FAANDCT
98             fdsp->fdct == ff_faandct            ||
99 #endif /* CONFIG_FAANDCT */
100             fdsp->fdct == ff_jpeg_fdct_islow_10) {
101             for (i = 0; i < 64; i++) {
102                 const int j = s->idsp.idct_permutation[i];
103                 int64_t den = (int64_t) qscale * quant_matrix[j];
104                 /* 16 <= qscale * quant_matrix[i] <= 7905
105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
106                  *             19952 <=              x  <= 249205026
107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
108                  *           3444240 >= (1 << 36) / (x) >= 275 */
109
110                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
111             }
112         } else if (fdsp->fdct == ff_fdct_ifast) {
113             for (i = 0; i < 64; i++) {
114                 const int j = s->idsp.idct_permutation[i];
115                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
116                 /* 16 <= qscale * quant_matrix[i] <= 7905
117                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
118                  *             19952 <=              x  <= 249205026
119                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
120                  *           3444240 >= (1 << 36) / (x) >= 275 */
121
122                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
123             }
124         } else {
125             for (i = 0; i < 64; i++) {
126                 const int j = s->idsp.idct_permutation[i];
127                 int64_t den = (int64_t) qscale * quant_matrix[j];
128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
129                  * Assume x = qscale * quant_matrix[i]
130                  * So             16 <=              x  <= 7905
131                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
132                  * so          32768 >= (1 << 19) / (x) >= 67 */
133                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
134                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
135                 //                    (qscale * quant_matrix[i]);
136                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
137
138                 if (qmat16[qscale][0][i] == 0 ||
139                     qmat16[qscale][0][i] == 128 * 256)
140                     qmat16[qscale][0][i] = 128 * 256 - 1;
141                 qmat16[qscale][1][i] =
142                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
143                                 qmat16[qscale][0][i]);
144             }
145         }
146
147         for (i = intra; i < 64; i++) {
148             int64_t max = 8191;
149             if (fdsp->fdct == ff_fdct_ifast) {
150                 max = (8191LL * ff_aanscales[i]) >> 14;
151             }
152             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
153                 shift++;
154             }
155         }
156     }
157     if (shift) {
158         av_log(NULL, AV_LOG_INFO,
159                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
160                QMAT_SHIFT - shift);
161     }
162 }
163
164 static inline void update_qscale(MpegEncContext *s)
165 {
166     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
167                 (FF_LAMBDA_SHIFT + 7);
168     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
169
170     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
171                  FF_LAMBDA_SHIFT;
172 }
173
174 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
175 {
176     int i;
177
178     if (matrix) {
179         put_bits(pb, 1, 1);
180         for (i = 0; i < 64; i++) {
181             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
182         }
183     } else
184         put_bits(pb, 1, 0);
185 }
186
187 /**
188  * init s->current_picture.qscale_table from s->lambda_table
189  */
190 void ff_init_qscale_tab(MpegEncContext *s)
191 {
192     int8_t * const qscale_table = s->current_picture.qscale_table;
193     int i;
194
195     for (i = 0; i < s->mb_num; i++) {
196         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
197         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
198         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
199                                                   s->avctx->qmax);
200     }
201 }
202
203 static void update_duplicate_context_after_me(MpegEncContext *dst,
204                                               MpegEncContext *src)
205 {
206 #define COPY(a) dst->a= src->a
207     COPY(pict_type);
208     COPY(current_picture);
209     COPY(f_code);
210     COPY(b_code);
211     COPY(qscale);
212     COPY(lambda);
213     COPY(lambda2);
214     COPY(picture_in_gop_number);
215     COPY(gop_picture_number);
216     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
217     COPY(progressive_frame);    // FIXME don't set in encode_header
218     COPY(partitioned_frame);    // FIXME don't set in encode_header
219 #undef COPY
220 }
221
222 /**
223  * Set the given MpegEncContext to defaults for encoding.
224  * the changed fields will not depend upon the prior state of the MpegEncContext.
225  */
226 static void mpv_encode_defaults(MpegEncContext *s)
227 {
228     int i;
229     ff_mpv_common_defaults(s);
230
231     for (i = -16; i < 16; i++) {
232         default_fcode_tab[i + MAX_MV] = 1;
233     }
234     s->me.mv_penalty = default_mv_penalty;
235     s->fcode_tab     = default_fcode_tab;
236
237     s->input_picture_number  = 0;
238     s->picture_in_gop_number = 0;
239 }
240
241 /* init video encoder */
242 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
243 {
244     MpegEncContext *s = avctx->priv_data;
245     AVCPBProperties *cpb_props;
246     int i, ret, format_supported;
247
248     mpv_encode_defaults(s);
249
250     switch (avctx->codec_id) {
251     case AV_CODEC_ID_MPEG2VIDEO:
252         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
253             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
254             av_log(avctx, AV_LOG_ERROR,
255                    "only YUV420 and YUV422 are supported\n");
256             return -1;
257         }
258         break;
259     case AV_CODEC_ID_MJPEG:
260         format_supported = 0;
261         /* JPEG color space */
262         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
263             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
264             (avctx->color_range == AVCOL_RANGE_JPEG &&
265              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
266               avctx->pix_fmt == AV_PIX_FMT_YUV422P)))
267             format_supported = 1;
268         /* MPEG color space */
269         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
270                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
271                   avctx->pix_fmt == AV_PIX_FMT_YUV422P))
272             format_supported = 1;
273
274         if (!format_supported) {
275             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
276             return -1;
277         }
278         break;
279     default:
280         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
281             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
282             return -1;
283         }
284     }
285
286     switch (avctx->pix_fmt) {
287     case AV_PIX_FMT_YUVJ422P:
288     case AV_PIX_FMT_YUV422P:
289         s->chroma_format = CHROMA_422;
290         break;
291     case AV_PIX_FMT_YUVJ420P:
292     case AV_PIX_FMT_YUV420P:
293     default:
294         s->chroma_format = CHROMA_420;
295         break;
296     }
297
298 #if FF_API_PRIVATE_OPT
299 FF_DISABLE_DEPRECATION_WARNINGS
300     if (avctx->rtp_payload_size)
301         s->rtp_payload_size = avctx->rtp_payload_size;
302     if (avctx->me_penalty_compensation)
303         s->me_penalty_compensation = avctx->me_penalty_compensation;
304     if (avctx->pre_me)
305         s->me_pre = avctx->pre_me;
306 FF_ENABLE_DEPRECATION_WARNINGS
307 #endif
308
309     s->bit_rate = avctx->bit_rate;
310     s->width    = avctx->width;
311     s->height   = avctx->height;
312     if (avctx->gop_size > 600 &&
313         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
314         av_log(avctx, AV_LOG_ERROR,
315                "Warning keyframe interval too large! reducing it ...\n");
316         avctx->gop_size = 600;
317     }
318     s->gop_size     = avctx->gop_size;
319     s->avctx        = avctx;
320     if (avctx->max_b_frames > MAX_B_FRAMES) {
321         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
322                "is %d.\n", MAX_B_FRAMES);
323     }
324     s->max_b_frames = avctx->max_b_frames;
325     s->codec_id     = avctx->codec->id;
326     s->strict_std_compliance = avctx->strict_std_compliance;
327     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
328     s->rtp_mode           = !!s->rtp_payload_size;
329     s->intra_dc_precision = avctx->intra_dc_precision;
330     s->user_specified_pts = AV_NOPTS_VALUE;
331
332     if (s->gop_size <= 1) {
333         s->intra_only = 1;
334         s->gop_size   = 12;
335     } else {
336         s->intra_only = 0;
337     }
338
339 #if FF_API_MOTION_EST
340 FF_DISABLE_DEPRECATION_WARNINGS
341     s->me_method = avctx->me_method;
342 FF_ENABLE_DEPRECATION_WARNINGS
343 #endif
344
345     /* Fixed QSCALE */
346     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
347
348 #if FF_API_MPV_OPT
349     FF_DISABLE_DEPRECATION_WARNINGS
350     if (avctx->border_masking != 0.0)
351         s->border_masking = avctx->border_masking;
352     FF_ENABLE_DEPRECATION_WARNINGS
353 #endif
354
355     s->adaptive_quant = (s->avctx->lumi_masking ||
356                          s->avctx->dark_masking ||
357                          s->avctx->temporal_cplx_masking ||
358                          s->avctx->spatial_cplx_masking  ||
359                          s->avctx->p_masking      ||
360                          s->border_masking ||
361                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
362                         !s->fixed_qscale;
363
364     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
365
366     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
367         av_log(avctx, AV_LOG_ERROR,
368                "a vbv buffer size is needed, "
369                "for encoding with a maximum bitrate\n");
370         return -1;
371     }
372
373     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
374         av_log(avctx, AV_LOG_INFO,
375                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
376     }
377
378     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
379         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
380         return -1;
381     }
382
383     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
384         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
385         return -1;
386     }
387
388     if (avctx->rc_max_rate &&
389         avctx->rc_max_rate == avctx->bit_rate &&
390         avctx->rc_max_rate != avctx->rc_min_rate) {
391         av_log(avctx, AV_LOG_INFO,
392                "impossible bitrate constraints, this will fail\n");
393     }
394
395     if (avctx->rc_buffer_size &&
396         avctx->bit_rate * (int64_t)avctx->time_base.num >
397             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
398         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
399         return -1;
400     }
401
402     if (!s->fixed_qscale &&
403         avctx->bit_rate * av_q2d(avctx->time_base) >
404             avctx->bit_rate_tolerance) {
405         av_log(avctx, AV_LOG_ERROR,
406                "bitrate tolerance too small for bitrate\n");
407         return -1;
408     }
409
410     if (s->avctx->rc_max_rate &&
411         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
412         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
413          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
414         90000LL * (avctx->rc_buffer_size - 1) >
415             s->avctx->rc_max_rate * 0xFFFFLL) {
416         av_log(avctx, AV_LOG_INFO,
417                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
418                "specified vbv buffer is too large for the given bitrate!\n");
419     }
420
421     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
422         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
423         s->codec_id != AV_CODEC_ID_FLV1) {
424         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
425         return -1;
426     }
427
428     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
429         av_log(avctx, AV_LOG_ERROR,
430                "OBMC is only supported with simple mb decision\n");
431         return -1;
432     }
433
434     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
435         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
436         return -1;
437     }
438
439     if (s->max_b_frames                    &&
440         s->codec_id != AV_CODEC_ID_MPEG4      &&
441         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
442         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
443         av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
444         return -1;
445     }
446
447     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
448          s->codec_id == AV_CODEC_ID_H263  ||
449          s->codec_id == AV_CODEC_ID_H263P) &&
450         (avctx->sample_aspect_ratio.num > 255 ||
451          avctx->sample_aspect_ratio.den > 255)) {
452         av_log(avctx, AV_LOG_ERROR,
453                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
454                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
455         return -1;
456     }
457
458     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
459         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
460         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
461         return -1;
462     }
463
464 #if FF_API_PRIVATE_OPT
465     FF_DISABLE_DEPRECATION_WARNINGS
466     if (avctx->mpeg_quant)
467         s->mpeg_quant = avctx->mpeg_quant;
468     FF_ENABLE_DEPRECATION_WARNINGS
469 #endif
470
471     // FIXME mpeg2 uses that too
472     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
473         av_log(avctx, AV_LOG_ERROR,
474                "mpeg2 style quantization not supported by codec\n");
475         return -1;
476     }
477
478     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
479         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
480         return -1;
481     }
482
483     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
484         s->avctx->mb_decision != FF_MB_DECISION_RD) {
485         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
486         return -1;
487     }
488
489 #if FF_API_PRIVATE_OPT
490 FF_DISABLE_DEPRECATION_WARNINGS
491     if (avctx->scenechange_threshold)
492         s->scenechange_threshold = avctx->scenechange_threshold;
493 FF_ENABLE_DEPRECATION_WARNINGS
494 #endif
495
496     if (s->scenechange_threshold < 1000000000 &&
497         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
498         av_log(avctx, AV_LOG_ERROR,
499                "closed gop with scene change detection are not supported yet, "
500                "set threshold to 1000000000\n");
501         return -1;
502     }
503
504     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
505         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
506             av_log(avctx, AV_LOG_ERROR,
507                   "low delay forcing is only available for mpeg2\n");
508             return -1;
509         }
510         if (s->max_b_frames != 0) {
511             av_log(avctx, AV_LOG_ERROR,
512                    "B-frames cannot be used with low delay\n");
513             return -1;
514         }
515     }
516
517     if (s->q_scale_type == 1) {
518         if (avctx->qmax > 12) {
519             av_log(avctx, AV_LOG_ERROR,
520                    "non linear quant only supports qmax <= 12 currently\n");
521             return -1;
522         }
523     }
524
525     if (avctx->slices > 1 &&
526         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
527         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
528         return AVERROR(EINVAL);
529     }
530
531     if (s->avctx->thread_count > 1         &&
532         s->codec_id != AV_CODEC_ID_MPEG4      &&
533         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
534         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
535         (s->codec_id != AV_CODEC_ID_H263P)) {
536         av_log(avctx, AV_LOG_ERROR,
537                "multi threaded encoding not supported by codec\n");
538         return -1;
539     }
540
541     if (s->avctx->thread_count < 1) {
542         av_log(avctx, AV_LOG_ERROR,
543                "automatic thread number detection not supported by codec,"
544                "patch welcome\n");
545         return -1;
546     }
547
548     if (!avctx->time_base.den || !avctx->time_base.num) {
549         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
550         return -1;
551     }
552
553 #if FF_API_PRIVATE_OPT
554 FF_DISABLE_DEPRECATION_WARNINGS
555     if (avctx->b_frame_strategy)
556         s->b_frame_strategy = avctx->b_frame_strategy;
557     if (avctx->b_sensitivity != 40)
558         s->b_sensitivity = avctx->b_sensitivity;
559 FF_ENABLE_DEPRECATION_WARNINGS
560 #endif
561
562     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
563         av_log(avctx, AV_LOG_INFO,
564                "notice: b_frame_strategy only affects the first pass\n");
565         s->b_frame_strategy = 0;
566     }
567
568     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
569     if (i > 1) {
570         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
571         avctx->time_base.den /= i;
572         avctx->time_base.num /= i;
573         //return -1;
574     }
575
576     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
577         s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG) {
578         // (a + x * 3 / 8) / x
579         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
580         s->inter_quant_bias = 0;
581     } else {
582         s->intra_quant_bias = 0;
583         // (a - x / 4) / x
584         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
585     }
586
587 #if FF_API_QUANT_BIAS
588 FF_DISABLE_DEPRECATION_WARNINGS
589     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
590         s->intra_quant_bias = avctx->intra_quant_bias;
591     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
592         s->inter_quant_bias = avctx->inter_quant_bias;
593 FF_ENABLE_DEPRECATION_WARNINGS
594 #endif
595
596     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
597         s->avctx->time_base.den > (1 << 16) - 1) {
598         av_log(avctx, AV_LOG_ERROR,
599                "timebase %d/%d not supported by MPEG 4 standard, "
600                "the maximum admitted value for the timebase denominator "
601                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
602                (1 << 16) - 1);
603         return -1;
604     }
605     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
606
607     switch (avctx->codec->id) {
608     case AV_CODEC_ID_MPEG1VIDEO:
609         s->out_format = FMT_MPEG1;
610         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
611         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
612         break;
613     case AV_CODEC_ID_MPEG2VIDEO:
614         s->out_format = FMT_MPEG1;
615         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
616         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
617         s->rtp_mode   = 1;
618         break;
619     case AV_CODEC_ID_MJPEG:
620         s->out_format = FMT_MJPEG;
621         s->intra_only = 1; /* force intra only for jpeg */
622         if (!CONFIG_MJPEG_ENCODER ||
623             ff_mjpeg_encode_init(s) < 0)
624             return -1;
625         avctx->delay = 0;
626         s->low_delay = 1;
627         break;
628     case AV_CODEC_ID_H261:
629         if (!CONFIG_H261_ENCODER)
630             return -1;
631         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
632             av_log(avctx, AV_LOG_ERROR,
633                    "The specified picture size of %dx%d is not valid for the "
634                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
635                     s->width, s->height);
636             return -1;
637         }
638         s->out_format = FMT_H261;
639         avctx->delay  = 0;
640         s->low_delay  = 1;
641         s->rtp_mode   = 0; /* Sliced encoding not supported */
642         break;
643     case AV_CODEC_ID_H263:
644         if (!CONFIG_H263_ENCODER)
645         return -1;
646         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
647                              s->width, s->height) == 8) {
648             av_log(avctx, AV_LOG_INFO,
649                    "The specified picture size of %dx%d is not valid for "
650                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
651                    "352x288, 704x576, and 1408x1152."
652                    "Try H.263+.\n", s->width, s->height);
653             return -1;
654         }
655         s->out_format = FMT_H263;
656         avctx->delay  = 0;
657         s->low_delay  = 1;
658         break;
659     case AV_CODEC_ID_H263P:
660         s->out_format = FMT_H263;
661         s->h263_plus  = 1;
662         /* Fx */
663         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
664         s->modified_quant  = s->h263_aic;
665         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
666         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
667
668         /* /Fx */
669         /* These are just to be sure */
670         avctx->delay = 0;
671         s->low_delay = 1;
672         break;
673     case AV_CODEC_ID_FLV1:
674         s->out_format      = FMT_H263;
675         s->h263_flv        = 2; /* format = 1; 11-bit codes */
676         s->unrestricted_mv = 1;
677         s->rtp_mode  = 0; /* don't allow GOB */
678         avctx->delay = 0;
679         s->low_delay = 1;
680         break;
681     case AV_CODEC_ID_RV10:
682         s->out_format = FMT_H263;
683         avctx->delay  = 0;
684         s->low_delay  = 1;
685         break;
686     case AV_CODEC_ID_RV20:
687         s->out_format      = FMT_H263;
688         avctx->delay       = 0;
689         s->low_delay       = 1;
690         s->modified_quant  = 1;
691         s->h263_aic        = 1;
692         s->h263_plus       = 1;
693         s->loop_filter     = 1;
694         s->unrestricted_mv = 0;
695         break;
696     case AV_CODEC_ID_MPEG4:
697         s->out_format      = FMT_H263;
698         s->h263_pred       = 1;
699         s->unrestricted_mv = 1;
700         s->low_delay       = s->max_b_frames ? 0 : 1;
701         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
702         break;
703     case AV_CODEC_ID_MSMPEG4V2:
704         s->out_format      = FMT_H263;
705         s->h263_pred       = 1;
706         s->unrestricted_mv = 1;
707         s->msmpeg4_version = 2;
708         avctx->delay       = 0;
709         s->low_delay       = 1;
710         break;
711     case AV_CODEC_ID_MSMPEG4V3:
712         s->out_format        = FMT_H263;
713         s->h263_pred         = 1;
714         s->unrestricted_mv   = 1;
715         s->msmpeg4_version   = 3;
716         s->flipflop_rounding = 1;
717         avctx->delay         = 0;
718         s->low_delay         = 1;
719         break;
720     case AV_CODEC_ID_WMV1:
721         s->out_format        = FMT_H263;
722         s->h263_pred         = 1;
723         s->unrestricted_mv   = 1;
724         s->msmpeg4_version   = 4;
725         s->flipflop_rounding = 1;
726         avctx->delay         = 0;
727         s->low_delay         = 1;
728         break;
729     case AV_CODEC_ID_WMV2:
730         s->out_format        = FMT_H263;
731         s->h263_pred         = 1;
732         s->unrestricted_mv   = 1;
733         s->msmpeg4_version   = 5;
734         s->flipflop_rounding = 1;
735         avctx->delay         = 0;
736         s->low_delay         = 1;
737         break;
738     default:
739         return -1;
740     }
741
742 #if FF_API_PRIVATE_OPT
743     FF_DISABLE_DEPRECATION_WARNINGS
744     if (avctx->noise_reduction)
745         s->noise_reduction = avctx->noise_reduction;
746     FF_ENABLE_DEPRECATION_WARNINGS
747 #endif
748
749     avctx->has_b_frames = !s->low_delay;
750
751     s->encoding = 1;
752
753     s->progressive_frame    =
754     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
755                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
756                                 s->alternate_scan);
757
758     /* init */
759     ff_mpv_idct_init(s);
760     if (ff_mpv_common_init(s) < 0)
761         return -1;
762
763     if (ARCH_X86)
764         ff_mpv_encode_init_x86(s);
765
766     ff_fdctdsp_init(&s->fdsp, avctx);
767     ff_me_cmp_init(&s->mecc, avctx);
768     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
769     ff_pixblockdsp_init(&s->pdsp, avctx);
770     ff_qpeldsp_init(&s->qdsp);
771
772     if (s->msmpeg4_version) {
773         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
774                           2 * 2 * (MAX_LEVEL + 1) *
775                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
776     }
777     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
778
779     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
780     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
781     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
782     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
783     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
784                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
785     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
786                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
787
788
789     if (s->noise_reduction) {
790         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
791                           2 * 64 * sizeof(uint16_t), fail);
792     }
793
794     if (CONFIG_H263_ENCODER)
795         ff_h263dsp_init(&s->h263dsp);
796     if (!s->dct_quantize)
797         s->dct_quantize = ff_dct_quantize_c;
798     if (!s->denoise_dct)
799         s->denoise_dct  = denoise_dct_c;
800     s->fast_dct_quantize = s->dct_quantize;
801     if (avctx->trellis)
802         s->dct_quantize  = dct_quantize_trellis_c;
803
804     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
805         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
806
807     if (s->slice_context_count > 1) {
808         s->rtp_mode = 1;
809
810         if (avctx->codec_id == AV_CODEC_ID_H263 || avctx->codec_id == AV_CODEC_ID_H263P)
811             s->h263_slice_structured = 1;
812     }
813
814     s->quant_precision = 5;
815
816 #if FF_API_PRIVATE_OPT
817 FF_DISABLE_DEPRECATION_WARNINGS
818     if (avctx->frame_skip_threshold)
819         s->frame_skip_threshold = avctx->frame_skip_threshold;
820     if (avctx->frame_skip_factor)
821         s->frame_skip_factor = avctx->frame_skip_factor;
822     if (avctx->frame_skip_exp)
823         s->frame_skip_exp = avctx->frame_skip_exp;
824     if (avctx->frame_skip_cmp != FF_CMP_DCTMAX)
825         s->frame_skip_cmp = avctx->frame_skip_cmp;
826 FF_ENABLE_DEPRECATION_WARNINGS
827 #endif
828
829     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
830     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
831
832     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
833         ff_h261_encode_init(s);
834     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
835         ff_h263_encode_init(s);
836     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
837         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
838             return ret;
839     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
840         && s->out_format == FMT_MPEG1)
841         ff_mpeg1_encode_init(s);
842
843     /* init q matrix */
844     for (i = 0; i < 64; i++) {
845         int j = s->idsp.idct_permutation[i];
846         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
847             s->mpeg_quant) {
848             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
849             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
850         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
851             s->intra_matrix[j] =
852             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
853         } else {
854             /* MPEG-1/2 */
855             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
856             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
857         }
858         if (s->avctx->intra_matrix)
859             s->intra_matrix[j] = s->avctx->intra_matrix[i];
860         if (s->avctx->inter_matrix)
861             s->inter_matrix[j] = s->avctx->inter_matrix[i];
862     }
863
864     /* precompute matrix */
865     /* for mjpeg, we do include qscale in the matrix */
866     if (s->out_format != FMT_MJPEG) {
867         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
868                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
869                           31, 1);
870         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
871                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
872                           31, 0);
873     }
874
875 #if FF_API_RC_STRATEGY
876 FF_DISABLE_DEPRECATION_WARNINGS
877     if (!s->rc_strategy)
878         s->rc_strategy = s->avctx->rc_strategy;
879 FF_ENABLE_DEPRECATION_WARNINGS
880 #endif
881
882     if (ff_rate_control_init(s) < 0)
883         return -1;
884
885     if ((s->avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == 1) {
886 #if CONFIG_LIBXVID
887         ret = ff_xvid_rate_control_init(s);
888 #else
889         ret = AVERROR(ENOSYS);
890         av_log(s->avctx, AV_LOG_ERROR,
891                "Xvid ratecontrol requires libavcodec compiled with Xvid support.\n");
892 #endif
893         if (ret < 0)
894             return ret;
895     }
896
897 #if FF_API_ERROR_RATE
898     FF_DISABLE_DEPRECATION_WARNINGS
899     if (avctx->error_rate)
900         s->error_rate = avctx->error_rate;
901     FF_ENABLE_DEPRECATION_WARNINGS;
902 #endif
903
904 #if FF_API_NORMALIZE_AQP
905     FF_DISABLE_DEPRECATION_WARNINGS
906     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
907         s->mpv_flags |= FF_MPV_FLAG_NAQ;
908     FF_ENABLE_DEPRECATION_WARNINGS;
909 #endif
910
911 #if FF_API_MV0
912     FF_DISABLE_DEPRECATION_WARNINGS
913     if (avctx->flags & CODEC_FLAG_MV0)
914         s->mpv_flags |= FF_MPV_FLAG_MV0;
915     FF_ENABLE_DEPRECATION_WARNINGS
916 #endif
917
918 #if FF_API_MPV_OPT
919     FF_DISABLE_DEPRECATION_WARNINGS
920     if (avctx->rc_qsquish != 0.0)
921         s->rc_qsquish = avctx->rc_qsquish;
922     if (avctx->rc_qmod_amp != 0.0)
923         s->rc_qmod_amp = avctx->rc_qmod_amp;
924     if (avctx->rc_qmod_freq)
925         s->rc_qmod_freq = avctx->rc_qmod_freq;
926     if (avctx->rc_buffer_aggressivity != 1.0)
927         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
928     if (avctx->rc_initial_cplx != 0.0)
929         s->rc_initial_cplx = avctx->rc_initial_cplx;
930     if (avctx->lmin)
931         s->lmin = avctx->lmin;
932     if (avctx->lmax)
933         s->lmax = avctx->lmax;
934
935     if (avctx->rc_eq) {
936         av_freep(&s->rc_eq);
937         s->rc_eq = av_strdup(avctx->rc_eq);
938         if (!s->rc_eq)
939             return AVERROR(ENOMEM);
940     }
941     FF_ENABLE_DEPRECATION_WARNINGS
942 #endif
943
944 #if FF_API_PRIVATE_OPT
945     FF_DISABLE_DEPRECATION_WARNINGS
946     if (avctx->brd_scale)
947         s->brd_scale = avctx->brd_scale;
948
949     if (avctx->prediction_method)
950         s->pred = avctx->prediction_method + 1;
951     FF_ENABLE_DEPRECATION_WARNINGS
952 #endif
953
954     if (s->b_frame_strategy == 2) {
955         for (i = 0; i < s->max_b_frames + 2; i++) {
956             s->tmp_frames[i] = av_frame_alloc();
957             if (!s->tmp_frames[i])
958                 return AVERROR(ENOMEM);
959
960             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
961             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
962             s->tmp_frames[i]->height = s->height >> s->brd_scale;
963
964             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
965             if (ret < 0)
966                 return ret;
967         }
968     }
969
970     cpb_props = ff_add_cpb_side_data(avctx);
971     if (!cpb_props)
972         return AVERROR(ENOMEM);
973     cpb_props->max_bitrate = avctx->rc_max_rate;
974     cpb_props->min_bitrate = avctx->rc_min_rate;
975     cpb_props->avg_bitrate = avctx->bit_rate;
976     cpb_props->buffer_size = avctx->rc_buffer_size;
977
978     return 0;
979 fail:
980     ff_mpv_encode_end(avctx);
981     return AVERROR_UNKNOWN;
982 }
983
984 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
985 {
986     MpegEncContext *s = avctx->priv_data;
987     int i;
988
989     ff_rate_control_uninit(s);
990 #if CONFIG_LIBXVID
991     if ((avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == 1)
992         ff_xvid_rate_control_uninit(s);
993 #endif
994
995     ff_mpv_common_end(s);
996     if (CONFIG_MJPEG_ENCODER &&
997         s->out_format == FMT_MJPEG)
998         ff_mjpeg_encode_close(s);
999
1000     av_freep(&avctx->extradata);
1001
1002     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1003         av_frame_free(&s->tmp_frames[i]);
1004
1005     ff_free_picture_tables(&s->new_picture);
1006     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1007
1008     av_freep(&s->avctx->stats_out);
1009     av_freep(&s->ac_stats);
1010
1011     av_freep(&s->q_intra_matrix);
1012     av_freep(&s->q_inter_matrix);
1013     av_freep(&s->q_intra_matrix16);
1014     av_freep(&s->q_inter_matrix16);
1015     av_freep(&s->input_picture);
1016     av_freep(&s->reordered_input_picture);
1017     av_freep(&s->dct_offset);
1018
1019     return 0;
1020 }
1021
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block; only read, never written
 * @param ref    value every sample is compared with
 * @param stride distance in bytes between two vertically adjacent samples
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(const uint8_t *src, int ref, int stride)
{
    int x, y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        const uint8_t *row = src + y * stride;

        for (x = 0; x < 16; x++) {
            const int diff = row[x] - ref;

            acc += diff >= 0 ? diff : -diff;
        }
    }

    return acc;
}
1035
1036 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1037                            uint8_t *ref, int stride)
1038 {
1039     int x, y, w, h;
1040     int acc = 0;
1041
1042     w = s->width  & ~15;
1043     h = s->height & ~15;
1044
1045     for (y = 0; y < h; y += 16) {
1046         for (x = 0; x < w; x += 16) {
1047             int offset = x + y * stride;
1048             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1049                                       stride, 16);
1050             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1051             int sae  = get_sae(src + offset, mean, stride);
1052
1053             acc += sae + 500 < sad;
1054         }
1055     }
1056     return acc;
1057 }
1058
1059 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1060 {
1061     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1062                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1063                             s->mb_stride, s->mb_height, s->b8_stride,
1064                             &s->linesize, &s->uvlinesize);
1065 }
1066
1067 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1068 {
1069     Picture *pic = NULL;
1070     int64_t pts;
1071     int i, display_picture_number = 0, ret;
1072     int encoding_delay = s->max_b_frames ? s->max_b_frames
1073                                          : (s->low_delay ? 0 : 1);
1074     int flush_offset = 1;
1075     int direct = 1;
1076
1077     if (pic_arg) {
1078         pts = pic_arg->pts;
1079         display_picture_number = s->input_picture_number++;
1080
1081         if (pts != AV_NOPTS_VALUE) {
1082             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1083                 int64_t time = pts;
1084                 int64_t last = s->user_specified_pts;
1085
1086                 if (time <= last) {
1087                     av_log(s->avctx, AV_LOG_ERROR,
1088                            "Error, Invalid timestamp=%"PRId64", "
1089                            "last=%"PRId64"\n", pts, s->user_specified_pts);
1090                     return -1;
1091                 }
1092
1093                 if (!s->low_delay && display_picture_number == 1)
1094                     s->dts_delta = time - last;
1095             }
1096             s->user_specified_pts = pts;
1097         } else {
1098             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1099                 s->user_specified_pts =
1100                 pts = s->user_specified_pts + 1;
1101                 av_log(s->avctx, AV_LOG_INFO,
1102                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1103                        pts);
1104             } else {
1105                 pts = display_picture_number;
1106             }
1107         }
1108
1109         if (!pic_arg->buf[0] ||
1110             pic_arg->linesize[0] != s->linesize ||
1111             pic_arg->linesize[1] != s->uvlinesize ||
1112             pic_arg->linesize[2] != s->uvlinesize)
1113             direct = 0;
1114         if ((s->width & 15) || (s->height & 15))
1115             direct = 0;
1116
1117         ff_dlog(s->avctx, "%d %d %td %td\n", pic_arg->linesize[0],
1118                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1119
1120         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1121         if (i < 0)
1122             return i;
1123
1124         pic = &s->picture[i];
1125         pic->reference = 3;
1126
1127         if (direct) {
1128             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1129                 return ret;
1130         }
1131         ret = alloc_picture(s, pic, direct);
1132         if (ret < 0)
1133             return ret;
1134
1135         if (!direct) {
1136             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1137                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1138                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1139                 // empty
1140             } else {
1141                 int h_chroma_shift, v_chroma_shift;
1142                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1143                                                  &h_chroma_shift,
1144                                                  &v_chroma_shift);
1145
1146                 for (i = 0; i < 3; i++) {
1147                     int src_stride = pic_arg->linesize[i];
1148                     int dst_stride = i ? s->uvlinesize : s->linesize;
1149                     int h_shift = i ? h_chroma_shift : 0;
1150                     int v_shift = i ? v_chroma_shift : 0;
1151                     int w = s->width  >> h_shift;
1152                     int h = s->height >> v_shift;
1153                     uint8_t *src = pic_arg->data[i];
1154                     uint8_t *dst = pic->f->data[i];
1155
1156                     if (!s->avctx->rc_buffer_size)
1157                         dst += INPLACE_OFFSET;
1158
1159                     if (src_stride == dst_stride)
1160                         memcpy(dst, src, src_stride * h);
1161                     else {
1162                         int h2 = h;
1163                         uint8_t *dst2 = dst;
1164                         while (h2--) {
1165                             memcpy(dst2, src, w);
1166                             dst2 += dst_stride;
1167                             src += src_stride;
1168                         }
1169                     }
1170                     if ((s->width & 15) || (s->height & 15)) {
1171                         s->mpvencdsp.draw_edges(dst, dst_stride,
1172                                                 w, h,
1173                                                 16 >> h_shift,
1174                                                 16 >> v_shift,
1175                                                 EDGE_BOTTOM);
1176                     }
1177                 }
1178             }
1179         }
1180         ret = av_frame_copy_props(pic->f, pic_arg);
1181         if (ret < 0)
1182             return ret;
1183
1184         pic->f->display_picture_number = display_picture_number;
1185         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1186     } else {
1187         /* Flushing: When we have not received enough input frames,
1188          * ensure s->input_picture[0] contains the first picture */
1189         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1190             if (s->input_picture[flush_offset])
1191                 break;
1192
1193         if (flush_offset <= 1)
1194             flush_offset = 1;
1195         else
1196             encoding_delay = encoding_delay - flush_offset + 1;
1197     }
1198
1199     /* shift buffer entries */
1200     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1201         s->input_picture[i - flush_offset] = s->input_picture[i];
1202
1203     s->input_picture[encoding_delay] = (Picture*) pic;
1204
1205     return 0;
1206 }
1207
1208 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1209 {
1210     int x, y, plane;
1211     int score = 0;
1212     int64_t score64 = 0;
1213
1214     for (plane = 0; plane < 3; plane++) {
1215         const int stride = p->f->linesize[plane];
1216         const int bw = plane ? 1 : 2;
1217         for (y = 0; y < s->mb_height * bw; y++) {
1218             for (x = 0; x < s->mb_width * bw; x++) {
1219                 int off = p->shared ? 0 : 16;
1220                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1221                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1222                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1223
1224                 switch (s->frame_skip_exp) {
1225                 case 0: score    =  FFMAX(score, v);          break;
1226                 case 1: score   += FFABS(v);                  break;
1227                 case 2: score   += v * v;                     break;
1228                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1229                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1230                 }
1231             }
1232         }
1233     }
1234
1235     if (score)
1236         score64 = score;
1237
1238     if (score64 < s->frame_skip_threshold)
1239         return 1;
1240     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1241         return 1;
1242     return 0;
1243 }
1244
1245 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1246 {
1247     AVPacket pkt = { 0 };
1248     int ret;
1249     int size = 0;
1250
1251     av_init_packet(&pkt);
1252
1253     ret = avcodec_send_frame(c, frame);
1254     if (ret < 0)
1255         return ret;
1256
1257     do {
1258         ret = avcodec_receive_packet(c, &pkt);
1259         if (ret >= 0) {
1260             size += pkt.size;
1261             av_packet_unref(&pkt);
1262         } else if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
1263             return ret;
1264     } while (ret >= 0);
1265
1266     return size;
1267 }
1268
1269 static int estimate_best_b_count(MpegEncContext *s)
1270 {
1271     const AVCodec *codec = avcodec_find_encoder(s->avctx->codec_id);
1272     const int scale = s->brd_scale;
1273     int width  = s->width  >> scale;
1274     int height = s->height >> scale;
1275     int i, j, out_size, p_lambda, b_lambda, lambda2;
1276     int64_t best_rd  = INT64_MAX;
1277     int best_b_count = -1;
1278     int ret = 0;
1279
1280     assert(scale >= 0 && scale <= 3);
1281
1282     //emms_c();
1283     //s->next_picture_ptr->quality;
1284     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1285     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1286     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1287     if (!b_lambda) // FIXME we should do this somewhere else
1288         b_lambda = p_lambda;
1289     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1290                FF_LAMBDA_SHIFT;
1291
1292     for (i = 0; i < s->max_b_frames + 2; i++) {
1293         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1294                                                 s->next_picture_ptr;
1295
1296         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1297             pre_input = *pre_input_ptr;
1298
1299             if (!pre_input.shared && i) {
1300                 pre_input.f->data[0] += INPLACE_OFFSET;
1301                 pre_input.f->data[1] += INPLACE_OFFSET;
1302                 pre_input.f->data[2] += INPLACE_OFFSET;
1303             }
1304
1305             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1306                                        s->tmp_frames[i]->linesize[0],
1307                                        pre_input.f->data[0],
1308                                        pre_input.f->linesize[0],
1309                                        width, height);
1310             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1311                                        s->tmp_frames[i]->linesize[1],
1312                                        pre_input.f->data[1],
1313                                        pre_input.f->linesize[1],
1314                                        width >> 1, height >> 1);
1315             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1316                                        s->tmp_frames[i]->linesize[2],
1317                                        pre_input.f->data[2],
1318                                        pre_input.f->linesize[2],
1319                                        width >> 1, height >> 1);
1320         }
1321     }
1322
1323     for (j = 0; j < s->max_b_frames + 1; j++) {
1324         AVCodecContext *c;
1325         int64_t rd = 0;
1326
1327         if (!s->input_picture[j])
1328             break;
1329
1330         c = avcodec_alloc_context3(NULL);
1331         if (!c)
1332             return AVERROR(ENOMEM);
1333
1334         c->width        = width;
1335         c->height       = height;
1336         c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1337         c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1338         c->mb_decision  = s->avctx->mb_decision;
1339         c->me_cmp       = s->avctx->me_cmp;
1340         c->mb_cmp       = s->avctx->mb_cmp;
1341         c->me_sub_cmp   = s->avctx->me_sub_cmp;
1342         c->pix_fmt      = AV_PIX_FMT_YUV420P;
1343         c->time_base    = s->avctx->time_base;
1344         c->max_b_frames = s->max_b_frames;
1345
1346         ret = avcodec_open2(c, codec, NULL);
1347         if (ret < 0)
1348             goto fail;
1349
1350         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1351         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1352
1353         out_size = encode_frame(c, s->tmp_frames[0]);
1354         if (out_size < 0) {
1355             ret = out_size;
1356             goto fail;
1357         }
1358
1359         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1360
1361         for (i = 0; i < s->max_b_frames + 1; i++) {
1362             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1363
1364             s->tmp_frames[i + 1]->pict_type = is_p ?
1365                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1366             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1367
1368             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1369             if (out_size < 0) {
1370                 ret = out_size;
1371                 goto fail;
1372             }
1373
1374             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1375         }
1376
1377         /* get the delayed frames */
1378         out_size = encode_frame(c, NULL);
1379         if (out_size < 0) {
1380             ret = out_size;
1381             goto fail;
1382         }
1383         rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1384
1385         rd += c->error[0] + c->error[1] + c->error[2];
1386
1387         if (rd < best_rd) {
1388             best_rd = rd;
1389             best_b_count = j;
1390         }
1391
1392 fail:
1393         avcodec_free_context(&c);
1394         if (ret < 0)
1395             return ret;
1396     }
1397
1398     return best_b_count;
1399 }
1400
1401 static int select_input_picture(MpegEncContext *s)
1402 {
1403     int i, ret;
1404
1405     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1406         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1407     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1408
1409     /* set next picture type & ordering */
1410     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1411         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1412             !s->next_picture_ptr || s->intra_only) {
1413             s->reordered_input_picture[0] = s->input_picture[0];
1414             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1415             s->reordered_input_picture[0]->f->coded_picture_number =
1416                 s->coded_picture_number++;
1417         } else {
1418             int b_frames = 0;
1419
1420             if (s->frame_skip_threshold || s->frame_skip_factor) {
1421                 if (s->picture_in_gop_number < s->gop_size &&
1422                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1423                     // FIXME check that the gop check above is +-1 correct
1424                     av_frame_unref(s->input_picture[0]->f);
1425
1426                     emms_c();
1427                     ff_vbv_update(s, 0);
1428
1429                     goto no_output_pic;
1430                 }
1431             }
1432
1433             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1434                 for (i = 0; i < s->max_b_frames + 1; i++) {
1435                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1436
1437                     if (pict_num >= s->rc_context.num_entries)
1438                         break;
1439                     if (!s->input_picture[i]) {
1440                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1441                         break;
1442                     }
1443
1444                     s->input_picture[i]->f->pict_type =
1445                         s->rc_context.entry[pict_num].new_pict_type;
1446                 }
1447             }
1448
1449             if (s->b_frame_strategy == 0) {
1450                 b_frames = s->max_b_frames;
1451                 while (b_frames && !s->input_picture[b_frames])
1452                     b_frames--;
1453             } else if (s->b_frame_strategy == 1) {
1454                 for (i = 1; i < s->max_b_frames + 1; i++) {
1455                     if (s->input_picture[i] &&
1456                         s->input_picture[i]->b_frame_score == 0) {
1457                         s->input_picture[i]->b_frame_score =
1458                             get_intra_count(s,
1459                                             s->input_picture[i    ]->f->data[0],
1460                                             s->input_picture[i - 1]->f->data[0],
1461                                             s->linesize) + 1;
1462                     }
1463                 }
1464                 for (i = 0; i < s->max_b_frames + 1; i++) {
1465                     if (!s->input_picture[i] ||
1466                         s->input_picture[i]->b_frame_score - 1 >
1467                             s->mb_num / s->b_sensitivity)
1468                         break;
1469                 }
1470
1471                 b_frames = FFMAX(0, i - 1);
1472
1473                 /* reset scores */
1474                 for (i = 0; i < b_frames + 1; i++) {
1475                     s->input_picture[i]->b_frame_score = 0;
1476                 }
1477             } else if (s->b_frame_strategy == 2) {
1478                 b_frames = estimate_best_b_count(s);
1479                 if (b_frames < 0)
1480                     return b_frames;
1481             }
1482
1483             emms_c();
1484
1485             for (i = b_frames - 1; i >= 0; i--) {
1486                 int type = s->input_picture[i]->f->pict_type;
1487                 if (type && type != AV_PICTURE_TYPE_B)
1488                     b_frames = i;
1489             }
1490             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1491                 b_frames == s->max_b_frames) {
1492                 av_log(s->avctx, AV_LOG_ERROR,
1493                        "warning, too many B-frames in a row\n");
1494             }
1495
1496             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1497                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1498                     s->gop_size > s->picture_in_gop_number) {
1499                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1500                 } else {
1501                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1502                         b_frames = 0;
1503                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1504                 }
1505             }
1506
1507             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1508                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1509                 b_frames--;
1510
1511             s->reordered_input_picture[0] = s->input_picture[b_frames];
1512             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1513                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1514             s->reordered_input_picture[0]->f->coded_picture_number =
1515                 s->coded_picture_number++;
1516             for (i = 0; i < b_frames; i++) {
1517                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1518                 s->reordered_input_picture[i + 1]->f->pict_type =
1519                     AV_PICTURE_TYPE_B;
1520                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1521                     s->coded_picture_number++;
1522             }
1523         }
1524     }
1525 no_output_pic:
1526     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1527
1528     if (s->reordered_input_picture[0]) {
1529         s->reordered_input_picture[0]->reference =
1530            s->reordered_input_picture[0]->f->pict_type !=
1531                AV_PICTURE_TYPE_B ? 3 : 0;
1532
1533         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1534             return ret;
1535
1536         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1537             // input is a shared pix, so we can't modify it -> allocate a new
1538             // one & ensure that the shared one is reuseable
1539
1540             Picture *pic;
1541             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1542             if (i < 0)
1543                 return i;
1544             pic = &s->picture[i];
1545
1546             pic->reference = s->reordered_input_picture[0]->reference;
1547             if (alloc_picture(s, pic, 0) < 0) {
1548                 return -1;
1549             }
1550
1551             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1552             if (ret < 0)
1553                 return ret;
1554
1555             /* mark us unused / free shared pic */
1556             av_frame_unref(s->reordered_input_picture[0]->f);
1557             s->reordered_input_picture[0]->shared = 0;
1558
1559             s->current_picture_ptr = pic;
1560         } else {
1561             // input is not a shared pix -> reuse buffer for current_pix
1562             s->current_picture_ptr = s->reordered_input_picture[0];
1563             for (i = 0; i < 4; i++) {
1564                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1565             }
1566         }
1567         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1568         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1569                                        s->current_picture_ptr)) < 0)
1570             return ret;
1571
1572         s->picture_number = s->new_picture.f->display_picture_number;
1573     }
1574     return 0;
1575 }
1576
1577 static void frame_end(MpegEncContext *s)
1578 {
1579     int i;
1580
1581     if (s->unrestricted_mv &&
1582         s->current_picture.reference &&
1583         !s->intra_only) {
1584         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1585         int hshift = desc->log2_chroma_w;
1586         int vshift = desc->log2_chroma_h;
1587         s->mpvencdsp.draw_edges(s->current_picture.f->data[0], s->linesize,
1588                                 s->h_edge_pos, s->v_edge_pos,
1589                                 EDGE_WIDTH, EDGE_WIDTH,
1590                                 EDGE_TOP | EDGE_BOTTOM);
1591         s->mpvencdsp.draw_edges(s->current_picture.f->data[1], s->uvlinesize,
1592                                 s->h_edge_pos >> hshift,
1593                                 s->v_edge_pos >> vshift,
1594                                 EDGE_WIDTH >> hshift,
1595                                 EDGE_WIDTH >> vshift,
1596                                 EDGE_TOP | EDGE_BOTTOM);
1597         s->mpvencdsp.draw_edges(s->current_picture.f->data[2], s->uvlinesize,
1598                                 s->h_edge_pos >> hshift,
1599                                 s->v_edge_pos >> vshift,
1600                                 EDGE_WIDTH >> hshift,
1601                                 EDGE_WIDTH >> vshift,
1602                                 EDGE_TOP | EDGE_BOTTOM);
1603     }
1604
1605     emms_c();
1606
1607     s->last_pict_type                 = s->pict_type;
1608     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1609     if (s->pict_type!= AV_PICTURE_TYPE_B)
1610         s->last_non_b_pict_type = s->pict_type;
1611
1612     if (s->encoding) {
1613         /* release non-reference frames */
1614         for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1615             if (!s->picture[i].reference)
1616                 ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1617         }
1618     }
1619
1620 #if FF_API_CODED_FRAME
1621 FF_DISABLE_DEPRECATION_WARNINGS
1622     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1623 FF_ENABLE_DEPRECATION_WARNINGS
1624 #endif
1625 #if FF_API_ERROR_FRAME
1626 FF_DISABLE_DEPRECATION_WARNINGS
1627     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1628            sizeof(s->current_picture.encoding_error));
1629 FF_ENABLE_DEPRECATION_WARNINGS
1630 #endif
1631 }
1632
1633 static void update_noise_reduction(MpegEncContext *s)
1634 {
1635     int intra, i;
1636
1637     for (intra = 0; intra < 2; intra++) {
1638         if (s->dct_count[intra] > (1 << 16)) {
1639             for (i = 0; i < 64; i++) {
1640                 s->dct_error_sum[intra][i] >>= 1;
1641             }
1642             s->dct_count[intra] >>= 1;
1643         }
1644
1645         for (i = 0; i < 64; i++) {
1646             s->dct_offset[intra][i] = (s->noise_reduction *
1647                                        s->dct_count[intra] +
1648                                        s->dct_error_sum[intra][i] / 2) /
1649                                       (s->dct_error_sum[intra][i] + 1);
1650         }
1651     }
1652 }
1653
1654 static int frame_start(MpegEncContext *s)
1655 {
1656     int ret;
1657
1658     /* mark & release old frames */
1659     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1660         s->last_picture_ptr != s->next_picture_ptr &&
1661         s->last_picture_ptr->f->buf[0]) {
1662         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1663     }
1664
1665     s->current_picture_ptr->f->pict_type = s->pict_type;
1666     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1667
1668     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1669     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1670                                    s->current_picture_ptr)) < 0)
1671         return ret;
1672
1673     if (s->pict_type != AV_PICTURE_TYPE_B) {
1674         s->last_picture_ptr = s->next_picture_ptr;
1675         if (!s->droppable)
1676             s->next_picture_ptr = s->current_picture_ptr;
1677     }
1678
1679     if (s->last_picture_ptr) {
1680         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1681         if (s->last_picture_ptr->f->buf[0] &&
1682             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1683                                        s->last_picture_ptr)) < 0)
1684             return ret;
1685     }
1686     if (s->next_picture_ptr) {
1687         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1688         if (s->next_picture_ptr->f->buf[0] &&
1689             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1690                                        s->next_picture_ptr)) < 0)
1691             return ret;
1692     }
1693
1694     if (s->picture_structure!= PICT_FRAME) {
1695         int i;
1696         for (i = 0; i < 4; i++) {
1697             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1698                 s->current_picture.f->data[i] +=
1699                     s->current_picture.f->linesize[i];
1700             }
1701             s->current_picture.f->linesize[i] *= 2;
1702             s->last_picture.f->linesize[i]    *= 2;
1703             s->next_picture.f->linesize[i]    *= 2;
1704         }
1705     }
1706
1707     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1708         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1709         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1710     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1711         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1712         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1713     } else {
1714         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1715         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1716     }
1717
1718     if (s->dct_error_sum) {
1719         assert(s->noise_reduction && s->encoding);
1720         update_noise_reduction(s);
1721     }
1722
1723     return 0;
1724 }
1725
1726 static void write_pass1_stats(MpegEncContext *s)
1727 {
1728     snprintf(s->avctx->stats_out, 256,
1729              "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d "
1730              "fcode:%d bcode:%d mc-var:%d var:%d icount:%d skipcount:%d "
1731              "hbits:%d;\n",
1732              s->current_picture_ptr->f->display_picture_number,
1733              s->current_picture_ptr->f->coded_picture_number,
1734              s->pict_type,
1735              s->current_picture.f->quality,
1736              s->i_tex_bits,
1737              s->p_tex_bits,
1738              s->mv_bits,
1739              s->misc_bits,
1740              s->f_code,
1741              s->b_code,
1742              s->current_picture.mc_mb_var_sum,
1743              s->current_picture.mb_var_sum,
1744              s->i_count, s->skip_count,
1745              s->header_bits);
1746 }
1747
1748 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1749                           const AVFrame *pic_arg, int *got_packet)
1750 {
1751     MpegEncContext *s = avctx->priv_data;
1752     int i, stuffing_count, ret;
1753     int context_count = s->slice_context_count;
1754
1755     s->picture_in_gop_number++;
1756
1757     if (load_input_picture(s, pic_arg) < 0)
1758         return -1;
1759
1760     if (select_input_picture(s) < 0) {
1761         return -1;
1762     }
1763
1764     /* output? */
1765     if (s->new_picture.f->data[0]) {
1766         uint8_t *sd;
1767         if (!pkt->data &&
1768             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1769             return ret;
1770         if (s->mb_info) {
1771             s->mb_info_ptr = av_packet_new_side_data(pkt,
1772                                  AV_PKT_DATA_H263_MB_INFO,
1773                                  s->mb_width*s->mb_height*12);
1774             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1775         }
1776
1777         for (i = 0; i < context_count; i++) {
1778             int start_y = s->thread_context[i]->start_mb_y;
1779             int   end_y = s->thread_context[i]->  end_mb_y;
1780             int h       = s->mb_height;
1781             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1782             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1783
1784             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1785         }
1786
1787         s->pict_type = s->new_picture.f->pict_type;
1788         //emms_c();
1789         ret = frame_start(s);
1790         if (ret < 0)
1791             return ret;
1792 vbv_retry:
1793         if (encode_picture(s, s->picture_number) < 0)
1794             return -1;
1795
1796 #if FF_API_STAT_BITS
1797 FF_DISABLE_DEPRECATION_WARNINGS
1798         avctx->header_bits = s->header_bits;
1799         avctx->mv_bits     = s->mv_bits;
1800         avctx->misc_bits   = s->misc_bits;
1801         avctx->i_tex_bits  = s->i_tex_bits;
1802         avctx->p_tex_bits  = s->p_tex_bits;
1803         avctx->i_count     = s->i_count;
1804         // FIXME f/b_count in avctx
1805         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1806         avctx->skip_count  = s->skip_count;
1807 FF_ENABLE_DEPRECATION_WARNINGS
1808 #endif
1809
1810         frame_end(s);
1811
1812         sd = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_FACTOR,
1813                                      sizeof(int));
1814         if (!sd)
1815             return AVERROR(ENOMEM);
1816         *(int *)sd = s->current_picture.f->quality;
1817
1818         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1819             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1820
1821         if (avctx->rc_buffer_size) {
1822             RateControlContext *rcc = &s->rc_context;
1823             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1824
1825             if (put_bits_count(&s->pb) > max_size &&
1826                 s->lambda < s->lmax) {
1827                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1828                                        (s->qscale + 1) / s->qscale);
1829                 if (s->adaptive_quant) {
1830                     int i;
1831                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1832                         s->lambda_table[i] =
1833                             FFMAX(s->lambda_table[i] + 1,
1834                                   s->lambda_table[i] * (s->qscale + 1) /
1835                                   s->qscale);
1836                 }
1837                 s->mb_skipped = 0;        // done in frame_start()
1838                 // done in encode_picture() so we must undo it
1839                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1840                     if (s->flipflop_rounding          ||
1841                         s->codec_id == AV_CODEC_ID_H263P ||
1842                         s->codec_id == AV_CODEC_ID_MPEG4)
1843                         s->no_rounding ^= 1;
1844                 }
1845                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1846                     s->time_base       = s->last_time_base;
1847                     s->last_non_b_time = s->time - s->pp_time;
1848                 }
1849                 for (i = 0; i < context_count; i++) {
1850                     PutBitContext *pb = &s->thread_context[i]->pb;
1851                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1852                 }
1853                 goto vbv_retry;
1854             }
1855
1856             assert(s->avctx->rc_max_rate);
1857         }
1858
1859         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1860             write_pass1_stats(s);
1861
1862         for (i = 0; i < 4; i++) {
1863             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1864             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1865         }
1866
1867         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1868             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1869                                              s->misc_bits + s->i_tex_bits +
1870                                              s->p_tex_bits);
1871         flush_put_bits(&s->pb);
1872         s->frame_bits  = put_bits_count(&s->pb);
1873
1874         stuffing_count = ff_vbv_update(s, s->frame_bits);
1875         if (stuffing_count) {
1876             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1877                     stuffing_count + 50) {
1878                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1879                 return -1;
1880             }
1881
1882             switch (s->codec_id) {
1883             case AV_CODEC_ID_MPEG1VIDEO:
1884             case AV_CODEC_ID_MPEG2VIDEO:
1885                 while (stuffing_count--) {
1886                     put_bits(&s->pb, 8, 0);
1887                 }
1888             break;
1889             case AV_CODEC_ID_MPEG4:
1890                 put_bits(&s->pb, 16, 0);
1891                 put_bits(&s->pb, 16, 0x1C3);
1892                 stuffing_count -= 4;
1893                 while (stuffing_count--) {
1894                     put_bits(&s->pb, 8, 0xFF);
1895                 }
1896             break;
1897             default:
1898                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1899             }
1900             flush_put_bits(&s->pb);
1901             s->frame_bits  = put_bits_count(&s->pb);
1902         }
1903
1904         /* update MPEG-1/2 vbv_delay for CBR */
1905         if (s->avctx->rc_max_rate                          &&
1906             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1907             s->out_format == FMT_MPEG1                     &&
1908             90000LL * (avctx->rc_buffer_size - 1) <=
1909                 s->avctx->rc_max_rate * 0xFFFFLL) {
1910             AVCPBProperties *props;
1911             size_t props_size;
1912
1913             int vbv_delay, min_delay;
1914             double inbits  = s->avctx->rc_max_rate *
1915                              av_q2d(s->avctx->time_base);
1916             int    minbits = s->frame_bits - 8 *
1917                              (s->vbv_delay_ptr - s->pb.buf - 1);
1918             double bits    = s->rc_context.buffer_index + minbits - inbits;
1919
1920             if (bits < 0)
1921                 av_log(s->avctx, AV_LOG_ERROR,
1922                        "Internal error, negative bits\n");
1923
1924             assert(s->repeat_first_field == 0);
1925
1926             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1927             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1928                         s->avctx->rc_max_rate;
1929
1930             vbv_delay = FFMAX(vbv_delay, min_delay);
1931
1932             assert(vbv_delay < 0xFFFF);
1933
1934             s->vbv_delay_ptr[0] &= 0xF8;
1935             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1936             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1937             s->vbv_delay_ptr[2] &= 0x07;
1938             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1939
1940             props = av_cpb_properties_alloc(&props_size);
1941             if (!props)
1942                 return AVERROR(ENOMEM);
1943             props->vbv_delay = vbv_delay * 300;
1944
1945             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1946                                           (uint8_t*)props, props_size);
1947             if (ret < 0) {
1948                 av_freep(&props);
1949                 return ret;
1950             }
1951
1952 #if FF_API_VBV_DELAY
1953 FF_DISABLE_DEPRECATION_WARNINGS
1954             avctx->vbv_delay     = vbv_delay * 300;
1955 FF_ENABLE_DEPRECATION_WARNINGS
1956 #endif
1957         }
1958         s->total_bits     += s->frame_bits;
1959 #if FF_API_STAT_BITS
1960 FF_DISABLE_DEPRECATION_WARNINGS
1961         avctx->frame_bits  = s->frame_bits;
1962 FF_ENABLE_DEPRECATION_WARNINGS
1963 #endif
1964
1965
1966         pkt->pts = s->current_picture.f->pts;
1967         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1968             if (!s->current_picture.f->coded_picture_number)
1969                 pkt->dts = pkt->pts - s->dts_delta;
1970             else
1971                 pkt->dts = s->reordered_pts;
1972             s->reordered_pts = pkt->pts;
1973         } else
1974             pkt->dts = pkt->pts;
1975         if (s->current_picture.f->key_frame)
1976             pkt->flags |= AV_PKT_FLAG_KEY;
1977         if (s->mb_info)
1978             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1979     } else {
1980         s->frame_bits = 0;
1981     }
1982     assert((s->frame_bits & 7) == 0);
1983
1984     pkt->size = s->frame_bits / 8;
1985     *got_packet = !!pkt->size;
1986     return 0;
1987 }
1988
1989 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1990                                                 int n, int threshold)
1991 {
1992     static const char tab[64] = {
1993         3, 2, 2, 1, 1, 1, 1, 1,
1994         1, 1, 1, 1, 1, 1, 1, 1,
1995         1, 1, 1, 1, 1, 1, 1, 1,
1996         0, 0, 0, 0, 0, 0, 0, 0,
1997         0, 0, 0, 0, 0, 0, 0, 0,
1998         0, 0, 0, 0, 0, 0, 0, 0,
1999         0, 0, 0, 0, 0, 0, 0, 0,
2000         0, 0, 0, 0, 0, 0, 0, 0
2001     };
2002     int score = 0;
2003     int run = 0;
2004     int i;
2005     int16_t *block = s->block[n];
2006     const int last_index = s->block_last_index[n];
2007     int skip_dc;
2008
2009     if (threshold < 0) {
2010         skip_dc = 0;
2011         threshold = -threshold;
2012     } else
2013         skip_dc = 1;
2014
2015     /* Are all we could set to zero already zero? */
2016     if (last_index <= skip_dc - 1)
2017         return;
2018
2019     for (i = 0; i <= last_index; i++) {
2020         const int j = s->intra_scantable.permutated[i];
2021         const int level = FFABS(block[j]);
2022         if (level == 1) {
2023             if (skip_dc && i == 0)
2024                 continue;
2025             score += tab[run];
2026             run = 0;
2027         } else if (level > 1) {
2028             return;
2029         } else {
2030             run++;
2031         }
2032     }
2033     if (score >= threshold)
2034         return;
2035     for (i = skip_dc; i <= last_index; i++) {
2036         const int j = s->intra_scantable.permutated[i];
2037         block[j] = 0;
2038     }
2039     if (block[0])
2040         s->block_last_index[n] = 0;
2041     else
2042         s->block_last_index[n] = -1;
2043 }
2044
2045 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2046                                int last_index)
2047 {
2048     int i;
2049     const int maxlevel = s->max_qcoeff;
2050     const int minlevel = s->min_qcoeff;
2051     int overflow = 0;
2052
2053     if (s->mb_intra) {
2054         i = 1; // skip clipping of intra dc
2055     } else
2056         i = 0;
2057
2058     for (; i <= last_index; i++) {
2059         const int j = s->intra_scantable.permutated[i];
2060         int level = block[j];
2061
2062         if (level > maxlevel) {
2063             level = maxlevel;
2064             overflow++;
2065         } else if (level < minlevel) {
2066             level = minlevel;
2067             overflow++;
2068         }
2069
2070         block[j] = level;
2071     }
2072
2073     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2074         av_log(s->avctx, AV_LOG_INFO,
2075                "warning, clipping %d dct coefficients to %d..%d\n",
2076                overflow, minlevel, maxlevel);
2077 }
2078
/**
 * Compute a per-pixel weight for an 8x8 block from local pixel activity.
 *
 * For every pixel, the sum and sum of squares of the pixels in its 3x3
 * neighbourhood (cut off at the borders of the 8x8 block) are gathered and
 * the weight is set to 36 * sqrt(count * sqr - sum^2) / count, i.e. 36
 * times the standard deviation of the neighbourhood.
 *
 * @param weight output, 64 entries in row-major order (x + 8 * y)
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of @p ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        /* Vertical extent of the window, limited to the block. */
        const int top    = row > 0 ? row - 1 : 0;
        const int bottom = row < 6 ? row + 2 : 8;

        for (col = 0; col < 8; col++) {
            /* Horizontal extent of the window, limited to the block. */
            const int left  = col > 0 ? col - 1 : 0;
            const int right = col < 6 ? col + 2 : 8;
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int v = ptr[nx + ny * stride];

                    sum += v;
                    sqr += v * v;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2102
2103 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2104                                                 int motion_x, int motion_y,
2105                                                 int mb_block_height,
2106                                                 int mb_block_count)
2107 {
2108     int16_t weight[8][64];
2109     int16_t orig[8][64];
2110     const int mb_x = s->mb_x;
2111     const int mb_y = s->mb_y;
2112     int i;
2113     int skip_dct[8];
2114     int dct_offset = s->linesize * 8; // default for progressive frames
2115     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2116     ptrdiff_t wrap_y, wrap_c;
2117
2118     for (i = 0; i < mb_block_count; i++)
2119         skip_dct[i] = s->skipdct;
2120
2121     if (s->adaptive_quant) {
2122         const int last_qp = s->qscale;
2123         const int mb_xy = mb_x + mb_y * s->mb_stride;
2124
2125         s->lambda = s->lambda_table[mb_xy];
2126         update_qscale(s);
2127
2128         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2129             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2130             s->dquant = s->qscale - last_qp;
2131
2132             if (s->out_format == FMT_H263) {
2133                 s->dquant = av_clip(s->dquant, -2, 2);
2134
2135                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2136                     if (!s->mb_intra) {
2137                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2138                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2139                                 s->dquant = 0;
2140                         }
2141                         if (s->mv_type == MV_TYPE_8X8)
2142                             s->dquant = 0;
2143                     }
2144                 }
2145             }
2146         }
2147         ff_set_qscale(s, last_qp + s->dquant);
2148     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2149         ff_set_qscale(s, s->qscale + s->dquant);
2150
2151     wrap_y = s->linesize;
2152     wrap_c = s->uvlinesize;
2153     ptr_y  = s->new_picture.f->data[0] +
2154              (mb_y * 16 * wrap_y)              + mb_x * 16;
2155     ptr_cb = s->new_picture.f->data[1] +
2156              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2157     ptr_cr = s->new_picture.f->data[2] +
2158              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
2159
2160     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
2161         uint8_t *ebuf = s->sc.edge_emu_buffer + 32;
2162         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2163                                  wrap_y, wrap_y,
2164                                  16, 16, mb_x * 16, mb_y * 16,
2165                                  s->width, s->height);
2166         ptr_y = ebuf;
2167         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb,
2168                                  wrap_c, wrap_c,
2169                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2170                                  s->width >> 1, s->height >> 1);
2171         ptr_cb = ebuf + 18 * wrap_y;
2172         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr,
2173                                  wrap_c, wrap_c,
2174                                  8, mb_block_height, mb_x * 8, mb_y * 8,
2175                                  s->width >> 1, s->height >> 1);
2176         ptr_cr = ebuf + 18 * wrap_y + 8;
2177     }
2178
2179     if (s->mb_intra) {
2180         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2181             int progressive_score, interlaced_score;
2182
2183             s->interlaced_dct = 0;
2184             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2185                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2186                                                      NULL, wrap_y, 8) - 400;
2187
2188             if (progressive_score > 0) {
2189                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2190                                                         NULL, wrap_y * 2, 8) +
2191                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2192                                                         NULL, wrap_y * 2, 8);
2193                 if (progressive_score > interlaced_score) {
2194                     s->interlaced_dct = 1;
2195
2196                     dct_offset = wrap_y;
2197                     wrap_y <<= 1;
2198                     if (s->chroma_format == CHROMA_422)
2199                         wrap_c <<= 1;
2200                 }
2201             }
2202         }
2203
2204         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2205         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2206         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2207         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2208
2209         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2210             skip_dct[4] = 1;
2211             skip_dct[5] = 1;
2212         } else {
2213             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2214             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2215             if (!s->chroma_y_shift) { /* 422 */
2216                 s->pdsp.get_pixels(s->block[6],
2217                                    ptr_cb + (dct_offset >> 1), wrap_c);
2218                 s->pdsp.get_pixels(s->block[7],
2219                                    ptr_cr + (dct_offset >> 1), wrap_c);
2220             }
2221         }
2222     } else {
2223         op_pixels_func (*op_pix)[4];
2224         qpel_mc_func (*op_qpix)[16];
2225         uint8_t *dest_y, *dest_cb, *dest_cr;
2226
2227         dest_y  = s->dest[0];
2228         dest_cb = s->dest[1];
2229         dest_cr = s->dest[2];
2230
2231         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2232             op_pix  = s->hdsp.put_pixels_tab;
2233             op_qpix = s->qdsp.put_qpel_pixels_tab;
2234         } else {
2235             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2236             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2237         }
2238
2239         if (s->mv_dir & MV_DIR_FORWARD) {
2240             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2241                           s->last_picture.f->data,
2242                           op_pix, op_qpix);
2243             op_pix  = s->hdsp.avg_pixels_tab;
2244             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2245         }
2246         if (s->mv_dir & MV_DIR_BACKWARD) {
2247             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2248                           s->next_picture.f->data,
2249                           op_pix, op_qpix);
2250         }
2251
2252         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2253             int progressive_score, interlaced_score;
2254
2255             s->interlaced_dct = 0;
2256             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2257                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2258                                                      ptr_y + wrap_y * 8,
2259                                                      wrap_y, 8) - 400;
2260
2261             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2262                 progressive_score -= 400;
2263
2264             if (progressive_score > 0) {
2265                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2266                                                         wrap_y * 2, 8) +
2267                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2268                                                         ptr_y + wrap_y,
2269                                                         wrap_y * 2, 8);
2270
2271                 if (progressive_score > interlaced_score) {
2272                     s->interlaced_dct = 1;
2273
2274                     dct_offset = wrap_y;
2275                     wrap_y <<= 1;
2276                     if (s->chroma_format == CHROMA_422)
2277                         wrap_c <<= 1;
2278                 }
2279             }
2280         }
2281
2282         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2283         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2284         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2285                             dest_y + dct_offset, wrap_y);
2286         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2287                             dest_y + dct_offset + 8, wrap_y);
2288
2289         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2290             skip_dct[4] = 1;
2291             skip_dct[5] = 1;
2292         } else {
2293             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2294             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2295             if (!s->chroma_y_shift) { /* 422 */
2296                 s->pdsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
2297                                     dest_cb + (dct_offset >> 1), wrap_c);
2298                 s->pdsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
2299                                     dest_cr + (dct_offset >> 1), wrap_c);
2300             }
2301         }
2302         /* pre quantization */
2303         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2304                 2 * s->qscale * s->qscale) {
2305             // FIXME optimize
2306             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2307                 skip_dct[0] = 1;
2308             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2309                 skip_dct[1] = 1;
2310             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2311                                wrap_y, 8) < 20 * s->qscale)
2312                 skip_dct[2] = 1;
2313             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2314                                wrap_y, 8) < 20 * s->qscale)
2315                 skip_dct[3] = 1;
2316             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2317                 skip_dct[4] = 1;
2318             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2319                 skip_dct[5] = 1;
2320             if (!s->chroma_y_shift) { /* 422 */
2321                 if (s->mecc.sad[1](NULL, ptr_cb + (dct_offset >> 1),
2322                                    dest_cb + (dct_offset >> 1),
2323                                    wrap_c, 8) < 20 * s->qscale)
2324                     skip_dct[6] = 1;
2325                 if (s->mecc.sad[1](NULL, ptr_cr + (dct_offset >> 1),
2326                                    dest_cr + (dct_offset >> 1),
2327                                    wrap_c, 8) < 20 * s->qscale)
2328                     skip_dct[7] = 1;
2329             }
2330         }
2331     }
2332
2333     if (s->quantizer_noise_shaping) {
2334         if (!skip_dct[0])
2335             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2336         if (!skip_dct[1])
2337             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2338         if (!skip_dct[2])
2339             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2340         if (!skip_dct[3])
2341             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2342         if (!skip_dct[4])
2343             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2344         if (!skip_dct[5])
2345             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2346         if (!s->chroma_y_shift) { /* 422 */
2347             if (!skip_dct[6])
2348                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
2349                                   wrap_c);
2350             if (!skip_dct[7])
2351                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
2352                                   wrap_c);
2353         }
2354         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2355     }
2356
2357     /* DCT & quantize */
2358     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
2359     {
2360         for (i = 0; i < mb_block_count; i++) {
2361             if (!skip_dct[i]) {
2362                 int overflow;
2363                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2364                 // FIXME we could decide to change to quantizer instead of
2365                 // clipping
2366                 // JS: I don't think that would be a good idea it could lower
2367                 //     quality instead of improve it. Just INTRADC clipping
2368                 //     deserves changes in quantizer
2369                 if (overflow)
2370                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2371             } else
2372                 s->block_last_index[i] = -1;
2373         }
2374         if (s->quantizer_noise_shaping) {
2375             for (i = 0; i < mb_block_count; i++) {
2376                 if (!skip_dct[i]) {
2377                     s->block_last_index[i] =
2378                         dct_quantize_refine(s, s->block[i], weight[i],
2379                                             orig[i], i, s->qscale);
2380                 }
2381             }
2382         }
2383
2384         if (s->luma_elim_threshold && !s->mb_intra)
2385             for (i = 0; i < 4; i++)
2386                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2387         if (s->chroma_elim_threshold && !s->mb_intra)
2388             for (i = 4; i < mb_block_count; i++)
2389                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2390
2391         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2392             for (i = 0; i < mb_block_count; i++) {
2393                 if (s->block_last_index[i] == -1)
2394                     s->coded_score[i] = INT_MAX / 256;
2395             }
2396         }
2397     }
2398
2399     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2400         s->block_last_index[4] =
2401         s->block_last_index[5] = 0;
2402         s->block[4][0] =
2403         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2404     }
2405
2406     // non c quantize code returns incorrect block_last_index FIXME
2407     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2408         for (i = 0; i < mb_block_count; i++) {
2409             int j;
2410             if (s->block_last_index[i] > 0) {
2411                 for (j = 63; j > 0; j--) {
2412                     if (s->block[i][s->intra_scantable.permutated[j]])
2413                         break;
2414                 }
2415                 s->block_last_index[i] = j;
2416             }
2417         }
2418     }
2419
2420     /* huffman encode */
2421     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2422     case AV_CODEC_ID_MPEG1VIDEO:
2423     case AV_CODEC_ID_MPEG2VIDEO:
2424         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2425             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2426         break;
2427     case AV_CODEC_ID_MPEG4:
2428         if (CONFIG_MPEG4_ENCODER)
2429             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2430         break;
2431     case AV_CODEC_ID_MSMPEG4V2:
2432     case AV_CODEC_ID_MSMPEG4V3:
2433     case AV_CODEC_ID_WMV1:
2434         if (CONFIG_MSMPEG4_ENCODER)
2435             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2436         break;
2437     case AV_CODEC_ID_WMV2:
2438         if (CONFIG_WMV2_ENCODER)
2439             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2440         break;
2441     case AV_CODEC_ID_H261:
2442         if (CONFIG_H261_ENCODER)
2443             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2444         break;
2445     case AV_CODEC_ID_H263:
2446     case AV_CODEC_ID_H263P:
2447     case AV_CODEC_ID_FLV1:
2448     case AV_CODEC_ID_RV10:
2449     case AV_CODEC_ID_RV20:
2450         if (CONFIG_H263_ENCODER)
2451             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2452         break;
2453     case AV_CODEC_ID_MJPEG:
2454         if (CONFIG_MJPEG_ENCODER)
2455             ff_mjpeg_encode_mb(s, s->block);
2456         break;
2457     default:
2458         assert(0);
2459     }
2460 }
2461
2462 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2463 {
2464     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2465     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2466 }
2467
2468 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2469     int i;
2470
2471     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2472
2473     /* MPEG-1 */
2474     d->mb_skip_run= s->mb_skip_run;
2475     for(i=0; i<3; i++)
2476         d->last_dc[i] = s->last_dc[i];
2477
2478     /* statistics */
2479     d->mv_bits= s->mv_bits;
2480     d->i_tex_bits= s->i_tex_bits;
2481     d->p_tex_bits= s->p_tex_bits;
2482     d->i_count= s->i_count;
2483     d->f_count= s->f_count;
2484     d->b_count= s->b_count;
2485     d->skip_count= s->skip_count;
2486     d->misc_bits= s->misc_bits;
2487     d->last_bits= 0;
2488
2489     d->mb_skipped= 0;
2490     d->qscale= s->qscale;
2491     d->dquant= s->dquant;
2492
2493     d->esc3_level_length= s->esc3_level_length;
2494 }
2495
2496 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2497     int i;
2498
2499     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2500     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2501
2502     /* MPEG-1 */
2503     d->mb_skip_run= s->mb_skip_run;
2504     for(i=0; i<3; i++)
2505         d->last_dc[i] = s->last_dc[i];
2506
2507     /* statistics */
2508     d->mv_bits= s->mv_bits;
2509     d->i_tex_bits= s->i_tex_bits;
2510     d->p_tex_bits= s->p_tex_bits;
2511     d->i_count= s->i_count;
2512     d->f_count= s->f_count;
2513     d->b_count= s->b_count;
2514     d->skip_count= s->skip_count;
2515     d->misc_bits= s->misc_bits;
2516
2517     d->mb_intra= s->mb_intra;
2518     d->mb_skipped= s->mb_skipped;
2519     d->mv_type= s->mv_type;
2520     d->mv_dir= s->mv_dir;
2521     d->pb= s->pb;
2522     if(s->data_partitioning){
2523         d->pb2= s->pb2;
2524         d->tex_pb= s->tex_pb;
2525     }
2526     d->block= s->block;
2527     for(i=0; i<8; i++)
2528         d->block_last_index[i]= s->block_last_index[i];
2529     d->interlaced_dct= s->interlaced_dct;
2530     d->qscale= s->qscale;
2531
2532     d->esc3_level_length= s->esc3_level_length;
2533 }
2534
2535 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2536                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2537                            int *dmin, int *next_block, int motion_x, int motion_y)
2538 {
2539     int score;
2540     uint8_t *dest_backup[3];
2541
2542     copy_context_before_encode(s, backup, type);
2543
2544     s->block= s->blocks[*next_block];
2545     s->pb= pb[*next_block];
2546     if(s->data_partitioning){
2547         s->pb2   = pb2   [*next_block];
2548         s->tex_pb= tex_pb[*next_block];
2549     }
2550
2551     if(*next_block){
2552         memcpy(dest_backup, s->dest, sizeof(s->dest));
2553         s->dest[0] = s->sc.rd_scratchpad;
2554         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2555         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2556         assert(s->linesize >= 32); //FIXME
2557     }
2558
2559     encode_mb(s, motion_x, motion_y);
2560
2561     score= put_bits_count(&s->pb);
2562     if(s->data_partitioning){
2563         score+= put_bits_count(&s->pb2);
2564         score+= put_bits_count(&s->tex_pb);
2565     }
2566
2567     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2568         ff_mpv_decode_mb(s, s->block);
2569
2570         score *= s->lambda2;
2571         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2572     }
2573
2574     if(*next_block){
2575         memcpy(s->dest, dest_backup, sizeof(s->dest));
2576     }
2577
2578     if(score<*dmin){
2579         *dmin= score;
2580         *next_block^=1;
2581
2582         copy_context_after_encode(best, s, type);
2583     }
2584 }
2585
2586 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2587     uint32_t *sq = ff_square_tab + 256;
2588     int acc=0;
2589     int x,y;
2590
2591     if(w==16 && h==16)
2592         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2593     else if(w==8 && h==8)
2594         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2595
2596     for(y=0; y<h; y++){
2597         for(x=0; x<w; x++){
2598             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2599         }
2600     }
2601
2602     assert(acc>=0);
2603
2604     return acc;
2605 }
2606
2607 static int sse_mb(MpegEncContext *s){
2608     int w= 16;
2609     int h= 16;
2610
2611     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2612     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2613
2614     if(w==16 && h==16)
2615       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2616         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2617                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2618                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2619       }else{
2620         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2621                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2622                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2623       }
2624     else
2625         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2626                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2627                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2628 }
2629
2630 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2631     MpegEncContext *s= *(void**)arg;
2632
2633
2634     s->me.pre_pass=1;
2635     s->me.dia_size= s->avctx->pre_dia_size;
2636     s->first_slice_line=1;
2637     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2638         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2639             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2640         }
2641         s->first_slice_line=0;
2642     }
2643
2644     s->me.pre_pass=0;
2645
2646     return 0;
2647 }
2648
2649 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2650     MpegEncContext *s= *(void**)arg;
2651
2652     s->me.dia_size= s->avctx->dia_size;
2653     s->first_slice_line=1;
2654     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2655         s->mb_x=0; //for block init below
2656         ff_init_block_index(s);
2657         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2658             s->block_index[0]+=2;
2659             s->block_index[1]+=2;
2660             s->block_index[2]+=2;
2661             s->block_index[3]+=2;
2662
2663             /* compute motion vector & mb_type and store in context */
2664             if(s->pict_type==AV_PICTURE_TYPE_B)
2665                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2666             else
2667                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2668         }
2669         s->first_slice_line=0;
2670     }
2671     return 0;
2672 }
2673
2674 static int mb_var_thread(AVCodecContext *c, void *arg){
2675     MpegEncContext *s= *(void**)arg;
2676     int mb_x, mb_y;
2677
2678     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2679         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2680             int xx = mb_x * 16;
2681             int yy = mb_y * 16;
2682             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2683             int varc;
2684             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2685
2686             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2687                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2688
2689             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2690             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2691             s->me.mb_var_sum_temp    += varc;
2692         }
2693     }
2694     return 0;
2695 }
2696
2697 static void write_slice_end(MpegEncContext *s){
2698     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2699         if(s->partitioned_frame){
2700             ff_mpeg4_merge_partitions(s);
2701         }
2702
2703         ff_mpeg4_stuffing(&s->pb);
2704     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2705         ff_mjpeg_encode_stuffing(&s->pb);
2706     }
2707
2708     avpriv_align_put_bits(&s->pb);
2709     flush_put_bits(&s->pb);
2710
2711     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2712         s->misc_bits+= get_bits_diff(s);
2713 }
2714
2715 static void write_mb_info(MpegEncContext *s)
2716 {
2717     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2718     int offset = put_bits_count(&s->pb);
2719     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2720     int gobn = s->mb_y / s->gob_index;
2721     int pred_x, pred_y;
2722     if (CONFIG_H263_ENCODER)
2723         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2724     bytestream_put_le32(&ptr, offset);
2725     bytestream_put_byte(&ptr, s->qscale);
2726     bytestream_put_byte(&ptr, gobn);
2727     bytestream_put_le16(&ptr, mba);
2728     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2729     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2730     /* 4MV not implemented */
2731     bytestream_put_byte(&ptr, 0); /* hmv2 */
2732     bytestream_put_byte(&ptr, 0); /* vmv2 */
2733 }
2734
2735 static void update_mb_info(MpegEncContext *s, int startcode)
2736 {
2737     if (!s->mb_info)
2738         return;
2739     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2740         s->mb_info_size += 12;
2741         s->prev_mb_info = s->last_mb_info;
2742     }
2743     if (startcode) {
2744         s->prev_mb_info = put_bits_count(&s->pb)/8;
2745         /* This might have incremented mb_info_size above, and we return without
2746          * actually writing any info into that slot yet. But in that case,
2747          * this will be called again at the start of the after writing the
2748          * start code, actually writing the mb info. */
2749         return;
2750     }
2751
2752     s->last_mb_info = put_bits_count(&s->pb)/8;
2753     if (!s->mb_info_size)
2754         s->mb_info_size += 12;
2755     write_mb_info(s);
2756 }
2757
2758 static int encode_thread(AVCodecContext *c, void *arg){
2759     MpegEncContext *s= *(void**)arg;
2760     int mb_x, mb_y;
2761     int chr_h= 16>>s->chroma_y_shift;
2762     int i, j;
2763     MpegEncContext best_s = { 0 }, backup_s;
2764     uint8_t bit_buf[2][MAX_MB_BYTES];
2765     uint8_t bit_buf2[2][MAX_MB_BYTES];
2766     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2767     PutBitContext pb[2], pb2[2], tex_pb[2];
2768
2769     for(i=0; i<2; i++){
2770         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2771         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2772         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2773     }
2774
2775     s->last_bits= put_bits_count(&s->pb);
2776     s->mv_bits=0;
2777     s->misc_bits=0;
2778     s->i_tex_bits=0;
2779     s->p_tex_bits=0;
2780     s->i_count=0;
2781     s->f_count=0;
2782     s->b_count=0;
2783     s->skip_count=0;
2784
2785     for(i=0; i<3; i++){
2786         /* init last dc values */
2787         /* note: quant matrix value (8) is implied here */
2788         s->last_dc[i] = 128 << s->intra_dc_precision;
2789
2790         s->current_picture.encoding_error[i] = 0;
2791     }
2792     s->mb_skip_run = 0;
2793     memset(s->last_mv, 0, sizeof(s->last_mv));
2794
2795     s->last_mv_dir = 0;
2796
2797     switch(s->codec_id){
2798     case AV_CODEC_ID_H263:
2799     case AV_CODEC_ID_H263P:
2800     case AV_CODEC_ID_FLV1:
2801         if (CONFIG_H263_ENCODER)
2802             s->gob_index = H263_GOB_HEIGHT(s->height);
2803         break;
2804     case AV_CODEC_ID_MPEG4:
2805         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2806             ff_mpeg4_init_partitions(s);
2807         break;
2808     }
2809
2810     s->resync_mb_x=0;
2811     s->resync_mb_y=0;
2812     s->first_slice_line = 1;
2813     s->ptr_lastgob = s->pb.buf;
2814     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2815         s->mb_x=0;
2816         s->mb_y= mb_y;
2817
2818         ff_set_qscale(s, s->qscale);
2819         ff_init_block_index(s);
2820
2821         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2822             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2823             int mb_type= s->mb_type[xy];
2824 //            int d;
2825             int dmin= INT_MAX;
2826             int dir;
2827
2828             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2829                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2830                 return -1;
2831             }
2832             if(s->data_partitioning){
2833                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2834                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2835                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2836                     return -1;
2837                 }
2838             }
2839
2840             s->mb_x = mb_x;
2841             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2842             ff_update_block_index(s);
2843
2844             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2845                 ff_h261_reorder_mb_index(s);
2846                 xy= s->mb_y*s->mb_stride + s->mb_x;
2847                 mb_type= s->mb_type[xy];
2848             }
2849
2850             /* write gob / video packet header  */
2851             if(s->rtp_mode){
2852                 int current_packet_size, is_gob_start;
2853
2854                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2855
2856                 is_gob_start = s->rtp_payload_size &&
2857                                current_packet_size >= s->rtp_payload_size &&
2858                                mb_y + mb_x > 0;
2859
2860                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2861
2862                 switch(s->codec_id){
2863                 case AV_CODEC_ID_H263:
2864                 case AV_CODEC_ID_H263P:
2865                     if(!s->h263_slice_structured)
2866                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2867                     break;
2868                 case AV_CODEC_ID_MPEG2VIDEO:
2869                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2870                 case AV_CODEC_ID_MPEG1VIDEO:
2871                     if(s->mb_skip_run) is_gob_start=0;
2872                     break;
2873                 }
2874
2875                 if(is_gob_start){
2876                     if(s->start_mb_y != mb_y || mb_x!=0){
2877                         write_slice_end(s);
2878
2879                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2880                             ff_mpeg4_init_partitions(s);
2881                         }
2882                     }
2883
2884                     assert((put_bits_count(&s->pb)&7) == 0);
2885                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2886
2887                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2888                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2889                         int d = 100 / s->error_rate;
2890                         if(r % d == 0){
2891                             current_packet_size=0;
2892                             s->pb.buf_ptr= s->ptr_lastgob;
2893                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2894                         }
2895                     }
2896
2897 #if FF_API_RTP_CALLBACK
2898 FF_DISABLE_DEPRECATION_WARNINGS
2899                     if (s->avctx->rtp_callback){
2900                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2901                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2902                     }
2903 FF_ENABLE_DEPRECATION_WARNINGS
2904 #endif
2905                     update_mb_info(s, 1);
2906
2907                     switch(s->codec_id){
2908                     case AV_CODEC_ID_MPEG4:
2909                         if (CONFIG_MPEG4_ENCODER) {
2910                             ff_mpeg4_encode_video_packet_header(s);
2911                             ff_mpeg4_clean_buffers(s);
2912                         }
2913                     break;
2914                     case AV_CODEC_ID_MPEG1VIDEO:
2915                     case AV_CODEC_ID_MPEG2VIDEO:
2916                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2917                             ff_mpeg1_encode_slice_header(s);
2918                             ff_mpeg1_clean_buffers(s);
2919                         }
2920                     break;
2921                     case AV_CODEC_ID_H263:
2922                     case AV_CODEC_ID_H263P:
2923                         if (CONFIG_H263_ENCODER)
2924                             ff_h263_encode_gob_header(s, mb_y);
2925                     break;
2926                     }
2927
2928                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2929                         int bits= put_bits_count(&s->pb);
2930                         s->misc_bits+= bits - s->last_bits;
2931                         s->last_bits= bits;
2932                     }
2933
2934                     s->ptr_lastgob += current_packet_size;
2935                     s->first_slice_line=1;
2936                     s->resync_mb_x=mb_x;
2937                     s->resync_mb_y=mb_y;
2938                 }
2939             }
2940
2941             if(  (s->resync_mb_x   == s->mb_x)
2942                && s->resync_mb_y+1 == s->mb_y){
2943                 s->first_slice_line=0;
2944             }
2945
2946             s->mb_skipped=0;
2947             s->dquant=0; //only for QP_RD
2948
2949             update_mb_info(s, 0);
2950
2951             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2952                 int next_block=0;
2953                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2954
2955                 copy_context_before_encode(&backup_s, s, -1);
2956                 backup_s.pb= s->pb;
2957                 best_s.data_partitioning= s->data_partitioning;
2958                 best_s.partitioned_frame= s->partitioned_frame;
2959                 if(s->data_partitioning){
2960                     backup_s.pb2= s->pb2;
2961                     backup_s.tex_pb= s->tex_pb;
2962                 }
2963
2964                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2965                     s->mv_dir = MV_DIR_FORWARD;
2966                     s->mv_type = MV_TYPE_16X16;
2967                     s->mb_intra= 0;
2968                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2969                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2970                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2971                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2972                 }
2973                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2974                     s->mv_dir = MV_DIR_FORWARD;
2975                     s->mv_type = MV_TYPE_FIELD;
2976                     s->mb_intra= 0;
2977                     for(i=0; i<2; i++){
2978                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2979                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2980                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2981                     }
2982                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2983                                  &dmin, &next_block, 0, 0);
2984                 }
2985                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2986                     s->mv_dir = MV_DIR_FORWARD;
2987                     s->mv_type = MV_TYPE_16X16;
2988                     s->mb_intra= 0;
2989                     s->mv[0][0][0] = 0;
2990                     s->mv[0][0][1] = 0;
2991                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2992                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2993                 }
2994                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2995                     s->mv_dir = MV_DIR_FORWARD;
2996                     s->mv_type = MV_TYPE_8X8;
2997                     s->mb_intra= 0;
2998                     for(i=0; i<4; i++){
2999                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3000                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3001                     }
3002                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3003                                  &dmin, &next_block, 0, 0);
3004                 }
3005                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3006                     s->mv_dir = MV_DIR_FORWARD;
3007                     s->mv_type = MV_TYPE_16X16;
3008                     s->mb_intra= 0;
3009                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3010                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3011                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3012                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3013                 }
3014                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3015                     s->mv_dir = MV_DIR_BACKWARD;
3016                     s->mv_type = MV_TYPE_16X16;
3017                     s->mb_intra= 0;
3018                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3019                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3020                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3021                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3022                 }
3023                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3024                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3025                     s->mv_type = MV_TYPE_16X16;
3026                     s->mb_intra= 0;
3027                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3028                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3029                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3030                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3031                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3032                                  &dmin, &next_block, 0, 0);
3033                 }
3034                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3035                     s->mv_dir = MV_DIR_FORWARD;
3036                     s->mv_type = MV_TYPE_FIELD;
3037                     s->mb_intra= 0;
3038                     for(i=0; i<2; i++){
3039                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3040                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3041                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3042                     }
3043                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3044                                  &dmin, &next_block, 0, 0);
3045                 }
3046                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3047                     s->mv_dir = MV_DIR_BACKWARD;
3048                     s->mv_type = MV_TYPE_FIELD;
3049                     s->mb_intra= 0;
3050                     for(i=0; i<2; i++){
3051                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3052                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3053                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3054                     }
3055                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3056                                  &dmin, &next_block, 0, 0);
3057                 }
3058                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3059                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3060                     s->mv_type = MV_TYPE_FIELD;
3061                     s->mb_intra= 0;
3062                     for(dir=0; dir<2; dir++){
3063                         for(i=0; i<2; i++){
3064                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3065                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3066                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3067                         }
3068                     }
3069                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3070                                  &dmin, &next_block, 0, 0);
3071                 }
3072                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3073                     s->mv_dir = 0;
3074                     s->mv_type = MV_TYPE_16X16;
3075                     s->mb_intra= 1;
3076                     s->mv[0][0][0] = 0;
3077                     s->mv[0][0][1] = 0;
3078                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3079                                  &dmin, &next_block, 0, 0);
3080                     if(s->h263_pred || s->h263_aic){
3081                         if(best_s.mb_intra)
3082                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3083                         else
3084                             ff_clean_intra_table_entries(s); //old mode?
3085                     }
3086                 }
3087
3088                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3089                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3090                         const int last_qp= backup_s.qscale;
3091                         int qpi, qp, dc[6];
3092                         int16_t ac[6][16];
3093                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3094                         static const int dquant_tab[4]={-1,1,-2,2};
3095
3096                         assert(backup_s.dquant == 0);
3097
3098                         //FIXME intra
3099                         s->mv_dir= best_s.mv_dir;
3100                         s->mv_type = MV_TYPE_16X16;
3101                         s->mb_intra= best_s.mb_intra;
3102                         s->mv[0][0][0] = best_s.mv[0][0][0];
3103                         s->mv[0][0][1] = best_s.mv[0][0][1];
3104                         s->mv[1][0][0] = best_s.mv[1][0][0];
3105                         s->mv[1][0][1] = best_s.mv[1][0][1];
3106
3107                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3108                         for(; qpi<4; qpi++){
3109                             int dquant= dquant_tab[qpi];
3110                             qp= last_qp + dquant;
3111                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3112                                 continue;
3113                             backup_s.dquant= dquant;
3114                             if(s->mb_intra && s->dc_val[0]){
3115                                 for(i=0; i<6; i++){
3116                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3117                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3118                                 }
3119                             }
3120
3121                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3122                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3123                             if(best_s.qscale != qp){
3124                                 if(s->mb_intra && s->dc_val[0]){
3125                                     for(i=0; i<6; i++){
3126                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3127                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3128                                     }
3129                                 }
3130                             }
3131                         }
3132                     }
3133                 }
3134                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3135                     int mx= s->b_direct_mv_table[xy][0];
3136                     int my= s->b_direct_mv_table[xy][1];
3137
3138                     backup_s.dquant = 0;
3139                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3140                     s->mb_intra= 0;
3141                     ff_mpeg4_set_direct_mv(s, mx, my);
3142                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3143                                  &dmin, &next_block, mx, my);
3144                 }
3145                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3146                     backup_s.dquant = 0;
3147                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3148                     s->mb_intra= 0;
3149                     ff_mpeg4_set_direct_mv(s, 0, 0);
3150                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3151                                  &dmin, &next_block, 0, 0);
3152                 }
3153                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3154                     int coded=0;
3155                     for(i=0; i<6; i++)
3156                         coded |= s->block_last_index[i];
3157                     if(coded){
3158                         int mx,my;
3159                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3160                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3161                             mx=my=0; //FIXME find the one we actually used
3162                             ff_mpeg4_set_direct_mv(s, mx, my);
3163                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3164                             mx= s->mv[1][0][0];
3165                             my= s->mv[1][0][1];
3166                         }else{
3167                             mx= s->mv[0][0][0];
3168                             my= s->mv[0][0][1];
3169                         }
3170
3171                         s->mv_dir= best_s.mv_dir;
3172                         s->mv_type = best_s.mv_type;
3173                         s->mb_intra= 0;
3174 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3175                         s->mv[0][0][1] = best_s.mv[0][0][1];
3176                         s->mv[1][0][0] = best_s.mv[1][0][0];
3177                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3178                         backup_s.dquant= 0;
3179                         s->skipdct=1;
3180                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3181                                         &dmin, &next_block, mx, my);
3182                         s->skipdct=0;
3183                     }
3184                 }
3185
3186                 s->current_picture.qscale_table[xy] = best_s.qscale;
3187
3188                 copy_context_after_encode(s, &best_s, -1);
3189
3190                 pb_bits_count= put_bits_count(&s->pb);
3191                 flush_put_bits(&s->pb);
3192                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3193                 s->pb= backup_s.pb;
3194
3195                 if(s->data_partitioning){
3196                     pb2_bits_count= put_bits_count(&s->pb2);
3197                     flush_put_bits(&s->pb2);
3198                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3199                     s->pb2= backup_s.pb2;
3200
3201                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3202                     flush_put_bits(&s->tex_pb);
3203                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3204                     s->tex_pb= backup_s.tex_pb;
3205                 }
3206                 s->last_bits= put_bits_count(&s->pb);
3207
3208                 if (CONFIG_H263_ENCODER &&
3209                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3210                     ff_h263_update_motion_val(s);
3211
3212                 if(next_block==0){ //FIXME 16 vs linesize16
3213                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3214                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3215                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3216                 }
3217
3218                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3219                     ff_mpv_decode_mb(s, s->block);
3220             } else {
3221                 int motion_x = 0, motion_y = 0;
3222                 s->mv_type=MV_TYPE_16X16;
3223                 // only one MB-Type possible
3224
3225                 switch(mb_type){
3226                 case CANDIDATE_MB_TYPE_INTRA:
3227                     s->mv_dir = 0;
3228                     s->mb_intra= 1;
3229                     motion_x= s->mv[0][0][0] = 0;
3230                     motion_y= s->mv[0][0][1] = 0;
3231                     break;
3232                 case CANDIDATE_MB_TYPE_INTER:
3233                     s->mv_dir = MV_DIR_FORWARD;
3234                     s->mb_intra= 0;
3235                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3236                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3237                     break;
3238                 case CANDIDATE_MB_TYPE_INTER_I:
3239                     s->mv_dir = MV_DIR_FORWARD;
3240                     s->mv_type = MV_TYPE_FIELD;
3241                     s->mb_intra= 0;
3242                     for(i=0; i<2; i++){
3243                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3244                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3245                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3246                     }
3247                     break;
3248                 case CANDIDATE_MB_TYPE_INTER4V:
3249                     s->mv_dir = MV_DIR_FORWARD;
3250                     s->mv_type = MV_TYPE_8X8;
3251                     s->mb_intra= 0;
3252                     for(i=0; i<4; i++){
3253                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3254                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3255                     }
3256                     break;
3257                 case CANDIDATE_MB_TYPE_DIRECT:
3258                     if (CONFIG_MPEG4_ENCODER) {
3259                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3260                         s->mb_intra= 0;
3261                         motion_x=s->b_direct_mv_table[xy][0];
3262                         motion_y=s->b_direct_mv_table[xy][1];
3263                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3264                     }
3265                     break;
3266                 case CANDIDATE_MB_TYPE_DIRECT0:
3267                     if (CONFIG_MPEG4_ENCODER) {
3268                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3269                         s->mb_intra= 0;
3270                         ff_mpeg4_set_direct_mv(s, 0, 0);
3271                     }
3272                     break;
3273                 case CANDIDATE_MB_TYPE_BIDIR:
3274                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3275                     s->mb_intra= 0;
3276                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3277                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3278                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3279                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3280                     break;
3281                 case CANDIDATE_MB_TYPE_BACKWARD:
3282                     s->mv_dir = MV_DIR_BACKWARD;
3283                     s->mb_intra= 0;
3284                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3285                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3286                     break;
3287                 case CANDIDATE_MB_TYPE_FORWARD:
3288                     s->mv_dir = MV_DIR_FORWARD;
3289                     s->mb_intra= 0;
3290                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3291                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3292                     break;
3293                 case CANDIDATE_MB_TYPE_FORWARD_I:
3294                     s->mv_dir = MV_DIR_FORWARD;
3295                     s->mv_type = MV_TYPE_FIELD;
3296                     s->mb_intra= 0;
3297                     for(i=0; i<2; i++){
3298                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3299                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3300                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3301                     }
3302                     break;
3303                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3304                     s->mv_dir = MV_DIR_BACKWARD;
3305                     s->mv_type = MV_TYPE_FIELD;
3306                     s->mb_intra= 0;
3307                     for(i=0; i<2; i++){
3308                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3309                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3310                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3311                     }
3312                     break;
3313                 case CANDIDATE_MB_TYPE_BIDIR_I:
3314                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3315                     s->mv_type = MV_TYPE_FIELD;
3316                     s->mb_intra= 0;
3317                     for(dir=0; dir<2; dir++){
3318                         for(i=0; i<2; i++){
3319                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3320                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3321                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3322                         }
3323                     }
3324                     break;
3325                 default:
3326                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3327                 }
3328
3329                 encode_mb(s, motion_x, motion_y);
3330
3331                 // RAL: Update last macroblock type
3332                 s->last_mv_dir = s->mv_dir;
3333
3334                 if (CONFIG_H263_ENCODER &&
3335                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3336                     ff_h263_update_motion_val(s);
3337
3338                 ff_mpv_decode_mb(s, s->block);
3339             }
3340
3341             /* clean the MV table in IPS frames for direct mode in B-frames */
3342             if(s->mb_intra /* && I,P,S_TYPE */){
3343                 s->p_mv_table[xy][0]=0;
3344                 s->p_mv_table[xy][1]=0;
3345             }
3346
3347             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3348                 int w= 16;
3349                 int h= 16;
3350
3351                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3352                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3353
3354                 s->current_picture.encoding_error[0] += sse(
3355                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3356                     s->dest[0], w, h, s->linesize);
3357                 s->current_picture.encoding_error[1] += sse(
3358                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3359                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3360                 s->current_picture.encoding_error[2] += sse(
3361                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3362                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3363             }
3364             if(s->loop_filter){
3365                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3366                     ff_h263_loop_filter(s);
3367             }
3368             ff_dlog(s->avctx, "MB %d %d bits\n",
3369                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3370         }
3371     }
3372
3373     //not beautiful here but we must write it before flushing so it has to be here
3374     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3375         ff_msmpeg4_encode_ext_header(s);
3376
3377     write_slice_end(s);
3378
3379 #if FF_API_RTP_CALLBACK
3380 FF_DISABLE_DEPRECATION_WARNINGS
3381     /* Send the last GOB if RTP */
3382     if (s->avctx->rtp_callback) {
3383         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3384         int pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3385         /* Call the RTP callback to send the last GOB */
3386         emms_c();
3387         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3388     }
3389 FF_ENABLE_DEPRECATION_WARNINGS
3390 #endif
3391
3392     return 0;
3393 }
3394
3395 #define MERGE(field) dst->field += src->field; src->field=0
3396 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3397     MERGE(me.scene_change_score);
3398     MERGE(me.mc_mb_var_sum_temp);
3399     MERGE(me.mb_var_sum_temp);
3400 }
3401
3402 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3403     int i;
3404
3405     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3406     MERGE(dct_count[1]);
3407     MERGE(mv_bits);
3408     MERGE(i_tex_bits);
3409     MERGE(p_tex_bits);
3410     MERGE(i_count);
3411     MERGE(f_count);
3412     MERGE(b_count);
3413     MERGE(skip_count);
3414     MERGE(misc_bits);
3415     MERGE(er.error_count);
3416     MERGE(padding_bug_score);
3417     MERGE(current_picture.encoding_error[0]);
3418     MERGE(current_picture.encoding_error[1]);
3419     MERGE(current_picture.encoding_error[2]);
3420
3421     if (dst->noise_reduction){
3422         for(i=0; i<64; i++){
3423             MERGE(dct_error_sum[0][i]);
3424             MERGE(dct_error_sum[1][i]);
3425         }
3426     }
3427
3428     assert(put_bits_count(&src->pb) % 8 ==0);
3429     assert(put_bits_count(&dst->pb) % 8 ==0);
3430     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3431     flush_put_bits(&dst->pb);
3432 }
3433
3434 static int estimate_qp(MpegEncContext *s, int dry_run){
3435     if (s->next_lambda){
3436         s->current_picture_ptr->f->quality =
3437         s->current_picture.f->quality = s->next_lambda;
3438         if(!dry_run) s->next_lambda= 0;
3439     } else if (!s->fixed_qscale) {
3440         int quality;
3441 #if CONFIG_LIBXVID
3442         if ((s->avctx->flags & AV_CODEC_FLAG_PASS2) && s->rc_strategy == 1)
3443             quality = ff_xvid_rate_estimate_qscale(s, dry_run);
3444         else
3445 #endif
3446         quality = ff_rate_estimate_qscale(s, dry_run);
3447         s->current_picture_ptr->f->quality =
3448         s->current_picture.f->quality = quality;
3449         if (s->current_picture.f->quality < 0)
3450             return -1;
3451     }
3452
3453     if(s->adaptive_quant){
3454         switch(s->codec_id){
3455         case AV_CODEC_ID_MPEG4:
3456             if (CONFIG_MPEG4_ENCODER)
3457                 ff_clean_mpeg4_qscales(s);
3458             break;
3459         case AV_CODEC_ID_H263:
3460         case AV_CODEC_ID_H263P:
3461         case AV_CODEC_ID_FLV1:
3462             if (CONFIG_H263_ENCODER)
3463                 ff_clean_h263_qscales(s);
3464             break;
3465         default:
3466             ff_init_qscale_tab(s);
3467         }
3468
3469         s->lambda= s->lambda_table[0];
3470         //FIXME broken
3471     }else
3472         s->lambda = s->current_picture.f->quality;
3473     update_qscale(s);
3474     return 0;
3475 }
3476
3477 /* must be called before writing the header */
3478 static void set_frame_distances(MpegEncContext * s){
3479     assert(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3480     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3481
3482     if(s->pict_type==AV_PICTURE_TYPE_B){
3483         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3484         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3485     }else{
3486         s->pp_time= s->time - s->last_non_b_time;
3487         s->last_non_b_time= s->time;
3488         assert(s->picture_number==0 || s->pp_time > 0);
3489     }
3490 }
3491
/**
 * Encode the current picture.
 *
 * Steps, in order: initialise per-picture state (picture number, frame
 * distances, rounding mode, starting lambda), update the other slice
 * contexts, run motion estimation (or mark every macroblock as intra for
 * I-pictures), re-type a P-picture as an I-picture when the scene change
 * score exceeds the threshold, select f_code/b_code and post-process the
 * motion vector tables, run the rate control through estimate_qp(), write
 * the format-specific picture header, and finally run encode_thread over
 * all slice contexts and merge their results into s.
 *
 * @param s              encoder context of the picture being encoded
 * @param picture_number stored in s->picture_number and handed to the
 *                       picture header writers
 * @return 0 on success; -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context() or
 *         ff_rv10_encode_picture_header()
 *
 * NOTE(review): every execute() call below passes NULL as its fourth
 * argument, so the return values of the per-context jobs are presumably
 * not collected -- confirm against the AVCodecContext.execute contract.
 */
3492 static int encode_picture(MpegEncContext *s, int picture_number)
3493 {
3494     int i, ret;
3495     int bits;
3496     int context_count = s->slice_context_count;
3497
3498     s->picture_number = picture_number;
3499
3500     /* Reset the average MB variance */
3501     s->me.mb_var_sum_temp    =
3502     s->me.mc_mb_var_sum_temp = 0;
3503
3504     /* we need to initialize some time vars before we can encode B-frames */
3505     // RAL: Condition added for MPEG1VIDEO
3506     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3507         set_frame_distances(s);
3508     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3509         ff_set_mpeg4_time(s);
3510
3511     s->me.scene_change_score=0;
3512
3513 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3514
         /* Rounding mode: I-pictures reset no_rounding (1 when
          * msmpeg4_version >= 3, else 0); other non-B pictures toggle it when
          * flipflop_rounding is set or the codec is H263P or MPEG4. */
3515     if(s->pict_type==AV_PICTURE_TYPE_I){
3516         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3517         else                        s->no_rounding=0;
3518     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3519         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3520             s->no_rounding ^= 1;
3521     }
3522
         /* With the PASS2 flag: dry-run rate control followed by
          * ff_get_2pass_fcode(). Otherwise, unless the QSCALE flag is set,
          * take lambda from last_lambda_for[], indexed by this picture type
          * for B-pictures and by the last non-B picture type for the rest. */
3523     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3524         if (estimate_qp(s,1) < 0)
3525             return -1;
3526         ff_get_2pass_fcode(s);
3527     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3528         if(s->pict_type==AV_PICTURE_TYPE_B)
3529             s->lambda= s->last_lambda_for[s->pict_type];
3530         else
3531             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3532         update_qscale(s);
3533     }
3534
3535     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Update the slice contexts 1..context_count-1 from s
          * (presumably thread_context[0] is s itself -- TODO confirm). */
3536     for(i=1; i<context_count; i++){
3537         ret = ff_update_duplicate_context(s->thread_context[i], s);
3538         if (ret < 0)
3539             return ret;
3540     }
3541
3542     if(ff_init_me(s)<0)
3543         return -1;
3544
3545     /* Estimate motion for every MB */
3546     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation / 256, rounded */
3547         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3548         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
             /* pre-pass, non-B pictures only: when me_pre is set and the last
              * non-B picture was an I-picture, or whenever me_pre == 2 */
3549         if (s->pict_type != AV_PICTURE_TYPE_B) {
3550             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3551                 s->me_pre == 2) {
3552                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3553             }
3554         }
3555
3556         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3557     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3558         /* I-Frame */
3559         for(i=0; i<s->mb_stride*s->mb_height; i++)
3560             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3561
3562         if(!s->fixed_qscale){
3563             /* finding spatial complexity for I-frame rate control */
3564             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3565         }
3566     }
         /* fold the ME statistics of the other contexts into s */
3567     for(i=1; i<context_count; i++){
3568         merge_context_after_me(s, s->thread_context[i]);
3569     }
3570     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3571     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3572     emms_c();
3573
         /* A P-picture whose scene change score exceeds the threshold is
          * re-typed as an I-picture and all its macroblocks are set to intra. */
3574     if (s->me.scene_change_score > s->scenechange_threshold &&
3575         s->pict_type == AV_PICTURE_TYPE_P) {
3576         s->pict_type= AV_PICTURE_TYPE_I;
3577         for(i=0; i<s->mb_stride*s->mb_height; i++)
3578             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3579         ff_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3580                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3581     }
3582
         /* Unless umvplus is set, pick f_code (and b_code for B-pictures) with
          * ff_get_best_fcode() and run ff_fix_long_mvs() on each MV table. */
3583     if(!s->umvplus){
3584         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3585             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3586
3587             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3588                 int a,b;
3589                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3590                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3591                 s->f_code= FFMAX3(s->f_code, a, b);
3592             }
3593
3594             ff_fix_long_p_mvs(s);
3595             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3596             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3597                 int j;
3598                 for(i=0; i<2; i++){
3599                     for(j=0; j<2; j++)
3600                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3601                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3602                 }
3603             }
3604         }
3605
3606         if(s->pict_type==AV_PICTURE_TYPE_B){
3607             int a, b;
3608
                 /* f_code covers the forward tables, b_code the backward ones */
3609             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3610             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3611             s->f_code = FFMAX(a, b);
3612
3613             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3614             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3615             s->b_code = FFMAX(a, b);
3616
3617             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3618             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3619             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3620             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3621             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3622                 int dir, j;
3623                 for(dir=0; dir<2; dir++){
3624                     for(i=0; i<2; i++){
3625                         for(j=0; j<2; j++){
3626                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3627                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3628                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3629                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3630                         }
3631                     }
3632                 }
3633             }
3634         }
3635     }
3636
         /* the real (non dry-run) rate control pass */
3637     if (estimate_qp(s, 0) < 0)
3638         return -1;
3639
3640     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3641         s->pict_type == AV_PICTURE_TYPE_I &&
3642         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3643         s->qscale= 3; //reduce clipping problems
3644
3645     if (s->out_format == FMT_MJPEG) {
3646         /* for mjpeg, we do include qscale in the matrix */
3647         for(i=1;i<64;i++){
3648             int j = s->idsp.idct_permutation[i];
3649
3650             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3651         }
3652         s->y_dc_scale_table=
3653         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3654         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3655         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3656                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is now part of the matrix, so it is pinned to 8 */
3657         s->qscale= 8;
3658     }
3659
3660     //FIXME var duplication
3661     s->current_picture_ptr->f->key_frame =
3662     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3663     s->current_picture_ptr->f->pict_type =
3664     s->current_picture.f->pict_type = s->pict_type;
3665
3666     if (s->current_picture.f->key_frame)
3667         s->picture_in_gop_number=0;
3668
         /* Write the picture header of the output format; the bit counts taken
          * before and after it give header_bits. */
3669     s->last_bits= put_bits_count(&s->pb);
3670     switch(s->out_format) {
3671     case FMT_MJPEG:
3672         if (CONFIG_MJPEG_ENCODER)
3673             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3674                                            s->pred, s->intra_matrix);
3675         break;
3676     case FMT_H261:
3677         if (CONFIG_H261_ENCODER)
3678             ff_h261_encode_picture_header(s, picture_number);
3679         break;
3680     case FMT_H263:
3681         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3682             ff_wmv2_encode_picture_header(s, picture_number);
3683         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3684             ff_msmpeg4_encode_picture_header(s, picture_number);
3685         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3686             ff_mpeg4_encode_picture_header(s, picture_number);
3687         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3688             ret = ff_rv10_encode_picture_header(s, picture_number);
3689             if (ret < 0)
3690                 return ret;
3691         }
3692         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3693             ff_rv20_encode_picture_header(s, picture_number);
3694         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3695             ff_flv_encode_picture_header(s, picture_number);
3696         else if (CONFIG_H263_ENCODER)
3697             ff_h263_encode_picture_header(s, picture_number);
3698         break;
3699     case FMT_MPEG1:
3700         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3701             ff_mpeg1_encode_picture_header(s, picture_number);
3702         break;
3703     default:
3704         assert(0);
3705     }
3706     bits= put_bits_count(&s->pb);
3707     s->header_bits= bits - s->last_bits;
3708
         /* Update the other contexts from s, run encode_thread on all of them,
          * then merge their statistics and bitstreams back into s. */
3709     for(i=1; i<context_count; i++){
3710         update_duplicate_context_after_me(s->thread_context[i], s);
3711     }
3712     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3713     for(i=1; i<context_count; i++){
3714         merge_context_after_encode(s, s->thread_context[i]);
3715     }
3716     emms_c();
3717     return 0;
3718 }
3719
3720 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3721     const int intra= s->mb_intra;
3722     int i;
3723
3724     s->dct_count[intra]++;
3725
3726     for(i=0; i<64; i++){
3727         int level= block[i];
3728
3729         if(level){
3730             if(level>0){
3731                 s->dct_error_sum[intra][i] += level;
3732                 level -= s->dct_offset[intra][i];
3733                 if(level<0) level=0;
3734             }else{
3735                 s->dct_error_sum[intra][i] -= level;
3736                 level += s->dct_offset[intra][i];
3737                 if(level>0) level=0;
3738             }
3739             block[i]= level;
3740         }
3741     }
3742 }
3743
3744 static int dct_quantize_trellis_c(MpegEncContext *s,
3745                                   int16_t *block, int n,
3746                                   int qscale, int *overflow){
3747     const int *qmat;
3748     const uint8_t *scantable= s->intra_scantable.scantable;
3749     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3750     int max=0;
3751     unsigned int threshold1, threshold2;
3752     int bias=0;
3753     int run_tab[65];
3754     int level_tab[65];
3755     int score_tab[65];
3756     int survivor[65];
3757     int survivor_count;
3758     int last_run=0;
3759     int last_level=0;
3760     int last_score= 0;
3761     int last_i;
3762     int coeff[2][64];
3763     int coeff_count[64];
3764     int qmul, qadd, start_i, last_non_zero, i, dc;
3765     const int esc_length= s->ac_esc_length;
3766     uint8_t * length;
3767     uint8_t * last_length;
3768     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3769
3770     s->fdsp.fdct(block);
3771
3772     if(s->dct_error_sum)
3773         s->denoise_dct(s, block);
3774     qmul= qscale*16;
3775     qadd= ((qscale-1)|1)*8;
3776
3777     if (s->mb_intra) {
3778         int q;
3779         if (!s->h263_aic) {
3780             if (n < 4)
3781                 q = s->y_dc_scale;
3782             else
3783                 q = s->c_dc_scale;
3784             q = q << 3;
3785         } else{
3786             /* For AIC we skip quant/dequant of INTRADC */
3787             q = 1 << 3;
3788             qadd=0;
3789         }
3790
3791         /* note: block[0] is assumed to be positive */
3792         block[0] = (block[0] + (q >> 1)) / q;
3793         start_i = 1;
3794         last_non_zero = 0;
3795         qmat = s->q_intra_matrix[qscale];
3796         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3797             bias= 1<<(QMAT_SHIFT-1);
3798         length     = s->intra_ac_vlc_length;
3799         last_length= s->intra_ac_vlc_last_length;
3800     } else {
3801         start_i = 0;
3802         last_non_zero = -1;
3803         qmat = s->q_inter_matrix[qscale];
3804         length     = s->inter_ac_vlc_length;
3805         last_length= s->inter_ac_vlc_last_length;
3806     }
3807     last_i= start_i;
3808
3809     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3810     threshold2= (threshold1<<1);
3811
3812     for(i=63; i>=start_i; i--) {
3813         const int j = scantable[i];
3814         int level = block[j] * qmat[j];
3815
3816         if(((unsigned)(level+threshold1))>threshold2){
3817             last_non_zero = i;
3818             break;
3819         }
3820     }
3821
3822     for(i=start_i; i<=last_non_zero; i++) {
3823         const int j = scantable[i];
3824         int level = block[j] * qmat[j];
3825
3826 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3827 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3828         if(((unsigned)(level+threshold1))>threshold2){
3829             if(level>0){
3830                 level= (bias + level)>>QMAT_SHIFT;
3831                 coeff[0][i]= level;
3832                 coeff[1][i]= level-1;
3833 //                coeff[2][k]= level-2;
3834             }else{
3835                 level= (bias - level)>>QMAT_SHIFT;
3836                 coeff[0][i]= -level;
3837                 coeff[1][i]= -level+1;
3838 //                coeff[2][k]= -level+2;
3839             }
3840             coeff_count[i]= FFMIN(level, 2);
3841             assert(coeff_count[i]);
3842             max |=level;
3843         }else{
3844             coeff[0][i]= (level>>31)|1;
3845             coeff_count[i]= 1;
3846         }
3847     }
3848
3849     *overflow= s->max_qcoeff < max; //overflow might have happened
3850
3851     if(last_non_zero < start_i){
3852         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3853         return last_non_zero;
3854     }
3855
3856     score_tab[start_i]= 0;
3857     survivor[0]= start_i;
3858     survivor_count= 1;
3859
3860     for(i=start_i; i<=last_non_zero; i++){
3861         int level_index, j, zero_distortion;
3862         int dct_coeff= FFABS(block[ scantable[i] ]);
3863         int best_score=256*256*256*120;
3864
3865         if (s->fdsp.fdct == ff_fdct_ifast)
3866             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3867         zero_distortion= dct_coeff*dct_coeff;
3868
3869         for(level_index=0; level_index < coeff_count[i]; level_index++){
3870             int distortion;
3871             int level= coeff[level_index][i];
3872             const int alevel= FFABS(level);
3873             int unquant_coeff;
3874
3875             assert(level);
3876
3877             if(s->out_format == FMT_H263){
3878                 unquant_coeff= alevel*qmul + qadd;
3879             } else { // MPEG-1
3880                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3881                 if(s->mb_intra){
3882                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3883                         unquant_coeff =   (unquant_coeff - 1) | 1;
3884                 }else{
3885                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3886                         unquant_coeff =   (unquant_coeff - 1) | 1;
3887                 }
3888                 unquant_coeff<<= 3;
3889             }
3890
3891             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3892             level+=64;
3893             if((level&(~127)) == 0){
3894                 for(j=survivor_count-1; j>=0; j--){
3895                     int run= i - survivor[j];
3896                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3897                     score += score_tab[i-run];
3898
3899                     if(score < best_score){
3900                         best_score= score;
3901                         run_tab[i+1]= run;
3902                         level_tab[i+1]= level-64;
3903                     }
3904                 }
3905
3906                 if(s->out_format == FMT_H263){
3907                     for(j=survivor_count-1; j>=0; j--){
3908                         int run= i - survivor[j];
3909                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3910                         score += score_tab[i-run];
3911                         if(score < last_score){
3912                             last_score= score;
3913                             last_run= run;
3914                             last_level= level-64;
3915                             last_i= i+1;
3916                         }
3917                     }
3918                 }
3919             }else{
3920                 distortion += esc_length*lambda;
3921                 for(j=survivor_count-1; j>=0; j--){
3922                     int run= i - survivor[j];
3923                     int score= distortion + score_tab[i-run];
3924
3925                     if(score < best_score){
3926                         best_score= score;
3927                         run_tab[i+1]= run;
3928                         level_tab[i+1]= level-64;
3929                     }
3930                 }
3931
3932                 if(s->out_format == FMT_H263){
3933                   for(j=survivor_count-1; j>=0; j--){
3934                         int run= i - survivor[j];
3935                         int score= distortion + score_tab[i-run];
3936                         if(score < last_score){
3937                             last_score= score;
3938                             last_run= run;
3939                             last_level= level-64;
3940                             last_i= i+1;
3941                         }
3942                     }
3943                 }
3944             }
3945         }
3946
3947         score_tab[i+1]= best_score;
3948
3949         // Note: there is a vlc code in MPEG-4 which is 1 bit shorter then another one with a shorter run and the same level
3950         if(last_non_zero <= 27){
3951             for(; survivor_count; survivor_count--){
3952                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3953                     break;
3954             }
3955         }else{
3956             for(; survivor_count; survivor_count--){
3957                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3958                     break;
3959             }
3960         }
3961
3962         survivor[ survivor_count++ ]= i+1;
3963     }
3964
3965     if(s->out_format != FMT_H263){
3966         last_score= 256*256*256*120;
3967         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3968             int score= score_tab[i];
3969             if (i)
3970                 score += lambda * 2; // FIXME more exact?
3971
3972             if(score < last_score){
3973                 last_score= score;
3974                 last_i= i;
3975                 last_level= level_tab[i];
3976                 last_run= run_tab[i];
3977             }
3978         }
3979     }
3980
3981     s->coded_score[n] = last_score;
3982
3983     dc= FFABS(block[0]);
3984     last_non_zero= last_i - 1;
3985     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3986
3987     if(last_non_zero < start_i)
3988         return last_non_zero;
3989
3990     if(last_non_zero == 0 && start_i == 0){
3991         int best_level= 0;
3992         int best_score= dc * dc;
3993
3994         for(i=0; i<coeff_count[0]; i++){
3995             int level= coeff[i][0];
3996             int alevel= FFABS(level);
3997             int unquant_coeff, score, distortion;
3998
3999             if(s->out_format == FMT_H263){
4000                     unquant_coeff= (alevel*qmul + qadd)>>3;
4001             } else { // MPEG-1
4002                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
4003                     unquant_coeff =   (unquant_coeff - 1) | 1;
4004             }
4005             unquant_coeff = (unquant_coeff + 4) >> 3;
4006             unquant_coeff<<= 3 + 3;
4007
4008             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4009             level+=64;
4010             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4011             else                    score= distortion + esc_length*lambda;
4012
4013             if(score < best_score){
4014                 best_score= score;
4015                 best_level= level - 64;
4016             }
4017         }
4018         block[0]= best_level;
4019         s->coded_score[n] = best_score - dc*dc;
4020         if(best_level == 0) return -1;
4021         else                return last_non_zero;
4022     }
4023
4024     i= last_i;
4025     assert(last_level);
4026
4027     block[ perm_scantable[last_non_zero] ]= last_level;
4028     i -= last_run + 1;
4029
4030     for(; i>start_i; i -= run_tab[i] + 1){
4031         block[ perm_scantable[i-1] ]= level_tab[i];
4032     }
4033
4034     return last_non_zero;
4035 }
4036
4037 //#define REFINE_STATS 1
4038 static int16_t basis[64][64];
4039
4040 static void build_basis(uint8_t *perm){
4041     int i, j, x, y;
4042     emms_c();
4043     for(i=0; i<8; i++){
4044         for(j=0; j<8; j++){
4045             for(y=0; y<8; y++){
4046                 for(x=0; x<8; x++){
4047                     double s= 0.25*(1<<BASIS_SHIFT);
4048                     int index= 8*i + j;
4049                     int perm_index= perm[index];
4050                     if(i==0) s*= sqrt(0.5);
4051                     if(j==0) s*= sqrt(0.5);
4052                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4053                 }
4054             }
4055         }
4056     }
4057 }
4058
4059 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4060                         int16_t *block, int16_t *weight, int16_t *orig,
4061                         int n, int qscale){
4062     int16_t rem[64];
4063     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4064     const uint8_t *scantable= s->intra_scantable.scantable;
4065     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4066 //    unsigned int threshold1, threshold2;
4067 //    int bias=0;
4068     int run_tab[65];
4069     int prev_run=0;
4070     int prev_level=0;
4071     int qmul, qadd, start_i, last_non_zero, i, dc;
4072     uint8_t * length;
4073     uint8_t * last_length;
4074     int lambda;
4075     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4076 #ifdef REFINE_STATS
4077 static int count=0;
4078 static int after_last=0;
4079 static int to_zero=0;
4080 static int from_zero=0;
4081 static int raise=0;
4082 static int lower=0;
4083 static int messed_sign=0;
4084 #endif
4085
4086     if(basis[0][0] == 0)
4087         build_basis(s->idsp.idct_permutation);
4088
4089     qmul= qscale*2;
4090     qadd= (qscale-1)|1;
4091     if (s->mb_intra) {
4092         if (!s->h263_aic) {
4093             if (n < 4)
4094                 q = s->y_dc_scale;
4095             else
4096                 q = s->c_dc_scale;
4097         } else{
4098             /* For AIC we skip quant/dequant of INTRADC */
4099             q = 1;
4100             qadd=0;
4101         }
4102         q <<= RECON_SHIFT-3;
4103         /* note: block[0] is assumed to be positive */
4104         dc= block[0]*q;
4105 //        block[0] = (block[0] + (q >> 1)) / q;
4106         start_i = 1;
4107 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4108 //            bias= 1<<(QMAT_SHIFT-1);
4109         length     = s->intra_ac_vlc_length;
4110         last_length= s->intra_ac_vlc_last_length;
4111     } else {
4112         dc= 0;
4113         start_i = 0;
4114         length     = s->inter_ac_vlc_length;
4115         last_length= s->inter_ac_vlc_last_length;
4116     }
4117     last_non_zero = s->block_last_index[n];
4118
4119 #ifdef REFINE_STATS
4120 {START_TIMER
4121 #endif
4122     dc += (1<<(RECON_SHIFT-1));
4123     for(i=0; i<64; i++){
4124         rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4125     }
4126 #ifdef REFINE_STATS
4127 STOP_TIMER("memset rem[]")}
4128 #endif
4129     sum=0;
4130     for(i=0; i<64; i++){
4131         int one= 36;
4132         int qns=4;
4133         int w;
4134
4135         w= FFABS(weight[i]) + qns*one;
4136         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4137
4138         weight[i] = w;
4139 //        w=weight[i] = (63*qns + (w/2)) / w;
4140
4141         assert(w>0);
4142         assert(w<(1<<6));
4143         sum += w*w;
4144     }
4145     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4146 #ifdef REFINE_STATS
4147 {START_TIMER
4148 #endif
4149     run=0;
4150     rle_index=0;
4151     for(i=start_i; i<=last_non_zero; i++){
4152         int j= perm_scantable[i];
4153         const int level= block[j];
4154         int coeff;
4155
4156         if(level){
4157             if(level<0) coeff= qmul*level - qadd;
4158             else        coeff= qmul*level + qadd;
4159             run_tab[rle_index++]=run;
4160             run=0;
4161
4162             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4163         }else{
4164             run++;
4165         }
4166     }
4167 #ifdef REFINE_STATS
4168 if(last_non_zero>0){
4169 STOP_TIMER("init rem[]")
4170 }
4171 }
4172
4173 {START_TIMER
4174 #endif
4175     for(;;){
4176         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4177         int best_coeff=0;
4178         int best_change=0;
4179         int run2, best_unquant_change=0, analyze_gradient;
4180 #ifdef REFINE_STATS
4181 {START_TIMER
4182 #endif
4183         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4184
4185         if(analyze_gradient){
4186 #ifdef REFINE_STATS
4187 {START_TIMER
4188 #endif
4189             for(i=0; i<64; i++){
4190                 int w= weight[i];
4191
4192                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4193             }
4194 #ifdef REFINE_STATS
4195 STOP_TIMER("rem*w*w")}
4196 {START_TIMER
4197 #endif
4198             s->fdsp.fdct(d1);
4199 #ifdef REFINE_STATS
4200 STOP_TIMER("dct")}
4201 #endif
4202         }
4203
4204         if(start_i){
4205             const int level= block[0];
4206             int change, old_coeff;
4207
4208             assert(s->mb_intra);
4209
4210             old_coeff= q*level;
4211
4212             for(change=-1; change<=1; change+=2){
4213                 int new_level= level + change;
4214                 int score, new_coeff;
4215
4216                 new_coeff= q*new_level;
4217                 if(new_coeff >= 2048 || new_coeff < 0)
4218                     continue;
4219
4220                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4221                                                   new_coeff - old_coeff);
4222                 if(score<best_score){
4223                     best_score= score;
4224                     best_coeff= 0;
4225                     best_change= change;
4226                     best_unquant_change= new_coeff - old_coeff;
4227                 }
4228             }
4229         }
4230
4231         run=0;
4232         rle_index=0;
4233         run2= run_tab[rle_index++];
4234         prev_level=0;
4235         prev_run=0;
4236
4237         for(i=start_i; i<64; i++){
4238             int j= perm_scantable[i];
4239             const int level= block[j];
4240             int change, old_coeff;
4241
4242             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4243                 break;
4244
4245             if(level){
4246                 if(level<0) old_coeff= qmul*level - qadd;
4247                 else        old_coeff= qmul*level + qadd;
4248                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4249             }else{
4250                 old_coeff=0;
4251                 run2--;
4252                 assert(run2>=0 || i >= last_non_zero );
4253             }
4254
4255             for(change=-1; change<=1; change+=2){
4256                 int new_level= level + change;
4257                 int score, new_coeff, unquant_change;
4258
4259                 score=0;
4260                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4261                    continue;
4262
4263                 if(new_level){
4264                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4265                     else            new_coeff= qmul*new_level + qadd;
4266                     if(new_coeff >= 2048 || new_coeff <= -2048)
4267                         continue;
4268                     //FIXME check for overflow
4269
4270                     if(level){
4271                         if(level < 63 && level > -63){
4272                             if(i < last_non_zero)
4273                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4274                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4275                             else
4276                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4277                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4278                         }
4279                     }else{
4280                         assert(FFABS(new_level)==1);
4281
4282                         if(analyze_gradient){
4283                             int g= d1[ scantable[i] ];
4284                             if(g && (g^new_level) >= 0)
4285                                 continue;
4286                         }
4287
4288                         if(i < last_non_zero){
4289                             int next_i= i + run2 + 1;
4290                             int next_level= block[ perm_scantable[next_i] ] + 64;
4291
4292                             if(next_level&(~127))
4293                                 next_level= 0;
4294
4295                             if(next_i < last_non_zero)
4296                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4297                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4298                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4299                             else
4300                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4301                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4302                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4303                         }else{
4304                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4305                             if(prev_level){
4306                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4307                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4308                             }
4309                         }
4310                     }
4311                 }else{
4312                     new_coeff=0;
4313                     assert(FFABS(level)==1);
4314
4315                     if(i < last_non_zero){
4316                         int next_i= i + run2 + 1;
4317                         int next_level= block[ perm_scantable[next_i] ] + 64;
4318
4319                         if(next_level&(~127))
4320                             next_level= 0;
4321
4322                         if(next_i < last_non_zero)
4323                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4324                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4325                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4326                         else
4327                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4328                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4329                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4330                     }else{
4331                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4332                         if(prev_level){
4333                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4334                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4335                         }
4336                     }
4337                 }
4338
4339                 score *= lambda;
4340
4341                 unquant_change= new_coeff - old_coeff;
4342                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
4343
4344                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4345                                                    unquant_change);
4346                 if(score<best_score){
4347                     best_score= score;
4348                     best_coeff= i;
4349                     best_change= change;
4350                     best_unquant_change= unquant_change;
4351                 }
4352             }
4353             if(level){
4354                 prev_level= level + 64;
4355                 if(prev_level&(~127))
4356                     prev_level= 0;
4357                 prev_run= run;
4358                 run=0;
4359             }else{
4360                 run++;
4361             }
4362         }
4363 #ifdef REFINE_STATS
4364 STOP_TIMER("iterative step")}
4365 #endif
4366
4367         if(best_change){
4368             int j= perm_scantable[ best_coeff ];
4369
4370             block[j] += best_change;
4371
4372             if(best_coeff > last_non_zero){
4373                 last_non_zero= best_coeff;
4374                 assert(block[j]);
4375 #ifdef REFINE_STATS
4376 after_last++;
4377 #endif
4378             }else{
4379 #ifdef REFINE_STATS
4380 if(block[j]){
4381     if(block[j] - best_change){
4382         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4383             raise++;
4384         }else{
4385             lower++;
4386         }
4387     }else{
4388         from_zero++;
4389     }
4390 }else{
4391     to_zero++;
4392 }
4393 #endif
4394                 for(; last_non_zero>=start_i; last_non_zero--){
4395                     if(block[perm_scantable[last_non_zero]])
4396                         break;
4397                 }
4398             }
4399 #ifdef REFINE_STATS
4400 count++;
4401 if(256*256*256*64 % count == 0){
4402     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4403 }
4404 #endif
4405             run=0;
4406             rle_index=0;
4407             for(i=start_i; i<=last_non_zero; i++){
4408                 int j= perm_scantable[i];
4409                 const int level= block[j];
4410
4411                  if(level){
4412                      run_tab[rle_index++]=run;
4413                      run=0;
4414                  }else{
4415                      run++;
4416                  }
4417             }
4418
4419             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4420         }else{
4421             break;
4422         }
4423     }
4424 #ifdef REFINE_STATS
4425 if(last_non_zero>0){
4426 STOP_TIMER("iterative search")
4427 }
4428 }
4429 #endif
4430
4431     return last_non_zero;
4432 }
4433
/**
 * Move the coefficients of an 8x8 block to their permuted positions.
 *
 * Only the coefficients at scan positions 0..last are touched; the block
 * itself is not converted to (or from) scan order.
 *
 * @param block       the block whose coefficients are moved in place
 * @param permutation the permutation vector: the coefficient at index j
 *                    ends up at index permutation[j]
 * @param scantable   the scantable in use; it only serves to limit the work
 *                    to the coefficients up to @p last
 * @param last        scan position of the last non-zero coefficient; for
 *                    values <= 0 the block is left untouched
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    if (last < 1)
        return;
    //FIXME it is ok but not clean and might fail for some permutations
    // if (permutation[1] == 1)
    // return;

    /* First pass: stash every coefficient in use and clear its old slot. */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos];

        saved[src] = block[src];
        block[src] = 0;
        pos++;
    }

    /* Second pass: drop each stashed coefficient into its permuted slot. */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos];

        block[permutation[src]] = saved[src];
        pos++;
    }
}
4469
4470 int ff_dct_quantize_c(MpegEncContext *s,
4471                         int16_t *block, int n,
4472                         int qscale, int *overflow)
4473 {
4474     int i, j, level, last_non_zero, q, start_i;
4475     const int *qmat;
4476     const uint8_t *scantable= s->intra_scantable.scantable;
4477     int bias;
4478     int max=0;
4479     unsigned int threshold1, threshold2;
4480
4481     s->fdsp.fdct(block);
4482
4483     if(s->dct_error_sum)
4484         s->denoise_dct(s, block);
4485
4486     if (s->mb_intra) {
4487         if (!s->h263_aic) {
4488             if (n < 4)
4489                 q = s->y_dc_scale;
4490             else
4491                 q = s->c_dc_scale;
4492             q = q << 3;
4493         } else
4494             /* For AIC we skip quant/dequant of INTRADC */
4495             q = 1 << 3;
4496
4497         /* note: block[0] is assumed to be positive */
4498         block[0] = (block[0] + (q >> 1)) / q;
4499         start_i = 1;
4500         last_non_zero = 0;
4501         qmat = s->q_intra_matrix[qscale];
4502         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4503     } else {
4504         start_i = 0;
4505         last_non_zero = -1;
4506         qmat = s->q_inter_matrix[qscale];
4507         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4508     }
4509     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4510     threshold2= (threshold1<<1);
4511     for(i=63;i>=start_i;i--) {
4512         j = scantable[i];
4513         level = block[j] * qmat[j];
4514
4515         if(((unsigned)(level+threshold1))>threshold2){
4516             last_non_zero = i;
4517             break;
4518         }else{
4519             block[j]=0;
4520         }
4521     }
4522     for(i=start_i; i<=last_non_zero; i++) {
4523         j = scantable[i];
4524         level = block[j] * qmat[j];
4525
4526 //        if(   bias+level >= (1<<QMAT_SHIFT)
4527 //           || bias-level >= (1<<QMAT_SHIFT)){
4528         if(((unsigned)(level+threshold1))>threshold2){
4529             if(level>0){
4530                 level= (bias + level)>>QMAT_SHIFT;
4531                 block[j]= level;
4532             }else{
4533                 level= (bias - level)>>QMAT_SHIFT;
4534                 block[j]= -level;
4535             }
4536             max |=level;
4537         }else{
4538             block[j]=0;
4539         }
4540     }
4541     *overflow= s->max_qcoeff < max; //overflow might have happened
4542
4543     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4544     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4545         block_permute(block, s->idsp.idct_permutation,
4546                       scantable, last_non_zero);
4547
4548     return last_non_zero;
4549 }
4550
/* Byte offset of a field inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the encoder options below. The expansion is
 * parenthesised so that it stays a single operand wherever it is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4553 static const AVOption h263_options[] = {
4554     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4555     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4556     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4557     FF_MPV_COMMON_OPTS
4558     { NULL },
4559 };
4560
4561 static const AVClass h263_class = {
4562     .class_name = "H.263 encoder",
4563     .item_name  = av_default_item_name,
4564     .option     = h263_options,
4565     .version    = LIBAVUTIL_VERSION_INT,
4566 };
4567
4568 AVCodec ff_h263_encoder = {
4569     .name           = "h263",
4570     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4571     .type           = AVMEDIA_TYPE_VIDEO,
4572     .id             = AV_CODEC_ID_H263,
4573     .priv_data_size = sizeof(MpegEncContext),
4574     .init           = ff_mpv_encode_init,
4575     .encode2        = ff_mpv_encode_picture,
4576     .close          = ff_mpv_encode_end,
4577     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4578     .priv_class     = &h263_class,
4579 };
4580
4581 static const AVOption h263p_options[] = {
4582     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4583     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4584     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4585     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4586     FF_MPV_COMMON_OPTS
4587     { NULL },
4588 };
4589 static const AVClass h263p_class = {
4590     .class_name = "H.263p encoder",
4591     .item_name  = av_default_item_name,
4592     .option     = h263p_options,
4593     .version    = LIBAVUTIL_VERSION_INT,
4594 };
4595
4596 AVCodec ff_h263p_encoder = {
4597     .name           = "h263p",
4598     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4599     .type           = AVMEDIA_TYPE_VIDEO,
4600     .id             = AV_CODEC_ID_H263P,
4601     .priv_data_size = sizeof(MpegEncContext),
4602     .init           = ff_mpv_encode_init,
4603     .encode2        = ff_mpv_encode_picture,
4604     .close          = ff_mpv_encode_end,
4605     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4606     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4607     .priv_class     = &h263p_class,
4608 };
4609
4610 static const AVClass msmpeg4v2_class = {
4611     .class_name = "msmpeg4v2 encoder",
4612     .item_name  = av_default_item_name,
4613     .option     = ff_mpv_generic_options,
4614     .version    = LIBAVUTIL_VERSION_INT,
4615 };
4616
4617 AVCodec ff_msmpeg4v2_encoder = {
4618     .name           = "msmpeg4v2",
4619     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4620     .type           = AVMEDIA_TYPE_VIDEO,
4621     .id             = AV_CODEC_ID_MSMPEG4V2,
4622     .priv_data_size = sizeof(MpegEncContext),
4623     .init           = ff_mpv_encode_init,
4624     .encode2        = ff_mpv_encode_picture,
4625     .close          = ff_mpv_encode_end,
4626     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4627     .priv_class     = &msmpeg4v2_class,
4628 };
4629
4630 static const AVClass msmpeg4v3_class = {
4631     .class_name = "msmpeg4v3 encoder",
4632     .item_name  = av_default_item_name,
4633     .option     = ff_mpv_generic_options,
4634     .version    = LIBAVUTIL_VERSION_INT,
4635 };
4636
4637 AVCodec ff_msmpeg4v3_encoder = {
4638     .name           = "msmpeg4",
4639     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4640     .type           = AVMEDIA_TYPE_VIDEO,
4641     .id             = AV_CODEC_ID_MSMPEG4V3,
4642     .priv_data_size = sizeof(MpegEncContext),
4643     .init           = ff_mpv_encode_init,
4644     .encode2        = ff_mpv_encode_picture,
4645     .close          = ff_mpv_encode_end,
4646     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4647     .priv_class     = &msmpeg4v3_class,
4648 };
4649
4650 static const AVClass wmv1_class = {
4651     .class_name = "wmv1 encoder",
4652     .item_name  = av_default_item_name,
4653     .option     = ff_mpv_generic_options,
4654     .version    = LIBAVUTIL_VERSION_INT,
4655 };
4656
4657 AVCodec ff_wmv1_encoder = {
4658     .name           = "wmv1",
4659     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4660     .type           = AVMEDIA_TYPE_VIDEO,
4661     .id             = AV_CODEC_ID_WMV1,
4662     .priv_data_size = sizeof(MpegEncContext),
4663     .init           = ff_mpv_encode_init,
4664     .encode2        = ff_mpv_encode_picture,
4665     .close          = ff_mpv_encode_end,
4666     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4667     .priv_class     = &wmv1_class,
4668 };