libavcodec/mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /*
26  * Fixes for non-linear quantizers with large QPs and for VBV with restrictive qmin sponsored by NOA GmbH
27  */
28
29 /**
30  * @file
31  * The simplest mpeg encoder (well, it was the simplest!).
32  */
33
34 #include <stdint.h>
35
36 #include "libavutil/internal.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/pixdesc.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/timer.h"
42 #include "avcodec.h"
43 #include "dct.h"
44 #include "idctdsp.h"
45 #include "mpeg12.h"
46 #include "mpegvideo.h"
47 #include "mpegvideodata.h"
48 #include "h261.h"
49 #include "h263.h"
50 #include "h263data.h"
51 #include "mjpegenc_common.h"
52 #include "mathops.h"
53 #include "mpegutils.h"
54 #include "mjpegenc.h"
55 #include "msmpeg4.h"
56 #include "pixblockdsp.h"
57 #include "qpeldsp.h"
58 #include "faandct.h"
59 #include "thread.h"
60 #include "aandcttab.h"
61 #include "flv.h"
62 #include "mpeg4video.h"
63 #include "internal.h"
64 #include "bytestream.h"
65 #include "wmv2.h"
66 #include "rv10.h"
67 #include <limits.h>
68 #include "sp5x.h"
69
70 #define QUANT_BIAS_SHIFT 8
71
72 #define QMAT_SHIFT_MMX 16
73 #define QMAT_SHIFT 21
74
75 static int encode_picture(MpegEncContext *s, int picture_number);
76 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
77 static int sse_mb(MpegEncContext *s);
78 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
79 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
80
81 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
82 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
83
84 const AVOption ff_mpv_generic_options[] = {
85     FF_MPV_COMMON_OPTS
86     { NULL },
87 };
88
89 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
90                        uint16_t (*qmat16)[2][64],
91                        const uint16_t *quant_matrix,
92                        int bias, int qmin, int qmax, int intra)
93 {
94     FDCTDSPContext *fdsp = &s->fdsp;
95     int qscale;
96     int shift = 0;
97
98     for (qscale = qmin; qscale <= qmax; qscale++) {
99         int i;
100         int qscale2;
101
102         if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
103         else                 qscale2 = qscale << 1;
104
105         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
106 #if CONFIG_FAANDCT
107             fdsp->fdct == ff_faandct            ||
108 #endif /* CONFIG_FAANDCT */
109             fdsp->fdct == ff_jpeg_fdct_islow_10) {
110             for (i = 0; i < 64; i++) {
111                 const int j = s->idsp.idct_permutation[i];
112                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
113                 /* 16 <= qscale * quant_matrix[i] <= 7905
114                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
115                  *             19952 <=              x  <= 249205026
116                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
117                  *           3444240 >= (1 << 36) / (x) >= 275 */
118
119                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
120             }
121         } else if (fdsp->fdct == ff_fdct_ifast) {
122             for (i = 0; i < 64; i++) {
123                 const int j = s->idsp.idct_permutation[i];
124                 int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
125                 /* 16 <= qscale * quant_matrix[i] <= 7905
126                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
127                  *             19952 <=              x  <= 249205026
128                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
129                  *           3444240 >= (1 << 36) / (x) >= 275 */
130
131                 qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
132             }
133         } else {
134             for (i = 0; i < 64; i++) {
135                 const int j = s->idsp.idct_permutation[i];
136                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
137                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
138                  * Assume x = qscale * quant_matrix[i]
139                  * So             16 <=              x  <= 7905
140                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
141                  * so          32768 >= (1 << 19) / (x) >= 67 */
142                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
143                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
144                 //                    (qscale * quant_matrix[i]);
145                 qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
146
147                 if (qmat16[qscale][0][i] == 0 ||
148                     qmat16[qscale][0][i] == 128 * 256)
149                     qmat16[qscale][0][i] = 128 * 256 - 1;
150                 qmat16[qscale][1][i] =
151                     ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
152                                 qmat16[qscale][0][i]);
153             }
154         }
155
156         for (i = intra; i < 64; i++) {
157             int64_t max = 8191;
158             if (fdsp->fdct == ff_fdct_ifast) {
159                 max = (8191LL * ff_aanscales[i]) >> 14;
160             }
161             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
162                 shift++;
163             }
164         }
165     }
166     if (shift) {
167         av_log(NULL, AV_LOG_INFO,
168                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
169                QMAT_SHIFT - shift);
170     }
171 }
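/* Editor's note (worked example, derived from the code above): qmat[] stores
 * fixed-point reciprocals so the quantizers can multiply and shift instead of
 * dividing per coefficient.  For the branches using den = qscale2 *
 * quant_matrix[j], take qscale = 8 (linear mapping, so qscale2 = 16) and
 * quant_matrix[j] = 16:
 *     den  = 16 * 16 = 256
 *     qmat = (2 << QMAT_SHIFT) / 256 = (1 << 22) / 256 = 16384
 * and (level * 16384) >> QMAT_SHIFT == level / 128 == level / (qscale *
 * quant_matrix[j]), i.e. qmat approximates the reciprocal of
 * qscale * quant_matrix[j] that the dct_quantize functions need. */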
172
173 static inline void update_qscale(MpegEncContext *s)
174 {
175     if (s->q_scale_type == 1 && 0) {
176         int i;
177         int bestdiff=INT_MAX;
178         int best = 1;
179
180         for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
181             int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
182             if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
183                 (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
184                 continue;
185             if (diff < bestdiff) {
186                 bestdiff = diff;
187                 best = i;
188             }
189         }
190         s->qscale = best;
191     } else {
192         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
193                     (FF_LAMBDA_SHIFT + 7);
194         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
195     }
196
197     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
198                  FF_LAMBDA_SHIFT;
199 }
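/* Editor's note (rough arithmetic, assuming the libavutil values
 * FF_LAMBDA_SHIFT == 7, FF_LAMBDA_SCALE == 128 and FF_QP2LAMBDA == 118):
 * the linear branch above computes qscale ~= (lambda * 139 + 8192) / 16384,
 * and 139 / 16384 ~= 1 / 117.9, so this is essentially qscale ~= lambda /
 * FF_QP2LAMBDA with rounding, clipped to [qmin, qmax]. */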
200
201 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
202 {
203     int i;
204
205     if (matrix) {
206         put_bits(pb, 1, 1);
207         for (i = 0; i < 64; i++) {
208             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
209         }
210     } else
211         put_bits(pb, 1, 0);
212 }
213
214 /**
215  * init s->current_picture.qscale_table from s->lambda_table
216  */
217 void ff_init_qscale_tab(MpegEncContext *s)
218 {
219     int8_t * const qscale_table = s->current_picture.qscale_table;
220     int i;
221
222     for (i = 0; i < s->mb_num; i++) {
223         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
224         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
225         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
226                                                   s->avctx->qmax);
227     }
228 }
229
230 static void update_duplicate_context_after_me(MpegEncContext *dst,
231                                               MpegEncContext *src)
232 {
233 #define COPY(a) dst->a= src->a
234     COPY(pict_type);
235     COPY(current_picture);
236     COPY(f_code);
237     COPY(b_code);
238     COPY(qscale);
239     COPY(lambda);
240     COPY(lambda2);
241     COPY(picture_in_gop_number);
242     COPY(gop_picture_number);
243     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
244     COPY(progressive_frame);    // FIXME don't set in encode_header
245     COPY(partitioned_frame);    // FIXME don't set in encode_header
246 #undef COPY
247 }
248
249 /**
250  * Set the given MpegEncContext to defaults for encoding.
251  * The changed fields will not depend upon the prior state of the MpegEncContext.
252  */
253 static void mpv_encode_defaults(MpegEncContext *s)
254 {
255     int i;
256     ff_mpv_common_defaults(s);
257
258     for (i = -16; i < 16; i++) {
259         default_fcode_tab[i + MAX_MV] = 1;
260     }
261     s->me.mv_penalty = default_mv_penalty;
262     s->fcode_tab     = default_fcode_tab;
263
264     s->input_picture_number  = 0;
265     s->picture_in_gop_number = 0;
266 }
267
268 av_cold int ff_dct_encode_init(MpegEncContext *s) {
269     if (ARCH_X86)
270         ff_dct_encode_init_x86(s);
271
272     if (CONFIG_H263_ENCODER)
273         ff_h263dsp_init(&s->h263dsp);
274     if (!s->dct_quantize)
275         s->dct_quantize = ff_dct_quantize_c;
276     if (!s->denoise_dct)
277         s->denoise_dct  = denoise_dct_c;
278     s->fast_dct_quantize = s->dct_quantize;
279     if (s->avctx->trellis)
280         s->dct_quantize  = dct_quantize_trellis_c;
281
282     return 0;
283 }
284
285 /* init video encoder */
286 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
287 {
288     MpegEncContext *s = avctx->priv_data;
289     int i, ret, format_supported;
290
291     mpv_encode_defaults(s);
292
293     switch (avctx->codec_id) {
294     case AV_CODEC_ID_MPEG2VIDEO:
295         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
296             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
297             av_log(avctx, AV_LOG_ERROR,
298                    "only YUV420 and YUV422 are supported\n");
299             return -1;
300         }
301         break;
302     case AV_CODEC_ID_MJPEG:
303     case AV_CODEC_ID_AMV:
304         format_supported = 0;
305         /* JPEG color space */
306         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
307             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
308             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
309             (avctx->color_range == AVCOL_RANGE_JPEG &&
310              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
311               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
312               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
313             format_supported = 1;
314         /* MPEG color space */
315         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
316                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
317                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
318                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
319             format_supported = 1;
320
321         if (!format_supported) {
322             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
323             return -1;
324         }
325         break;
326     default:
327         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
328             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
329             return -1;
330         }
331     }
332
333     switch (avctx->pix_fmt) {
334     case AV_PIX_FMT_YUVJ444P:
335     case AV_PIX_FMT_YUV444P:
336         s->chroma_format = CHROMA_444;
337         break;
338     case AV_PIX_FMT_YUVJ422P:
339     case AV_PIX_FMT_YUV422P:
340         s->chroma_format = CHROMA_422;
341         break;
342     case AV_PIX_FMT_YUVJ420P:
343     case AV_PIX_FMT_YUV420P:
344     default:
345         s->chroma_format = CHROMA_420;
346         break;
347     }
348
349     s->bit_rate = avctx->bit_rate;
350     s->width    = avctx->width;
351     s->height   = avctx->height;
352     if (avctx->gop_size > 600 &&
353         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
354         av_log(avctx, AV_LOG_WARNING,
355                "keyframe interval too large, reducing it from %d to %d\n",
356                avctx->gop_size, 600);
357         avctx->gop_size = 600;
358     }
359     s->gop_size     = avctx->gop_size;
360     s->avctx        = avctx;
361     if (avctx->max_b_frames > MAX_B_FRAMES) {
362         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
363                "is %d.\n", MAX_B_FRAMES);
364         avctx->max_b_frames = MAX_B_FRAMES;
365     }
366     s->max_b_frames = avctx->max_b_frames;
367     s->codec_id     = avctx->codec->id;
368     s->strict_std_compliance = avctx->strict_std_compliance;
369     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
370     s->mpeg_quant         = avctx->mpeg_quant;
371     s->rtp_mode           = !!avctx->rtp_payload_size;
372     s->intra_dc_precision = avctx->intra_dc_precision;
373
374     // Work around differences in how applications specify the DC precision
375     if (s->intra_dc_precision < 0) {
376         s->intra_dc_precision += 8;
377     } else if (s->intra_dc_precision >= 8)
378         s->intra_dc_precision -= 8;
379
380     if (s->intra_dc_precision < 0) {
381         av_log(avctx, AV_LOG_ERROR,
382                 "intra dc precision must be positive; note that some applications use"
383                 " 0 and some use 8 as the base, both meaning 8 bit; the value must not be smaller than that\n");
384         return AVERROR(EINVAL);
385     }
386
387     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
388         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
389         return AVERROR(EINVAL);
390     }
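    /* Editor's note: the remapping above lets applications pass the DC
     * precision either as the raw bitstream values 0..3 or as the bit depth
     * 8..11 (8 maps to 0, 11 maps to 3).  After remapping, only MPEG-2 may
     * use a nonzero value, i.e. up to 3 for 11-bit DC; all other codecs here
     * are limited to the 8-bit default. */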
391     s->user_specified_pts = AV_NOPTS_VALUE;
392
393     if (s->gop_size <= 1) {
394         s->intra_only = 1;
395         s->gop_size   = 12;
396     } else {
397         s->intra_only = 0;
398     }
399
400 #if FF_API_MOTION_EST
401 FF_DISABLE_DEPRECATION_WARNINGS
402     s->me_method = avctx->me_method;
403 FF_ENABLE_DEPRECATION_WARNINGS
404 #endif
405
406     /* Fixed QSCALE */
407     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
408
409 #if FF_API_MPV_OPT
410     FF_DISABLE_DEPRECATION_WARNINGS
411     if (avctx->border_masking != 0.0)
412         s->border_masking = avctx->border_masking;
413     FF_ENABLE_DEPRECATION_WARNINGS
414 #endif
415
416     s->adaptive_quant = (s->avctx->lumi_masking ||
417                          s->avctx->dark_masking ||
418                          s->avctx->temporal_cplx_masking ||
419                          s->avctx->spatial_cplx_masking  ||
420                          s->avctx->p_masking      ||
421                          s->border_masking ||
422                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
423                         !s->fixed_qscale;
424
425     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
426
427     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
428         switch(avctx->codec_id) {
429         case AV_CODEC_ID_MPEG1VIDEO:
430         case AV_CODEC_ID_MPEG2VIDEO:
431             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
432             break;
433         case AV_CODEC_ID_MPEG4:
434         case AV_CODEC_ID_MSMPEG4V1:
435         case AV_CODEC_ID_MSMPEG4V2:
436         case AV_CODEC_ID_MSMPEG4V3:
437             if       (avctx->rc_max_rate >= 15000000) {
438                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
439             } else if(avctx->rc_max_rate >=  2000000) {
440                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
441             } else if(avctx->rc_max_rate >=   384000) {
442                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
443             } else
444                 avctx->rc_buffer_size = 40;
445             avctx->rc_buffer_size *= 16384;
446             break;
447         }
448         if (avctx->rc_buffer_size) {
449             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
450         }
451     }
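    /* Editor's note (informal check of the constants above): for MPEG-1/2 the
     * formula gives FFMAX(rate, 15 Mbit/s) * 112 / 15000000 * 16384 bits, i.e.
     * 112 * 16384 = 1835008 bits (224 kbyte in the av_log above) for rates up
     * to 15 Mbit/s -- which appears to match the MPEG-2 MP@ML vbv_buffer_size
     * limit -- and scales roughly linearly for higher rates. */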
452
453     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
454         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
455         return -1;
456     }
457
458     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
459         av_log(avctx, AV_LOG_INFO,
460                "Warning: min_rate > 0 but min_rate != max_rate isn't recommended!\n");
461     }
462
463     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
464         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
465         return -1;
466     }
467
468     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
469         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
470         return -1;
471     }
472
473     if (avctx->rc_max_rate &&
474         avctx->rc_max_rate == avctx->bit_rate &&
475         avctx->rc_max_rate != avctx->rc_min_rate) {
476         av_log(avctx, AV_LOG_INFO,
477                "impossible bitrate constraints, this will fail\n");
478     }
479
480     if (avctx->rc_buffer_size &&
481         avctx->bit_rate * (int64_t)avctx->time_base.num >
482             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
483         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
484         return -1;
485     }
486
487     if (!s->fixed_qscale &&
488         avctx->bit_rate * av_q2d(avctx->time_base) >
489             avctx->bit_rate_tolerance) {
490         av_log(avctx, AV_LOG_WARNING,
491                "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, (int64_t)avctx->bit_rate);
492         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
493     }
494
495     if (s->avctx->rc_max_rate &&
496         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
497         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
498          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
499         90000LL * (avctx->rc_buffer_size - 1) >
500             s->avctx->rc_max_rate * 0xFFFFLL) {
501         av_log(avctx, AV_LOG_INFO,
502                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
503                "specified vbv buffer is too large for the given bitrate!\n");
504     }
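    /* Editor's note: vbv_delay is a 16-bit header field counted in 1/90000 s
     * units, so the largest encodable delay is 0xFFFE / 90000 s.  The check
     * above warns when buffer_size / max_rate exceeds that, in which case the
     * MPEG-1/2 header writer presumably has to fall back to the 0xFFFF "VBR"
     * escape value mentioned in the message. */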
505
506     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
507         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
508         s->codec_id != AV_CODEC_ID_FLV1) {
509         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
510         return -1;
511     }
512
513     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
514         av_log(avctx, AV_LOG_ERROR,
515                "OBMC is only supported with simple mb decision\n");
516         return -1;
517     }
518
519     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
520         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
521         return -1;
522     }
523
524     if (s->max_b_frames                    &&
525         s->codec_id != AV_CODEC_ID_MPEG4      &&
526         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
527         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
528         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
529         return -1;
530     }
531     if (s->max_b_frames < 0) {
532         av_log(avctx, AV_LOG_ERROR,
533                "max b frames must be 0 or positive for mpegvideo based encoders\n");
534         return -1;
535     }
536
537     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
538          s->codec_id == AV_CODEC_ID_H263  ||
539          s->codec_id == AV_CODEC_ID_H263P) &&
540         (avctx->sample_aspect_ratio.num > 255 ||
541          avctx->sample_aspect_ratio.den > 255)) {
542         av_log(avctx, AV_LOG_WARNING,
543                "Invalid pixel aspect ratio %i/%i, limit is 255/255, reducing\n",
544                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
545         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
546                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
547     }
548
549     if ((s->codec_id == AV_CODEC_ID_H263  ||
550          s->codec_id == AV_CODEC_ID_H263P) &&
551         (avctx->width  > 2048 ||
552          avctx->height > 1152 )) {
553         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
554         return -1;
555     }
556     if ((s->codec_id == AV_CODEC_ID_H263  ||
557          s->codec_id == AV_CODEC_ID_H263P) &&
558         ((avctx->width &3) ||
559          (avctx->height&3) )) {
560         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
561         return -1;
562     }
563
564     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
565         (avctx->width  > 4095 ||
566          avctx->height > 4095 )) {
567         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
568         return -1;
569     }
570
571     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
572         (avctx->width  > 16383 ||
573          avctx->height > 16383 )) {
574         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
575         return -1;
576     }
577
578     if (s->codec_id == AV_CODEC_ID_RV10 &&
579         (avctx->width &15 ||
580          avctx->height&15 )) {
581         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
582         return AVERROR(EINVAL);
583     }
584
585     if (s->codec_id == AV_CODEC_ID_RV20 &&
586         (avctx->width &3 ||
587          avctx->height&3 )) {
588         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
589         return AVERROR(EINVAL);
590     }
591
592     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
593          s->codec_id == AV_CODEC_ID_WMV2) &&
594          avctx->width & 1) {
595          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
596          return -1;
597     }
598
599     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
600         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
601         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
602         return -1;
603     }
604
605     // FIXME mpeg2 uses that too
606     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
607                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
608         av_log(avctx, AV_LOG_ERROR,
609                "mpeg2 style quantization not supported by codec\n");
610         return -1;
611     }
612
613     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
614         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
615         return -1;
616     }
617
618     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
619         s->avctx->mb_decision != FF_MB_DECISION_RD) {
620         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
621         return -1;
622     }
623
624     if (s->avctx->scenechange_threshold < 1000000000 &&
625         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
626         av_log(avctx, AV_LOG_ERROR,
627                "closed GOP with scene change detection is not supported yet, "
628                "set threshold to 1000000000\n");
629         return -1;
630     }
631
632     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
633         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
634             av_log(avctx, AV_LOG_ERROR,
635                   "low delay forcing is only available for mpeg2\n");
636             return -1;
637         }
638         if (s->max_b_frames != 0) {
639             av_log(avctx, AV_LOG_ERROR,
640                    "b frames cannot be used with low delay\n");
641             return -1;
642         }
643     }
644
645     if (s->q_scale_type == 1) {
646         if (avctx->qmax > 28) {
647             av_log(avctx, AV_LOG_ERROR,
648                    "non linear quant only supports qmax <= 28 currently\n");
649             return -1;
650         }
651     }
652
653     if (avctx->slices > 1 &&
654         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
655         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
656         return AVERROR(EINVAL);
657     }
658
659     if (s->avctx->thread_count > 1         &&
660         s->codec_id != AV_CODEC_ID_MPEG4      &&
661         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
662         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
663         s->codec_id != AV_CODEC_ID_MJPEG      &&
664         (s->codec_id != AV_CODEC_ID_H263P)) {
665         av_log(avctx, AV_LOG_ERROR,
666                "multi threaded encoding not supported by codec\n");
667         return -1;
668     }
669
670     if (s->avctx->thread_count < 1) {
671         av_log(avctx, AV_LOG_ERROR,
672                "automatic thread number detection not supported by codec, "
673                "patch welcome\n");
674         return -1;
675     }
676
677     if (!avctx->time_base.den || !avctx->time_base.num) {
678         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
679         return -1;
680     }
681
682     if (avctx->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
683         av_log(avctx, AV_LOG_INFO,
684                "notice: b_frame_strategy only affects the first pass\n");
685         avctx->b_frame_strategy = 0;
686     }
687
688     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
689     if (i > 1) {
690         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
691         avctx->time_base.den /= i;
692         avctx->time_base.num /= i;
693         //return -1;
694     }
695
696     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
697         // (a + x * 3 / 8) / x
698         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
699         s->inter_quant_bias = 0;
700     } else {
701         s->intra_quant_bias = 0;
702         // (a - x / 4) / x
703         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
704     }
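    /* Editor's note: with QUANT_BIAS_SHIFT == 8 these biases are Q8 fixed
     * point: 3 << 5 == 96 represents the +3/8 rounding of the intra bias in
     * the first branch, and -(1 << 6) == -64 represents the -1/4 rounding of
     * the inter bias in the second branch, matching the "(a + x * 3 / 8) / x"
     * and "(a - x / 4) / x" comments. */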
705
706     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
707         av_log(avctx, AV_LOG_ERROR, "qmin and/or qmax are invalid, they must satisfy 0 < min <= max\n");
708         return AVERROR(EINVAL);
709     }
710
711 #if FF_API_QUANT_BIAS
712 FF_DISABLE_DEPRECATION_WARNINGS
713     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
714         s->intra_quant_bias = avctx->intra_quant_bias;
715     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
716         s->inter_quant_bias = avctx->inter_quant_bias;
717 FF_ENABLE_DEPRECATION_WARNINGS
718 #endif
719
720     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
721
722     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
723         s->avctx->time_base.den > (1 << 16) - 1) {
724         av_log(avctx, AV_LOG_ERROR,
725                "timebase %d/%d not supported by MPEG 4 standard, "
726                "the maximum admitted value for the timebase denominator "
727                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
728                (1 << 16) - 1);
729         return -1;
730     }
731     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
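    /* Editor's note: this picks the smallest bit count able to represent
     * time_base.den - 1; e.g. den = 25 gives av_log2(24) + 1 = 5 bits, enough
     * for MPEG-4 vop_time_increment values 0..24. */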
732
733     switch (avctx->codec->id) {
734     case AV_CODEC_ID_MPEG1VIDEO:
735         s->out_format = FMT_MPEG1;
736         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
737         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
738         break;
739     case AV_CODEC_ID_MPEG2VIDEO:
740         s->out_format = FMT_MPEG1;
741         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
742         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
743         s->rtp_mode   = 1;
744         break;
745     case AV_CODEC_ID_MJPEG:
746     case AV_CODEC_ID_AMV:
747         s->out_format = FMT_MJPEG;
748         s->intra_only = 1; /* force intra only for jpeg */
749         if (!CONFIG_MJPEG_ENCODER ||
750             ff_mjpeg_encode_init(s) < 0)
751             return -1;
752         avctx->delay = 0;
753         s->low_delay = 1;
754         break;
755     case AV_CODEC_ID_H261:
756         if (!CONFIG_H261_ENCODER)
757             return -1;
758         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
759             av_log(avctx, AV_LOG_ERROR,
760                    "The specified picture size of %dx%d is not valid for the "
761                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
762                     s->width, s->height);
763             return -1;
764         }
765         s->out_format = FMT_H261;
766         avctx->delay  = 0;
767         s->low_delay  = 1;
768         s->rtp_mode   = 0; /* Sliced encoding not supported */
769         break;
770     case AV_CODEC_ID_H263:
771         if (!CONFIG_H263_ENCODER)
772             return -1;
773         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
774                              s->width, s->height) == 8) {
775             av_log(avctx, AV_LOG_ERROR,
776                    "The specified picture size of %dx%d is not valid for "
777                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
778                    "352x288, 704x576, and 1408x1152. "
779                    "Try H.263+.\n", s->width, s->height);
780             return -1;
781         }
782         s->out_format = FMT_H263;
783         avctx->delay  = 0;
784         s->low_delay  = 1;
785         break;
786     case AV_CODEC_ID_H263P:
787         s->out_format = FMT_H263;
788         s->h263_plus  = 1;
789         /* Fx */
790         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
791         s->modified_quant  = s->h263_aic;
792         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
793         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
794
795         /* /Fx */
796         /* These are just to be sure */
797         avctx->delay = 0;
798         s->low_delay = 1;
799         break;
800     case AV_CODEC_ID_FLV1:
801         s->out_format      = FMT_H263;
802         s->h263_flv        = 2; /* format = 1; 11-bit codes */
803         s->unrestricted_mv = 1;
804         s->rtp_mode  = 0; /* don't allow GOB */
805         avctx->delay = 0;
806         s->low_delay = 1;
807         break;
808     case AV_CODEC_ID_RV10:
809         s->out_format = FMT_H263;
810         avctx->delay  = 0;
811         s->low_delay  = 1;
812         break;
813     case AV_CODEC_ID_RV20:
814         s->out_format      = FMT_H263;
815         avctx->delay       = 0;
816         s->low_delay       = 1;
817         s->modified_quant  = 1;
818         s->h263_aic        = 1;
819         s->h263_plus       = 1;
820         s->loop_filter     = 1;
821         s->unrestricted_mv = 0;
822         break;
823     case AV_CODEC_ID_MPEG4:
824         s->out_format      = FMT_H263;
825         s->h263_pred       = 1;
826         s->unrestricted_mv = 1;
827         s->low_delay       = s->max_b_frames ? 0 : 1;
828         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
829         break;
830     case AV_CODEC_ID_MSMPEG4V2:
831         s->out_format      = FMT_H263;
832         s->h263_pred       = 1;
833         s->unrestricted_mv = 1;
834         s->msmpeg4_version = 2;
835         avctx->delay       = 0;
836         s->low_delay       = 1;
837         break;
838     case AV_CODEC_ID_MSMPEG4V3:
839         s->out_format        = FMT_H263;
840         s->h263_pred         = 1;
841         s->unrestricted_mv   = 1;
842         s->msmpeg4_version   = 3;
843         s->flipflop_rounding = 1;
844         avctx->delay         = 0;
845         s->low_delay         = 1;
846         break;
847     case AV_CODEC_ID_WMV1:
848         s->out_format        = FMT_H263;
849         s->h263_pred         = 1;
850         s->unrestricted_mv   = 1;
851         s->msmpeg4_version   = 4;
852         s->flipflop_rounding = 1;
853         avctx->delay         = 0;
854         s->low_delay         = 1;
855         break;
856     case AV_CODEC_ID_WMV2:
857         s->out_format        = FMT_H263;
858         s->h263_pred         = 1;
859         s->unrestricted_mv   = 1;
860         s->msmpeg4_version   = 5;
861         s->flipflop_rounding = 1;
862         avctx->delay         = 0;
863         s->low_delay         = 1;
864         break;
865     default:
866         return -1;
867     }
868
869     avctx->has_b_frames = !s->low_delay;
870
871     s->encoding = 1;
872
873     s->progressive_frame    =
874     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
875                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
876                                 s->alternate_scan);
877
878     /* init */
879     ff_mpv_idct_init(s);
880     if (ff_mpv_common_init(s) < 0)
881         return -1;
882
883     ff_fdctdsp_init(&s->fdsp, avctx);
884     ff_me_cmp_init(&s->mecc, avctx);
885     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
886     ff_pixblockdsp_init(&s->pdsp, avctx);
887     ff_qpeldsp_init(&s->qdsp);
888
889     if (s->msmpeg4_version) {
890         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
891                           2 * 2 * (MAX_LEVEL + 1) *
892                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
893     }
894     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
895
896     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
897     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
898     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
899     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
900     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
901     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
902     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
903                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
904     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
905                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
906
907     if (s->avctx->noise_reduction) {
908         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
909                           2 * 64 * sizeof(uint16_t), fail);
910     }
911
912     ff_dct_encode_init(s);
913
914     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
915         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
916
917     if (s->slice_context_count > 1) {
918         s->rtp_mode = 1;
919
920         if (avctx->codec_id == AV_CODEC_ID_H263P)
921             s->h263_slice_structured = 1;
922     }
923
924     s->quant_precision = 5;
925
926     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
927     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
928
929     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
930         ff_h261_encode_init(s);
931     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
932         ff_h263_encode_init(s);
933     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
934         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
935             return ret;
936     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
937         && s->out_format == FMT_MPEG1)
938         ff_mpeg1_encode_init(s);
939
940     /* init q matrix */
941     for (i = 0; i < 64; i++) {
942         int j = s->idsp.idct_permutation[i];
943         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
944             s->mpeg_quant) {
945             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
946             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
947         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
948             s->intra_matrix[j] =
949             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
950         } else {
951             /* mpeg1/2 */
952             s->chroma_intra_matrix[j] =
953             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
954             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
955         }
956         if (s->avctx->intra_matrix)
957             s->intra_matrix[j] = s->avctx->intra_matrix[i];
958         if (s->avctx->inter_matrix)
959             s->inter_matrix[j] = s->avctx->inter_matrix[i];
960     }
961
962     /* precompute matrix */
963     /* for mjpeg, we do include qscale in the matrix */
964     if (s->out_format != FMT_MJPEG) {
965         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
966                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
967                           31, 1);
968         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
969                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
970                           31, 0);
971     }
972
973     if (ff_rate_control_init(s) < 0)
974         return -1;
975
976 #if FF_API_ERROR_RATE
977     FF_DISABLE_DEPRECATION_WARNINGS
978     if (avctx->error_rate)
979         s->error_rate = avctx->error_rate;
980     FF_ENABLE_DEPRECATION_WARNINGS;
981 #endif
982
983 #if FF_API_NORMALIZE_AQP
984     FF_DISABLE_DEPRECATION_WARNINGS
985     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
986         s->mpv_flags |= FF_MPV_FLAG_NAQ;
987     FF_ENABLE_DEPRECATION_WARNINGS;
988 #endif
989
990 #if FF_API_MV0
991     FF_DISABLE_DEPRECATION_WARNINGS
992     if (avctx->flags & CODEC_FLAG_MV0)
993         s->mpv_flags |= FF_MPV_FLAG_MV0;
994     FF_ENABLE_DEPRECATION_WARNINGS
995 #endif
996
997 #if FF_API_MPV_OPT
998     FF_DISABLE_DEPRECATION_WARNINGS
999     if (avctx->rc_qsquish != 0.0)
1000         s->rc_qsquish = avctx->rc_qsquish;
1001     if (avctx->rc_qmod_amp != 0.0)
1002         s->rc_qmod_amp = avctx->rc_qmod_amp;
1003     if (avctx->rc_qmod_freq)
1004         s->rc_qmod_freq = avctx->rc_qmod_freq;
1005     if (avctx->rc_buffer_aggressivity != 1.0)
1006         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
1007     if (avctx->rc_initial_cplx != 0.0)
1008         s->rc_initial_cplx = avctx->rc_initial_cplx;
1009     if (avctx->lmin)
1010         s->lmin = avctx->lmin;
1011     if (avctx->lmax)
1012         s->lmax = avctx->lmax;
1013
1014     if (avctx->rc_eq) {
1015         av_freep(&s->rc_eq);
1016         s->rc_eq = av_strdup(avctx->rc_eq);
1017         if (!s->rc_eq)
1018             return AVERROR(ENOMEM);
1019     }
1020     FF_ENABLE_DEPRECATION_WARNINGS
1021 #endif
1022
1023     if (avctx->b_frame_strategy == 2) {
1024         for (i = 0; i < s->max_b_frames + 2; i++) {
1025             s->tmp_frames[i] = av_frame_alloc();
1026             if (!s->tmp_frames[i])
1027                 return AVERROR(ENOMEM);
1028
1029             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
1030             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
1031             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
1032
1033             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
1034             if (ret < 0)
1035                 return ret;
1036         }
1037     }
1038
1039     return 0;
1040 fail:
1041     ff_mpv_encode_end(avctx);
1042     return AVERROR_UNKNOWN;
1043 }
1044
1045 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1046 {
1047     MpegEncContext *s = avctx->priv_data;
1048     int i;
1049
1050     ff_rate_control_uninit(s);
1051
1052     ff_mpv_common_end(s);
1053     if (CONFIG_MJPEG_ENCODER &&
1054         s->out_format == FMT_MJPEG)
1055         ff_mjpeg_encode_close(s);
1056
1057     av_freep(&avctx->extradata);
1058
1059     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1060         av_frame_free(&s->tmp_frames[i]);
1061
1062     ff_free_picture_tables(&s->new_picture);
1063     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1064
1065     av_freep(&s->avctx->stats_out);
1066     av_freep(&s->ac_stats);
1067
1068     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1069     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1070     s->q_chroma_intra_matrix=   NULL;
1071     s->q_chroma_intra_matrix16= NULL;
1072     av_freep(&s->q_intra_matrix);
1073     av_freep(&s->q_inter_matrix);
1074     av_freep(&s->q_intra_matrix16);
1075     av_freep(&s->q_inter_matrix16);
1076     av_freep(&s->input_picture);
1077     av_freep(&s->reordered_input_picture);
1078     av_freep(&s->dct_offset);
1079
1080     return 0;
1081 }
1082
1083 static int get_sae(uint8_t *src, int ref, int stride)
1084 {
1085     int x,y;
1086     int acc = 0;
1087
1088     for (y = 0; y < 16; y++) {
1089         for (x = 0; x < 16; x++) {
1090             acc += FFABS(src[x + y * stride] - ref);
1091         }
1092     }
1093
1094     return acc;
1095 }
1096
1097 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1098                            uint8_t *ref, int stride)
1099 {
1100     int x, y, w, h;
1101     int acc = 0;
1102
1103     w = s->width  & ~15;
1104     h = s->height & ~15;
1105
1106     for (y = 0; y < h; y += 16) {
1107         for (x = 0; x < w; x += 16) {
1108             int offset = x + y * stride;
1109             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1110                                       stride, 16);
1111             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1112             int sae  = get_sae(src + offset, mean, stride);
1113
1114             acc += sae + 500 < sad;
1115         }
1116     }
1117     return acc;
1118 }
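/* Editor's note: get_intra_count() is the heuristic behind b_frame_strategy 1
 * in select_input_picture() below.  For each 16x16 block it compares the SAD
 * against the previous frame with the SAE of the block around its own mean;
 * when sae + 500 < sad the block looks cheaper to code as intra than as
 * inter, and the number of such blocks serves as a scene-change score. */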
1119
1120 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1121 {
1122     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1123                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1124                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1125                             &s->linesize, &s->uvlinesize);
1126 }
1127
1128 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1129 {
1130     Picture *pic = NULL;
1131     int64_t pts;
1132     int i, display_picture_number = 0, ret;
1133     int encoding_delay = s->max_b_frames ? s->max_b_frames
1134                                          : (s->low_delay ? 0 : 1);
1135     int flush_offset = 1;
1136     int direct = 1;
1137
1138     if (pic_arg) {
1139         pts = pic_arg->pts;
1140         display_picture_number = s->input_picture_number++;
1141
1142         if (pts != AV_NOPTS_VALUE) {
1143             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1144                 int64_t last = s->user_specified_pts;
1145
1146                 if (pts <= last) {
1147                     av_log(s->avctx, AV_LOG_ERROR,
1148                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1149                            pts, last);
1150                     return AVERROR(EINVAL);
1151                 }
1152
1153                 if (!s->low_delay && display_picture_number == 1)
1154                     s->dts_delta = pts - last;
1155             }
1156             s->user_specified_pts = pts;
1157         } else {
1158             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1159                 s->user_specified_pts =
1160                 pts = s->user_specified_pts + 1;
1161                 av_log(s->avctx, AV_LOG_INFO,
1162                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1163                        pts);
1164             } else {
1165                 pts = display_picture_number;
1166             }
1167         }
1168
1169         if (!pic_arg->buf[0] ||
1170             pic_arg->linesize[0] != s->linesize ||
1171             pic_arg->linesize[1] != s->uvlinesize ||
1172             pic_arg->linesize[2] != s->uvlinesize)
1173             direct = 0;
1174         if ((s->width & 15) || (s->height & 15))
1175             direct = 0;
1176         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1177             direct = 0;
1178         if (s->linesize & (STRIDE_ALIGN-1))
1179             direct = 0;
1180
1181         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1182                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1183
1184         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1185         if (i < 0)
1186             return i;
1187
1188         pic = &s->picture[i];
1189         pic->reference = 3;
1190
1191         if (direct) {
1192             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1193                 return ret;
1194         }
1195         ret = alloc_picture(s, pic, direct);
1196         if (ret < 0)
1197             return ret;
1198
1199         if (!direct) {
1200             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1201                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1202                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1203                 // empty
1204             } else {
1205                 int h_chroma_shift, v_chroma_shift;
1206                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1207                                                  &h_chroma_shift,
1208                                                  &v_chroma_shift);
1209
1210                 for (i = 0; i < 3; i++) {
1211                     int src_stride = pic_arg->linesize[i];
1212                     int dst_stride = i ? s->uvlinesize : s->linesize;
1213                     int h_shift = i ? h_chroma_shift : 0;
1214                     int v_shift = i ? v_chroma_shift : 0;
1215                     int w = s->width  >> h_shift;
1216                     int h = s->height >> v_shift;
1217                     uint8_t *src = pic_arg->data[i];
1218                     uint8_t *dst = pic->f->data[i];
1219                     int vpad = 16;
1220
1221                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1222                         && !s->progressive_sequence
1223                         && FFALIGN(s->height, 32) - s->height > 16)
1224                         vpad = 32;
1225
1226                     if (!s->avctx->rc_buffer_size)
1227                         dst += INPLACE_OFFSET;
1228
1229                     if (src_stride == dst_stride)
1230                         memcpy(dst, src, src_stride * h);
1231                     else {
1232                         int h2 = h;
1233                         uint8_t *dst2 = dst;
1234                         while (h2--) {
1235                             memcpy(dst2, src, w);
1236                             dst2 += dst_stride;
1237                             src += src_stride;
1238                         }
1239                     }
1240                     if ((s->width & 15) || (s->height & (vpad-1))) {
1241                         s->mpvencdsp.draw_edges(dst, dst_stride,
1242                                                 w, h,
1243                                                 16 >> h_shift,
1244                                                 vpad >> v_shift,
1245                                                 EDGE_BOTTOM);
1246                     }
1247                 }
1248             }
1249         }
1250         ret = av_frame_copy_props(pic->f, pic_arg);
1251         if (ret < 0)
1252             return ret;
1253
1254         pic->f->display_picture_number = display_picture_number;
1255         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1256     } else {
1257         /* Flushing: When we have not received enough input frames,
1258          * ensure s->input_picture[0] contains the first picture */
1259         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1260             if (s->input_picture[flush_offset])
1261                 break;
1262
1263         if (flush_offset <= 1)
1264             flush_offset = 1;
1265         else
1266             encoding_delay = encoding_delay - flush_offset + 1;
1267     }
1268
1269     /* shift buffer entries */
1270     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1271         s->input_picture[i - flush_offset] = s->input_picture[i];
1272
1273     s->input_picture[encoding_delay] = (Picture*) pic;
1274
1275     return 0;
1276 }
1277
1278 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1279 {
1280     int x, y, plane;
1281     int score = 0;
1282     int64_t score64 = 0;
1283
1284     for (plane = 0; plane < 3; plane++) {
1285         const int stride = p->f->linesize[plane];
1286         const int bw = plane ? 1 : 2;
1287         for (y = 0; y < s->mb_height * bw; y++) {
1288             for (x = 0; x < s->mb_width * bw; x++) {
1289                 int off = p->shared ? 0 : 16;
1290                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1291                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1292                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1293
1294                 switch (FFABS(s->avctx->frame_skip_exp)) {
1295                 case 0: score    =  FFMAX(score, v);          break;
1296                 case 1: score   += FFABS(v);                  break;
1297                 case 2: score64 += v * (int64_t)v;                       break;
1298                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1299                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1300                 }
1301             }
1302         }
1303     }
1304     emms_c();
1305
1306     if (score)
1307         score64 = score;
1308     if (s->avctx->frame_skip_exp < 0)
1309         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1310                       -1.0/s->avctx->frame_skip_exp);
1311
1312     if (score64 < s->avctx->frame_skip_threshold)
1313         return 1;
1314     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1315         return 1;
1316     return 0;
1317 }
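/* Editor's note: frame_skip_exp selects the norm used above to compare the
 * candidate frame with the last coded one on 8x8 blocks: 0 = max |diff|,
 * 1 = sum |diff|, 2 = sum diff^2, 3 = sum |diff|^3, 4 = sum diff^4; a
 * negative value additionally normalizes the score per macroblock through the
 * pow() call.  The frame is skipped when the score stays below
 * frame_skip_threshold or below the lambda-scaled frame_skip_factor. */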
1318
1319 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1320 {
1321     AVPacket pkt = { 0 };
1322     int ret, got_output;
1323
1324     av_init_packet(&pkt);
1325     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1326     if (ret < 0)
1327         return ret;
1328
1329     ret = pkt.size;
1330     av_packet_unref(&pkt);
1331     return ret;
1332 }
1333
1334 static int estimate_best_b_count(MpegEncContext *s)
1335 {
1336     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1337     AVCodecContext *c = avcodec_alloc_context3(NULL);
1338     const int scale = s->avctx->brd_scale;
1339     int i, j, out_size, p_lambda, b_lambda, lambda2;
1340     int64_t best_rd  = INT64_MAX;
1341     int best_b_count = -1;
1342
1343     if (!c)
1344         return AVERROR(ENOMEM);
1345     av_assert0(scale >= 0 && scale <= 3);
1346
1347     //emms_c();
1348     //s->next_picture_ptr->quality;
1349     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1350     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1351     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1352     if (!b_lambda) // FIXME we should do this somewhere else
1353         b_lambda = p_lambda;
1354     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1355                FF_LAMBDA_SHIFT;
1356
1357     c->width        = s->width  >> scale;
1358     c->height       = s->height >> scale;
1359     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1360     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1361     c->mb_decision  = s->avctx->mb_decision;
1362     c->me_cmp       = s->avctx->me_cmp;
1363     c->mb_cmp       = s->avctx->mb_cmp;
1364     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1365     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1366     c->time_base    = s->avctx->time_base;
1367     c->max_b_frames = s->max_b_frames;
1368
1369     if (avcodec_open2(c, codec, NULL) < 0)
1370         return -1;
1371
1372     for (i = 0; i < s->max_b_frames + 2; i++) {
1373         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1374                                                 s->next_picture_ptr;
1375         uint8_t *data[4];
1376
1377         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1378             pre_input = *pre_input_ptr;
1379             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1380
1381             if (!pre_input.shared && i) {
1382                 data[0] += INPLACE_OFFSET;
1383                 data[1] += INPLACE_OFFSET;
1384                 data[2] += INPLACE_OFFSET;
1385             }
1386
1387             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1388                                        s->tmp_frames[i]->linesize[0],
1389                                        data[0],
1390                                        pre_input.f->linesize[0],
1391                                        c->width, c->height);
1392             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1393                                        s->tmp_frames[i]->linesize[1],
1394                                        data[1],
1395                                        pre_input.f->linesize[1],
1396                                        c->width >> 1, c->height >> 1);
1397             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1398                                        s->tmp_frames[i]->linesize[2],
1399                                        data[2],
1400                                        pre_input.f->linesize[2],
1401                                        c->width >> 1, c->height >> 1);
1402         }
1403     }
1404
1405     for (j = 0; j < s->max_b_frames + 1; j++) {
1406         int64_t rd = 0;
1407
1408         if (!s->input_picture[j])
1409             break;
1410
1411         c->error[0] = c->error[1] = c->error[2] = 0;
1412
1413         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1414         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1415
1416         out_size = encode_frame(c, s->tmp_frames[0]);
1417
1418         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1419
1420         for (i = 0; i < s->max_b_frames + 1; i++) {
1421             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1422
1423             s->tmp_frames[i + 1]->pict_type = is_p ?
1424                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1425             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1426
1427             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1428
1429             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1430         }
1431
1432         /* get the delayed frames */
1433         while (out_size) {
1434             out_size = encode_frame(c, NULL);
1435             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1436         }
1437
1438         rd += c->error[0] + c->error[1] + c->error[2];
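        /* rd now combines the lambda2-weighted sizes of the coded P/B frames
         * with the per-plane SSE the helper encoder accumulated through
         * AV_CODEC_FLAG_PSNR, so the candidate B-frame count j with the
         * smallest rd gives the best rate/distortion estimate. */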
1439
1440         if (rd < best_rd) {
1441             best_rd = rd;
1442             best_b_count = j;
1443         }
1444     }
1445
1446     avcodec_close(c);
1447     av_freep(&c);
1448
1449     return best_b_count;
1450 }
1451
1452 static int select_input_picture(MpegEncContext *s)
1453 {
1454     int i, ret;
1455
1456     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1457         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1458     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1459
1460     /* set next picture type & ordering */
1461     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1462         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1463             if (s->picture_in_gop_number < s->gop_size &&
1464                 s->next_picture_ptr &&
1465                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1466                 // FIXME check that the gop check above is +-1 correct
1467                 av_frame_unref(s->input_picture[0]->f);
1468
1469                 ff_vbv_update(s, 0);
1470
1471                 goto no_output_pic;
1472             }
1473         }
1474
1475         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1476             !s->next_picture_ptr || s->intra_only) {
1477             s->reordered_input_picture[0] = s->input_picture[0];
1478             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1479             s->reordered_input_picture[0]->f->coded_picture_number =
1480                 s->coded_picture_number++;
1481         } else {
1482             int b_frames;
1483
1484             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1485                 for (i = 0; i < s->max_b_frames + 1; i++) {
1486                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1487
1488                     if (pict_num >= s->rc_context.num_entries)
1489                         break;
1490                     if (!s->input_picture[i]) {
1491                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1492                         break;
1493                     }
1494
1495                     s->input_picture[i]->f->pict_type =
1496                         s->rc_context.entry[pict_num].new_pict_type;
1497                 }
1498             }
1499
1500             if (s->avctx->b_frame_strategy == 0) {
1501                 b_frames = s->max_b_frames;
1502                 while (b_frames && !s->input_picture[b_frames])
1503                     b_frames--;
1504             } else if (s->avctx->b_frame_strategy == 1) {
1505                 for (i = 1; i < s->max_b_frames + 1; i++) {
1506                     if (s->input_picture[i] &&
1507                         s->input_picture[i]->b_frame_score == 0) {
1508                         s->input_picture[i]->b_frame_score =
1509                             get_intra_count(s,
1510                                             s->input_picture[i    ]->f->data[0],
1511                                             s->input_picture[i - 1]->f->data[0],
1512                                             s->linesize) + 1;
1513                     }
1514                 }
1515                 for (i = 0; i < s->max_b_frames + 1; i++) {
1516                     if (!s->input_picture[i] ||
1517                         s->input_picture[i]->b_frame_score - 1 >
1518                             s->mb_num / s->avctx->b_sensitivity)
1519                         break;
1520                 }
1521
1522                 b_frames = FFMAX(0, i - 1);
1523
1524                 /* reset scores */
1525                 for (i = 0; i < b_frames + 1; i++) {
1526                     s->input_picture[i]->b_frame_score = 0;
1527                 }
1528             } else if (s->avctx->b_frame_strategy == 2) {
1529                 b_frames = estimate_best_b_count(s);
1530             } else {
1531                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1532                 b_frames = 0;
1533             }
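            /* b_frames now holds the number of buffered inputs to code as
             * B-frames: strategy 0 simply uses max_b_frames (trimmed to what
             * is buffered), strategy 1 stops once the intra-MB count between
             * neighbouring inputs exceeds mb_num / b_sensitivity, and
             * strategy 2 brute-forces the count with a small downscaled test
             * encode in estimate_best_b_count(). */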
1534
1535             emms_c();
1536
1537             for (i = b_frames - 1; i >= 0; i--) {
1538                 int type = s->input_picture[i]->f->pict_type;
1539                 if (type && type != AV_PICTURE_TYPE_B)
1540                     b_frames = i;
1541             }
1542             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1543                 b_frames == s->max_b_frames) {
1544                 av_log(s->avctx, AV_LOG_ERROR,
1545                        "warning, too many b frames in a row\n");
1546             }
1547
1548             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1549                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1550                     s->gop_size > s->picture_in_gop_number) {
1551                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1552                 } else {
1553                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1554                         b_frames = 0;
1555                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1556                 }
1557             }
1558
1559             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1560                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1561                 b_frames--;
1562
1563             s->reordered_input_picture[0] = s->input_picture[b_frames];
1564             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1565                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1566             s->reordered_input_picture[0]->f->coded_picture_number =
1567                 s->coded_picture_number++;
1568             for (i = 0; i < b_frames; i++) {
1569                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1570                 s->reordered_input_picture[i + 1]->f->pict_type =
1571                     AV_PICTURE_TYPE_B;
1572                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1573                     s->coded_picture_number++;
1574             }
1575         }
1576     }
1577 no_output_pic:
1578     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1579
1580     if (s->reordered_input_picture[0]) {
1581         s->reordered_input_picture[0]->reference =
1582            s->reordered_input_picture[0]->f->pict_type !=
1583                AV_PICTURE_TYPE_B ? 3 : 0;
1584
1585         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1586             return ret;
1587
1588         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1589             // input is a shared pix, so we can't modify it -> alloc a new
1590             // one & ensure that the shared one is reusable
1591
1592             Picture *pic;
1593             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1594             if (i < 0)
1595                 return i;
1596             pic = &s->picture[i];
1597
1598             pic->reference = s->reordered_input_picture[0]->reference;
1599             if (alloc_picture(s, pic, 0) < 0) {
1600                 return -1;
1601             }
1602
1603             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1604             if (ret < 0)
1605                 return ret;
1606
1607             /* mark us unused / free shared pic */
1608             av_frame_unref(s->reordered_input_picture[0]->f);
1609             s->reordered_input_picture[0]->shared = 0;
1610
1611             s->current_picture_ptr = pic;
1612         } else {
1613             // input is not a shared pix -> reuse buffer for current_pix
1614             s->current_picture_ptr = s->reordered_input_picture[0];
1615             for (i = 0; i < 4; i++) {
1616                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1617             }
1618         }
1619         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1620         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1621                                        s->current_picture_ptr)) < 0)
1622             return ret;
1623
1624         s->picture_number = s->new_picture.f->display_picture_number;
1625     }
1626     return 0;
1627 }
1628
1629 static void frame_end(MpegEncContext *s)
1630 {
1631     if (s->unrestricted_mv &&
1632         s->current_picture.reference &&
1633         !s->intra_only) {
1634         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1635         int hshift = desc->log2_chroma_w;
1636         int vshift = desc->log2_chroma_h;
1637         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1638                                 s->current_picture.f->linesize[0],
1639                                 s->h_edge_pos, s->v_edge_pos,
1640                                 EDGE_WIDTH, EDGE_WIDTH,
1641                                 EDGE_TOP | EDGE_BOTTOM);
1642         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1643                                 s->current_picture.f->linesize[1],
1644                                 s->h_edge_pos >> hshift,
1645                                 s->v_edge_pos >> vshift,
1646                                 EDGE_WIDTH >> hshift,
1647                                 EDGE_WIDTH >> vshift,
1648                                 EDGE_TOP | EDGE_BOTTOM);
1649         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1650                                 s->current_picture.f->linesize[2],
1651                                 s->h_edge_pos >> hshift,
1652                                 s->v_edge_pos >> vshift,
1653                                 EDGE_WIDTH >> hshift,
1654                                 EDGE_WIDTH >> vshift,
1655                                 EDGE_TOP | EDGE_BOTTOM);
1656     }
1657
1658     emms_c();
1659
1660     s->last_pict_type                 = s->pict_type;
1661     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1662     if (s->pict_type!= AV_PICTURE_TYPE_B)
1663         s->last_non_b_pict_type = s->pict_type;
1664
1665 #if FF_API_CODED_FRAME
1666 FF_DISABLE_DEPRECATION_WARNINGS
1667     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1668 FF_ENABLE_DEPRECATION_WARNINGS
1669 #endif
1670 #if FF_API_ERROR_FRAME
1671 FF_DISABLE_DEPRECATION_WARNINGS
1672     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1673            sizeof(s->current_picture.encoding_error));
1674 FF_ENABLE_DEPRECATION_WARNINGS
1675 #endif
1676 }
1677
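/* Refresh the per-coefficient noise reduction offsets from the running error
 * statistics: dct_offset[i] is roughly noise_reduction * dct_count /
 * dct_error_sum[i], so coefficients whose quantization error has stayed small
 * get a larger offset (e.g. noise_reduction = 4, dct_count = 1000,
 * dct_error_sum[i] = 2000 gives (4000 + 1000) / 2001 = 2).  Halving both
 * counters once dct_count exceeds 1 << 16 bounds the sums while keeping
 * their ratio. */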
1678 static void update_noise_reduction(MpegEncContext *s)
1679 {
1680     int intra, i;
1681
1682     for (intra = 0; intra < 2; intra++) {
1683         if (s->dct_count[intra] > (1 << 16)) {
1684             for (i = 0; i < 64; i++) {
1685                 s->dct_error_sum[intra][i] >>= 1;
1686             }
1687             s->dct_count[intra] >>= 1;
1688         }
1689
1690         for (i = 0; i < 64; i++) {
1691             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1692                                        s->dct_count[intra] +
1693                                        s->dct_error_sum[intra][i] / 2) /
1694                                       (s->dct_error_sum[intra][i] + 1);
1695         }
1696     }
1697 }
1698
1699 static int frame_start(MpegEncContext *s)
1700 {
1701     int ret;
1702
1703     /* mark & release old frames */
1704     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1705         s->last_picture_ptr != s->next_picture_ptr &&
1706         s->last_picture_ptr->f->buf[0]) {
1707         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1708     }
1709
1710     s->current_picture_ptr->f->pict_type = s->pict_type;
1711     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1712
1713     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1714     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1715                                    s->current_picture_ptr)) < 0)
1716         return ret;
1717
1718     if (s->pict_type != AV_PICTURE_TYPE_B) {
1719         s->last_picture_ptr = s->next_picture_ptr;
1720         if (!s->droppable)
1721             s->next_picture_ptr = s->current_picture_ptr;
1722     }
1723
1724     if (s->last_picture_ptr) {
1725         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1726         if (s->last_picture_ptr->f->buf[0] &&
1727             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1728                                        s->last_picture_ptr)) < 0)
1729             return ret;
1730     }
1731     if (s->next_picture_ptr) {
1732         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1733         if (s->next_picture_ptr->f->buf[0] &&
1734             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1735                                        s->next_picture_ptr)) < 0)
1736             return ret;
1737     }
1738
1739     if (s->picture_structure!= PICT_FRAME) {
1740         int i;
1741         for (i = 0; i < 4; i++) {
1742             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1743                 s->current_picture.f->data[i] +=
1744                     s->current_picture.f->linesize[i];
1745             }
1746             s->current_picture.f->linesize[i] *= 2;
1747             s->last_picture.f->linesize[i]    *= 2;
1748             s->next_picture.f->linesize[i]    *= 2;
1749         }
1750     }
1751
1752     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1753         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1754         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1755     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1756         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1757         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1758     } else {
1759         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1760         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1761     }
1762
1763     if (s->dct_error_sum) {
1764         av_assert2(s->avctx->noise_reduction && s->encoding);
1765         update_noise_reduction(s);
1766     }
1767
1768     return 0;
1769 }
1770
1771 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1772                           const AVFrame *pic_arg, int *got_packet)
1773 {
1774     MpegEncContext *s = avctx->priv_data;
1775     int i, stuffing_count, ret;
1776     int context_count = s->slice_context_count;
1777
1778     s->vbv_ignore_qmax = 0;
1779
1780     s->picture_in_gop_number++;
1781
1782     if (load_input_picture(s, pic_arg) < 0)
1783         return -1;
1784
1785     if (select_input_picture(s) < 0) {
1786         return -1;
1787     }
1788
1789     /* output? */
1790     if (s->new_picture.f->data[0]) {
1791         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1792         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1793                                               :
1794                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1795         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1796             return ret;
1797         if (s->mb_info) {
1798             s->mb_info_ptr = av_packet_new_side_data(pkt,
1799                                  AV_PKT_DATA_H263_MB_INFO,
1800                                  s->mb_width*s->mb_height*12);
1801             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1802         }
1803
1804         for (i = 0; i < context_count; i++) {
1805             int start_y = s->thread_context[i]->start_mb_y;
1806             int   end_y = s->thread_context[i]->  end_mb_y;
1807             int h       = s->mb_height;
1808             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1809             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1810
1811             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1812         }
1813
1814         s->pict_type = s->new_picture.f->pict_type;
1815         //emms_c();
1816         ret = frame_start(s);
1817         if (ret < 0)
1818             return ret;
1819 vbv_retry:
1820         ret = encode_picture(s, s->picture_number);
1821         if (growing_buffer) {
1822             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1823             pkt->data = s->pb.buf;
1824             pkt->size = avctx->internal->byte_buffer_size;
1825         }
1826         if (ret < 0)
1827             return -1;
1828
1829         avctx->header_bits = s->header_bits;
1830         avctx->mv_bits     = s->mv_bits;
1831         avctx->misc_bits   = s->misc_bits;
1832         avctx->i_tex_bits  = s->i_tex_bits;
1833         avctx->p_tex_bits  = s->p_tex_bits;
1834         avctx->i_count     = s->i_count;
1835         // FIXME f/b_count in avctx
1836         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1837         avctx->skip_count  = s->skip_count;
1838
1839         frame_end(s);
1840
1841         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1842             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1843
1844         if (avctx->rc_buffer_size) {
1845             RateControlContext *rcc = &s->rc_context;
1846             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1847             int hq = (s->avctx->mb_decision == FF_MB_DECISION_RD || s->avctx->trellis);
1848             int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1849
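            /* If the frame does not fit into the VBV budget, raise lambda by
             * at least min_step (in the non-hq case roughly one qscale step
             * worth of lambda) or by the factor (qscale + 1) / qscale, undo
             * the per-frame state that encode_picture() already toggled
             * (no_rounding for P-frames, the time base), rewind the slice
             * bit writers and re-encode the frame from vbv_retry with
             * vbv_ignore_qmax set. */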
1850             if (put_bits_count(&s->pb) > max_size &&
1851                 s->lambda < s->lmax) {
1852                 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1853                                        (s->qscale + 1) / s->qscale);
1854                 if (s->adaptive_quant) {
1855                     int i;
1856                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1857                         s->lambda_table[i] =
1858                             FFMAX(s->lambda_table[i] + min_step,
1859                                   s->lambda_table[i] * (s->qscale + 1) /
1860                                   s->qscale);
1861                 }
1862                 s->mb_skipped = 0;        // done in frame_start()
1863                 // encode_picture() already did the following, so undo it here
1864                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1865                     if (s->flipflop_rounding          ||
1866                         s->codec_id == AV_CODEC_ID_H263P ||
1867                         s->codec_id == AV_CODEC_ID_MPEG4)
1868                         s->no_rounding ^= 1;
1869                 }
1870                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1871                     s->time_base       = s->last_time_base;
1872                     s->last_non_b_time = s->time - s->pp_time;
1873                 }
1874                 for (i = 0; i < context_count; i++) {
1875                     PutBitContext *pb = &s->thread_context[i]->pb;
1876                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1877                 }
1878                 s->vbv_ignore_qmax = 1;
1879                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1880                 goto vbv_retry;
1881             }
1882
1883             av_assert0(s->avctx->rc_max_rate);
1884         }
1885
1886         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1887             ff_write_pass1_stats(s);
1888
1889         for (i = 0; i < 4; i++) {
1890             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1891             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1892         }
1893         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1894                                        s->current_picture_ptr->encoding_error,
1895                                        (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1896                                        s->pict_type);
1897
1898         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1899             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1900                    avctx->i_tex_bits + avctx->p_tex_bits ==
1901                        put_bits_count(&s->pb));
1902         flush_put_bits(&s->pb);
1903         s->frame_bits  = put_bits_count(&s->pb);
1904
1905         stuffing_count = ff_vbv_update(s, s->frame_bits);
1906         s->stuffing_bits = 8*stuffing_count;
1907         if (stuffing_count) {
1908             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1909                     stuffing_count + 50) {
1910                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1911                 return -1;
1912             }
1913
1914             switch (s->codec_id) {
1915             case AV_CODEC_ID_MPEG1VIDEO:
1916             case AV_CODEC_ID_MPEG2VIDEO:
1917                 while (stuffing_count--) {
1918                     put_bits(&s->pb, 8, 0);
1919                 }
1920             break;
1921             case AV_CODEC_ID_MPEG4:
1922                 put_bits(&s->pb, 16, 0);
1923                 put_bits(&s->pb, 16, 0x1C3);
1924                 stuffing_count -= 4;
1925                 while (stuffing_count--) {
1926                     put_bits(&s->pb, 8, 0xFF);
1927                 }
1928             break;
1929             default:
1930                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1931             }
1932             flush_put_bits(&s->pb);
1933             s->frame_bits  = put_bits_count(&s->pb);
1934         }
1935
1936         /* update mpeg1/2 vbv_delay for CBR */
1937         if (s->avctx->rc_max_rate                          &&
1938             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1939             s->out_format == FMT_MPEG1                     &&
1940             90000LL * (avctx->rc_buffer_size - 1) <=
1941                 s->avctx->rc_max_rate * 0xFFFFLL) {
1942             int vbv_delay, min_delay;
1943             double inbits  = s->avctx->rc_max_rate *
1944                              av_q2d(s->avctx->time_base);
1945             int    minbits = s->frame_bits - 8 *
1946                              (s->vbv_delay_ptr - s->pb.buf - 1);
1947             double bits    = s->rc_context.buffer_index + minbits - inbits;
1948
1949             if (bits < 0)
1950                 av_log(s->avctx, AV_LOG_ERROR,
1951                        "Internal error, negative bits\n");
1952
1953             assert(s->repeat_first_field == 0);
1954
1955             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1956             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1957                         s->avctx->rc_max_rate;
1958
1959             vbv_delay = FFMAX(vbv_delay, min_delay);
1960
1961             av_assert0(vbv_delay < 0xFFFF);
1962
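            /* Patch the 16-bit vbv_delay field back into the already written
             * picture header: the top 3 bits go into the low bits of the
             * first byte, the middle 8 bits fill the second byte and the low
             * 5 bits land in the high bits of the third byte.  E.g. for
             * vbv_delay = 0xABCD this contributes 0x5, 0x5E and 0x68 to the
             * three bytes.  The value is in 90 kHz ticks, hence the * 300
             * below to get the 27 MHz value stored in avctx->vbv_delay. */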
1963             s->vbv_delay_ptr[0] &= 0xF8;
1964             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1965             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1966             s->vbv_delay_ptr[2] &= 0x07;
1967             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1968             avctx->vbv_delay     = vbv_delay * 300;
1969         }
1970         s->total_bits     += s->frame_bits;
1971         avctx->frame_bits  = s->frame_bits;
1972
1973         pkt->pts = s->current_picture.f->pts;
1974         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1975             if (!s->current_picture.f->coded_picture_number)
1976                 pkt->dts = pkt->pts - s->dts_delta;
1977             else
1978                 pkt->dts = s->reordered_pts;
1979             s->reordered_pts = pkt->pts;
1980         } else
1981             pkt->dts = pkt->pts;
1982         if (s->current_picture.f->key_frame)
1983             pkt->flags |= AV_PKT_FLAG_KEY;
1984         if (s->mb_info)
1985             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1986     } else {
1987         s->frame_bits = 0;
1988     }
1989
1990     /* release non-reference frames */
1991     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1992         if (!s->picture[i].reference)
1993             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1994     }
1995
1996     av_assert1((s->frame_bits & 7) == 0);
1997
1998     pkt->size = s->frame_bits / 8;
1999     *got_packet = !!pkt->size;
2000     return 0;
2001 }
2002
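/* Zero out blocks that only contain a few scattered +-1 coefficients: every
 * +-1 adds tab[run] to a score, where run is the number of zeros since the
 * previous nonzero coefficient (adjacent coefficients cost 3, long runs cost
 * nothing), any |level| > 1 aborts the elimination, and only if the total
 * score stays below the threshold are the coefficients cleared.  A negative
 * threshold means the DC coefficient may be cleared as well. */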
2003 static inline void dct_single_coeff_elimination(MpegEncContext *s,
2004                                                 int n, int threshold)
2005 {
2006     static const char tab[64] = {
2007         3, 2, 2, 1, 1, 1, 1, 1,
2008         1, 1, 1, 1, 1, 1, 1, 1,
2009         1, 1, 1, 1, 1, 1, 1, 1,
2010         0, 0, 0, 0, 0, 0, 0, 0,
2011         0, 0, 0, 0, 0, 0, 0, 0,
2012         0, 0, 0, 0, 0, 0, 0, 0,
2013         0, 0, 0, 0, 0, 0, 0, 0,
2014         0, 0, 0, 0, 0, 0, 0, 0
2015     };
2016     int score = 0;
2017     int run = 0;
2018     int i;
2019     int16_t *block = s->block[n];
2020     const int last_index = s->block_last_index[n];
2021     int skip_dc;
2022
2023     if (threshold < 0) {
2024         skip_dc = 0;
2025         threshold = -threshold;
2026     } else
2027         skip_dc = 1;
2028
2029     /* Are all the coefficients we could set to zero already zero? */
2030     if (last_index <= skip_dc - 1)
2031         return;
2032
2033     for (i = 0; i <= last_index; i++) {
2034         const int j = s->intra_scantable.permutated[i];
2035         const int level = FFABS(block[j]);
2036         if (level == 1) {
2037             if (skip_dc && i == 0)
2038                 continue;
2039             score += tab[run];
2040             run = 0;
2041         } else if (level > 1) {
2042             return;
2043         } else {
2044             run++;
2045         }
2046     }
2047     if (score >= threshold)
2048         return;
2049     for (i = skip_dc; i <= last_index; i++) {
2050         const int j = s->intra_scantable.permutated[i];
2051         block[j] = 0;
2052     }
2053     if (block[0])
2054         s->block_last_index[n] = 0;
2055     else
2056         s->block_last_index[n] = -1;
2057 }
2058
2059 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2060                                int last_index)
2061 {
2062     int i;
2063     const int maxlevel = s->max_qcoeff;
2064     const int minlevel = s->min_qcoeff;
2065     int overflow = 0;
2066
2067     if (s->mb_intra) {
2068         i = 1; // skip clipping of intra dc
2069     } else
2070         i = 0;
2071
2072     for (; i <= last_index; i++) {
2073         const int j = s->intra_scantable.permutated[i];
2074         int level = block[j];
2075
2076         if (level > maxlevel) {
2077             level = maxlevel;
2078             overflow++;
2079         } else if (level < minlevel) {
2080             level = minlevel;
2081             overflow++;
2082         }
2083
2084         block[j] = level;
2085     }
2086
2087     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2088         av_log(s->avctx, AV_LOG_INFO,
2089                "warning, clipping %d dct coefficients to %d..%d\n",
2090                overflow, minlevel, maxlevel);
2091 }
2092
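/* Perceptual weights for noise shaping: for each pixel of the 8x8 block the
 * 3x3 neighbourhood (clipped to the block) is examined; since
 * count * sqr - sum * sum equals count^2 times the local variance, the stored
 * weight is 36 times the local standard deviation -- flat areas get weight 0,
 * busy areas get large weights. */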
2093 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
2094 {
2095     int x, y;
2096     // FIXME optimize
2097     for (y = 0; y < 8; y++) {
2098         for (x = 0; x < 8; x++) {
2099             int x2, y2;
2100             int sum = 0;
2101             int sqr = 0;
2102             int count = 0;
2103
2104             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2105                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2106                     int v = ptr[x2 + y2 * stride];
2107                     sum += v;
2108                     sqr += v * v;
2109                     count++;
2110                 }
2111             }
2112             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2113         }
2114     }
2115 }
2116
2117 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2118                                                 int motion_x, int motion_y,
2119                                                 int mb_block_height,
2120                                                 int mb_block_width,
2121                                                 int mb_block_count)
2122 {
2123     int16_t weight[12][64];
2124     int16_t orig[12][64];
2125     const int mb_x = s->mb_x;
2126     const int mb_y = s->mb_y;
2127     int i;
2128     int skip_dct[12];
2129     int dct_offset = s->linesize * 8; // default for progressive frames
2130     int uv_dct_offset = s->uvlinesize * 8;
2131     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2132     ptrdiff_t wrap_y, wrap_c;
2133
2134     for (i = 0; i < mb_block_count; i++)
2135         skip_dct[i] = s->skipdct;
2136
2137     if (s->adaptive_quant) {
2138         const int last_qp = s->qscale;
2139         const int mb_xy = mb_x + mb_y * s->mb_stride;
2140
2141         s->lambda = s->lambda_table[mb_xy];
2142         update_qscale(s);
2143
2144         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2145             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2146             s->dquant = s->qscale - last_qp;
2147
2148             if (s->out_format == FMT_H263) {
2149                 s->dquant = av_clip(s->dquant, -2, 2);
2150
2151                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2152                     if (!s->mb_intra) {
2153                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2154                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2155                                 s->dquant = 0;
2156                         }
2157                         if (s->mv_type == MV_TYPE_8X8)
2158                             s->dquant = 0;
2159                     }
2160                 }
2161             }
2162         }
2163         ff_set_qscale(s, last_qp + s->dquant);
2164     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2165         ff_set_qscale(s, s->qscale + s->dquant);
2166
2167     wrap_y = s->linesize;
2168     wrap_c = s->uvlinesize;
2169     ptr_y  = s->new_picture.f->data[0] +
2170              (mb_y * 16 * wrap_y)              + mb_x * 16;
2171     ptr_cb = s->new_picture.f->data[1] +
2172              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2173     ptr_cr = s->new_picture.f->data[2] +
2174              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2175
2176     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2177         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2178         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2179         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2180         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2181                                  wrap_y, wrap_y,
2182                                  16, 16, mb_x * 16, mb_y * 16,
2183                                  s->width, s->height);
2184         ptr_y = ebuf;
2185         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2186                                  wrap_c, wrap_c,
2187                                  mb_block_width, mb_block_height,
2188                                  mb_x * mb_block_width, mb_y * mb_block_height,
2189                                  cw, ch);
2190         ptr_cb = ebuf + 16 * wrap_y;
2191         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2192                                  wrap_c, wrap_c,
2193                                  mb_block_width, mb_block_height,
2194                                  mb_x * mb_block_width, mb_y * mb_block_height,
2195                                  cw, ch);
2196         ptr_cr = ebuf + 16 * wrap_y + 16;
2197     }
2198
2199     if (s->mb_intra) {
2200         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2201             int progressive_score, interlaced_score;
2202
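            /* Frame vs. field DCT decision: compare the interlace metric of
             * the two 8-line halves sampled with the normal line stride
             * against the same metric sampled with a doubled stride; the
             * -400 bias means field DCT is only chosen when it is clearly
             * better.  When it wins, dct_offset points at the other field
             * and wrap_y is doubled so each 8x8 block reads lines from one
             * field only. */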
2203             s->interlaced_dct = 0;
2204             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2205                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2206                                                      NULL, wrap_y, 8) - 400;
2207
2208             if (progressive_score > 0) {
2209                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2210                                                         NULL, wrap_y * 2, 8) +
2211                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2212                                                         NULL, wrap_y * 2, 8);
2213                 if (progressive_score > interlaced_score) {
2214                     s->interlaced_dct = 1;
2215
2216                     dct_offset = wrap_y;
2217                     uv_dct_offset = wrap_c;
2218                     wrap_y <<= 1;
2219                     if (s->chroma_format == CHROMA_422 ||
2220                         s->chroma_format == CHROMA_444)
2221                         wrap_c <<= 1;
2222                 }
2223             }
2224         }
2225
2226         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2227         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2228         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2229         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2230
2231         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2232             skip_dct[4] = 1;
2233             skip_dct[5] = 1;
2234         } else {
2235             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2236             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2237             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2238                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2239                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2240             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2241                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2242                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2243                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2244                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2245                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2246                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2247             }
2248         }
2249     } else {
2250         op_pixels_func (*op_pix)[4];
2251         qpel_mc_func (*op_qpix)[16];
2252         uint8_t *dest_y, *dest_cb, *dest_cr;
2253
2254         dest_y  = s->dest[0];
2255         dest_cb = s->dest[1];
2256         dest_cr = s->dest[2];
2257
2258         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2259             op_pix  = s->hdsp.put_pixels_tab;
2260             op_qpix = s->qdsp.put_qpel_pixels_tab;
2261         } else {
2262             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2263             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2264         }
2265
2266         if (s->mv_dir & MV_DIR_FORWARD) {
2267             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2268                           s->last_picture.f->data,
2269                           op_pix, op_qpix);
2270             op_pix  = s->hdsp.avg_pixels_tab;
2271             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2272         }
2273         if (s->mv_dir & MV_DIR_BACKWARD) {
2274             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2275                           s->next_picture.f->data,
2276                           op_pix, op_qpix);
2277         }
2278
2279         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2280             int progressive_score, interlaced_score;
2281
2282             s->interlaced_dct = 0;
2283             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2284                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2285                                                      ptr_y + wrap_y * 8,
2286                                                      wrap_y, 8) - 400;
2287
2288             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2289                 progressive_score -= 400;
2290
2291             if (progressive_score > 0) {
2292                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2293                                                         wrap_y * 2, 8) +
2294                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2295                                                         ptr_y + wrap_y,
2296                                                         wrap_y * 2, 8);
2297
2298                 if (progressive_score > interlaced_score) {
2299                     s->interlaced_dct = 1;
2300
2301                     dct_offset = wrap_y;
2302                     uv_dct_offset = wrap_c;
2303                     wrap_y <<= 1;
2304                     if (s->chroma_format == CHROMA_422)
2305                         wrap_c <<= 1;
2306                 }
2307             }
2308         }
2309
2310         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2311         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2312         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2313                             dest_y + dct_offset, wrap_y);
2314         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2315                             dest_y + dct_offset + 8, wrap_y);
2316
2317         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2318             skip_dct[4] = 1;
2319             skip_dct[5] = 1;
2320         } else {
2321             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2322             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2323             if (!s->chroma_y_shift) { /* 422 */
2324                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2325                                     dest_cb + uv_dct_offset, wrap_c);
2326                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2327                                     dest_cr + uv_dct_offset, wrap_c);
2328             }
2329         }
2330         /* pre quantization */
2331         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2332                 2 * s->qscale * s->qscale) {
2333             // FIXME optimize
2334             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2335                 skip_dct[0] = 1;
2336             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2337                 skip_dct[1] = 1;
2338             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2339                                wrap_y, 8) < 20 * s->qscale)
2340                 skip_dct[2] = 1;
2341             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2342                                wrap_y, 8) < 20 * s->qscale)
2343                 skip_dct[3] = 1;
2344             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2345                 skip_dct[4] = 1;
2346             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2347                 skip_dct[5] = 1;
2348             if (!s->chroma_y_shift) { /* 422 */
2349                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2350                                    dest_cb + uv_dct_offset,
2351                                    wrap_c, 8) < 20 * s->qscale)
2352                     skip_dct[6] = 1;
2353                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2354                                    dest_cr + uv_dct_offset,
2355                                    wrap_c, 8) < 20 * s->qscale)
2356                     skip_dct[7] = 1;
2357             }
2358         }
2359     }
2360
2361     if (s->quantizer_noise_shaping) {
2362         if (!skip_dct[0])
2363             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2364         if (!skip_dct[1])
2365             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2366         if (!skip_dct[2])
2367             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2368         if (!skip_dct[3])
2369             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2370         if (!skip_dct[4])
2371             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2372         if (!skip_dct[5])
2373             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2374         if (!s->chroma_y_shift) { /* 422 */
2375             if (!skip_dct[6])
2376                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2377                                   wrap_c);
2378             if (!skip_dct[7])
2379                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2380                                   wrap_c);
2381         }
2382         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2383     }
2384
2385     /* DCT & quantize */
2386     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2387     {
2388         for (i = 0; i < mb_block_count; i++) {
2389             if (!skip_dct[i]) {
2390                 int overflow;
2391                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2392                 // FIXME we could decide to change the quantizer instead of
2393                 // clipping
2394                 // JS: I don't think that would be a good idea, it could lower
2395                 //     quality instead of improving it. Only INTRADC clipping
2396                 //     deserves changes in the quantizer
2397                 if (overflow)
2398                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2399             } else
2400                 s->block_last_index[i] = -1;
2401         }
2402         if (s->quantizer_noise_shaping) {
2403             for (i = 0; i < mb_block_count; i++) {
2404                 if (!skip_dct[i]) {
2405                     s->block_last_index[i] =
2406                         dct_quantize_refine(s, s->block[i], weight[i],
2407                                             orig[i], i, s->qscale);
2408                 }
2409             }
2410         }
2411
2412         if (s->luma_elim_threshold && !s->mb_intra)
2413             for (i = 0; i < 4; i++)
2414                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2415         if (s->chroma_elim_threshold && !s->mb_intra)
2416             for (i = 4; i < mb_block_count; i++)
2417                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2418
2419         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2420             for (i = 0; i < mb_block_count; i++) {
2421                 if (s->block_last_index[i] == -1)
2422                     s->coded_score[i] = INT_MAX / 256;
2423             }
2424         }
2425     }
2426
2427     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2428         s->block_last_index[4] =
2429         s->block_last_index[5] = 0;
2430         s->block[4][0] =
2431         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2432         if (!s->chroma_y_shift) { /* 422 / 444 */
2433             for (i=6; i<12; i++) {
2434                 s->block_last_index[i] = 0;
2435                 s->block[i][0] = s->block[4][0];
2436             }
2437         }
2438     }
2439
2440     // FIXME: the non-C quantize code returns an incorrect block_last_index
2441     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2442         for (i = 0; i < mb_block_count; i++) {
2443             int j;
2444             if (s->block_last_index[i] > 0) {
2445                 for (j = 63; j > 0; j--) {
2446                     if (s->block[i][s->intra_scantable.permutated[j]])
2447                         break;
2448                 }
2449                 s->block_last_index[i] = j;
2450             }
2451         }
2452     }
2453
2454     /* huffman encode */
2455     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2456     case AV_CODEC_ID_MPEG1VIDEO:
2457     case AV_CODEC_ID_MPEG2VIDEO:
2458         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2459             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2460         break;
2461     case AV_CODEC_ID_MPEG4:
2462         if (CONFIG_MPEG4_ENCODER)
2463             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2464         break;
2465     case AV_CODEC_ID_MSMPEG4V2:
2466     case AV_CODEC_ID_MSMPEG4V3:
2467     case AV_CODEC_ID_WMV1:
2468         if (CONFIG_MSMPEG4_ENCODER)
2469             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2470         break;
2471     case AV_CODEC_ID_WMV2:
2472         if (CONFIG_WMV2_ENCODER)
2473             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2474         break;
2475     case AV_CODEC_ID_H261:
2476         if (CONFIG_H261_ENCODER)
2477             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2478         break;
2479     case AV_CODEC_ID_H263:
2480     case AV_CODEC_ID_H263P:
2481     case AV_CODEC_ID_FLV1:
2482     case AV_CODEC_ID_RV10:
2483     case AV_CODEC_ID_RV20:
2484         if (CONFIG_H263_ENCODER)
2485             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2486         break;
2487     case AV_CODEC_ID_MJPEG:
2488     case AV_CODEC_ID_AMV:
2489         if (CONFIG_MJPEG_ENCODER)
2490             ff_mjpeg_encode_mb(s, s->block);
2491         break;
2492     default:
2493         av_assert1(0);
2494     }
2495 }
2496
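/* Pick the block layout for the macroblock: 4:2:0 uses 8x8 chroma blocks and
 * 6 blocks per MB, 4:2:2 uses 8x16 chroma (8 blocks) and 4:4:4 uses 16x16
 * chroma (12 blocks). */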
2497 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2498 {
2499     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2500     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2501     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2502 }
2503
2504 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2505     int i;
2506
2507     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2508
2509     /* mpeg1 */
2510     d->mb_skip_run= s->mb_skip_run;
2511     for(i=0; i<3; i++)
2512         d->last_dc[i] = s->last_dc[i];
2513
2514     /* statistics */
2515     d->mv_bits= s->mv_bits;
2516     d->i_tex_bits= s->i_tex_bits;
2517     d->p_tex_bits= s->p_tex_bits;
2518     d->i_count= s->i_count;
2519     d->f_count= s->f_count;
2520     d->b_count= s->b_count;
2521     d->skip_count= s->skip_count;
2522     d->misc_bits= s->misc_bits;
2523     d->last_bits= 0;
2524
2525     d->mb_skipped= 0;
2526     d->qscale= s->qscale;
2527     d->dquant= s->dquant;
2528
2529     d->esc3_level_length= s->esc3_level_length;
2530 }
2531
2532 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2533     int i;
2534
2535     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2536     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2537
2538     /* mpeg1 */
2539     d->mb_skip_run= s->mb_skip_run;
2540     for(i=0; i<3; i++)
2541         d->last_dc[i] = s->last_dc[i];
2542
2543     /* statistics */
2544     d->mv_bits= s->mv_bits;
2545     d->i_tex_bits= s->i_tex_bits;
2546     d->p_tex_bits= s->p_tex_bits;
2547     d->i_count= s->i_count;
2548     d->f_count= s->f_count;
2549     d->b_count= s->b_count;
2550     d->skip_count= s->skip_count;
2551     d->misc_bits= s->misc_bits;
2552
2553     d->mb_intra= s->mb_intra;
2554     d->mb_skipped= s->mb_skipped;
2555     d->mv_type= s->mv_type;
2556     d->mv_dir= s->mv_dir;
2557     d->pb= s->pb;
2558     if(s->data_partitioning){
2559         d->pb2= s->pb2;
2560         d->tex_pb= s->tex_pb;
2561     }
2562     d->block= s->block;
2563     for(i=0; i<8; i++)
2564         d->block_last_index[i]= s->block_last_index[i];
2565     d->interlaced_dct= s->interlaced_dct;
2566     d->qscale= s->qscale;
2567
2568     d->esc3_level_length= s->esc3_level_length;
2569 }
2570
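/* Encode one macroblock candidate into a scratch bit buffer and keep it only
 * if it beats the current best: the cost is the number of bits written, and
 * with FF_MB_DECISION_RD it becomes the rate-distortion score
 * bits * lambda2 + (SSE << FF_LAMBDA_SHIFT), obtained by decoding the MB back
 * into a scratchpad and comparing it against the source. */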
2571 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2572                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2573                            int *dmin, int *next_block, int motion_x, int motion_y)
2574 {
2575     int score;
2576     uint8_t *dest_backup[3];
2577
2578     copy_context_before_encode(s, backup, type);
2579
2580     s->block= s->blocks[*next_block];
2581     s->pb= pb[*next_block];
2582     if(s->data_partitioning){
2583         s->pb2   = pb2   [*next_block];
2584         s->tex_pb= tex_pb[*next_block];
2585     }
2586
2587     if(*next_block){
2588         memcpy(dest_backup, s->dest, sizeof(s->dest));
2589         s->dest[0] = s->sc.rd_scratchpad;
2590         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2591         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2592         av_assert0(s->linesize >= 32); //FIXME
2593     }
2594
2595     encode_mb(s, motion_x, motion_y);
2596
2597     score= put_bits_count(&s->pb);
2598     if(s->data_partitioning){
2599         score+= put_bits_count(&s->pb2);
2600         score+= put_bits_count(&s->tex_pb);
2601     }
2602
2603     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2604         ff_mpv_decode_mb(s, s->block);
2605
2606         score *= s->lambda2;
2607         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2608     }
2609
2610     if(*next_block){
2611         memcpy(s->dest, dest_backup, sizeof(s->dest));
2612     }
2613
2614     if(score<*dmin){
2615         *dmin= score;
2616         *next_block^=1;
2617
2618         copy_context_after_encode(best, s, type);
2619     }
2620 }
2621
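/* Sum of squared errors for an arbitrary w x h block; ff_square_tab is
 * indexed from -255 to 255 through the +256 offset, and the common 16x16 and
 * 8x8 cases are handed to the optimized mecc.sse functions. */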
2622 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2623     uint32_t *sq = ff_square_tab + 256;
2624     int acc=0;
2625     int x,y;
2626
2627     if(w==16 && h==16)
2628         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2629     else if(w==8 && h==8)
2630         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2631
2632     for(y=0; y<h; y++){
2633         for(x=0; x<w; x++){
2634             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2635         }
2636     }
2637
2638     av_assert2(acc>=0);
2639
2640     return acc;
2641 }
2642
2643 static int sse_mb(MpegEncContext *s){
2644     int w= 16;
2645     int h= 16;
2646
2647     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2648     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2649
2650     if(w==16 && h==16)
2651       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2652         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2653                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2654                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2655       }else{
2656         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2657                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2658                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2659       }
2660     else
2661         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2662                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2663                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2664 }
2665
2666 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2667     MpegEncContext *s= *(void**)arg;
2668
2669
2670     s->me.pre_pass=1;
2671     s->me.dia_size= s->avctx->pre_dia_size;
2672     s->first_slice_line=1;
2673     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2674         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2675             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2676         }
2677         s->first_slice_line=0;
2678     }
2679
2680     s->me.pre_pass=0;
2681
2682     return 0;
2683 }
2684
2685 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2686     MpegEncContext *s= *(void**)arg;
2687
2688     ff_check_alignment();
2689
2690     s->me.dia_size= s->avctx->dia_size;
2691     s->first_slice_line=1;
2692     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2693         s->mb_x=0; //for block init below
2694         ff_init_block_index(s);
2695         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2696             s->block_index[0]+=2;
2697             s->block_index[1]+=2;
2698             s->block_index[2]+=2;
2699             s->block_index[3]+=2;
2700
2701             /* compute motion vector & mb_type and store in context */
2702             if(s->pict_type==AV_PICTURE_TYPE_B)
2703                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2704             else
2705                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2706         }
2707         s->first_slice_line=0;
2708     }
2709     return 0;
2710 }
2711
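/* Per-macroblock spatial activity: with 256 luma pixels per MB, pix_sum() and
 * pix_norm1() give sum(x) and sum(x^2), so
 * (sum(x^2) - sum(x)^2 / 256) / 256 is the pixel variance (the +500 and +128
 * are bias/rounding terms) and mb_mean is the rounded average; these values
 * are later used by rate control and adaptive quantization. */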
2712 static int mb_var_thread(AVCodecContext *c, void *arg){
2713     MpegEncContext *s= *(void**)arg;
2714     int mb_x, mb_y;
2715
2716     ff_check_alignment();
2717
2718     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2719         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2720             int xx = mb_x * 16;
2721             int yy = mb_y * 16;
2722             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2723             int varc;
2724             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2725
2726             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2727                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2728
2729             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2730             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2731             s->me.mb_var_sum_temp    += varc;
2732         }
2733     }
2734     return 0;
2735 }
2736
2737 static void write_slice_end(MpegEncContext *s){
2738     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2739         if(s->partitioned_frame){
2740             ff_mpeg4_merge_partitions(s);
2741         }
2742
2743         ff_mpeg4_stuffing(&s->pb);
2744     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2745         ff_mjpeg_encode_stuffing(s);
2746     }
2747
2748     avpriv_align_put_bits(&s->pb);
2749     flush_put_bits(&s->pb);
2750
2751     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2752         s->misc_bits+= get_bits_diff(s);
2753 }
2754
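/* Fill one 12-byte AV_PKT_DATA_H263_MB_INFO record for the current MB:
 * 32-bit little-endian bit offset into the packet, qscale, GOB number,
 * 16-bit LE macroblock address within the GOB, the motion vector predictors
 * hmv1/vmv1, and two zero bytes reserved for the 4MV predictors. */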
2755 static void write_mb_info(MpegEncContext *s)
2756 {
2757     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2758     int offset = put_bits_count(&s->pb);
2759     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2760     int gobn = s->mb_y / s->gob_index;
2761     int pred_x, pred_y;
2762     if (CONFIG_H263_ENCODER)
2763         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2764     bytestream_put_le32(&ptr, offset);
2765     bytestream_put_byte(&ptr, s->qscale);
2766     bytestream_put_byte(&ptr, gobn);
2767     bytestream_put_le16(&ptr, mba);
2768     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2769     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2770     /* 4MV not implemented */
2771     bytestream_put_byte(&ptr, 0); /* hmv2 */
2772     bytestream_put_byte(&ptr, 0); /* vmv2 */
2773 }
2774
2775 static void update_mb_info(MpegEncContext *s, int startcode)
2776 {
2777     if (!s->mb_info)
2778         return;
2779     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2780         s->mb_info_size += 12;
2781         s->prev_mb_info = s->last_mb_info;
2782     }
2783     if (startcode) {
2784         s->prev_mb_info = put_bits_count(&s->pb)/8;
2785         /* This might have incremented mb_info_size above, and we return without
2786          * actually writing any info into that slot yet. But in that case,
2787          * this will be called again right after the start code is written,
2788          * and the MB info will actually be written then. */
2789         return;
2790     }
2791
2792     s->last_mb_info = put_bits_count(&s->pb)/8;
2793     if (!s->mb_info_size)
2794         s->mb_info_size += 12;
2795     write_mb_info(s);
2796 }
2797
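/* Grow the shared output buffer when fewer than 'threshold' bytes are left.
 * Only possible with a single slice context writing into
 * avctx->internal->byte_buffer; the PutBitContext and the lastgob/vbv_delay
 * pointers are rebased onto the new buffer. Returns 0 on success. */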
2798 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2799 {
2800     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2801         && s->slice_context_count == 1
2802         && s->pb.buf == s->avctx->internal->byte_buffer) {
2803         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2804         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2805
2806         uint8_t *new_buffer = NULL;
2807         int new_buffer_size = 0;
2808
2809         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2810                               s->avctx->internal->byte_buffer_size + size_increase);
2811         if (!new_buffer)
2812             return AVERROR(ENOMEM);
2813
2814         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2815         av_free(s->avctx->internal->byte_buffer);
2816         s->avctx->internal->byte_buffer      = new_buffer;
2817         s->avctx->internal->byte_buffer_size = new_buffer_size;
2818         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2819         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2820         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2821     }
2822     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2823         return AVERROR(EINVAL);
2824     return 0;
2825 }
2826
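/* Slice-thread worker: the main macroblock encoding loop. Writes GOB / video
 * packet headers in rtp_mode, then encodes each macroblock either directly
 * (single candidate type) or by trying all candidate types / qscales with
 * encode_mb_hq() and keeping the cheapest (mb_decision=rd, QP_RD, SKIP_RD). */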
2827 static int encode_thread(AVCodecContext *c, void *arg){
2828     MpegEncContext *s= *(void**)arg;
2829     int mb_x, mb_y, pdif = 0;
2830     int chr_h= 16>>s->chroma_y_shift;
2831     int i, j;
2832     MpegEncContext best_s = { 0 }, backup_s;
2833     uint8_t bit_buf[2][MAX_MB_BYTES];
2834     uint8_t bit_buf2[2][MAX_MB_BYTES];
2835     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2836     PutBitContext pb[2], pb2[2], tex_pb[2];
2837
2838     ff_check_alignment();
2839
2840     for(i=0; i<2; i++){
2841         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2842         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2843         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2844     }
2845
2846     s->last_bits= put_bits_count(&s->pb);
2847     s->mv_bits=0;
2848     s->misc_bits=0;
2849     s->i_tex_bits=0;
2850     s->p_tex_bits=0;
2851     s->i_count=0;
2852     s->f_count=0;
2853     s->b_count=0;
2854     s->skip_count=0;
2855
2856     for(i=0; i<3; i++){
2857         /* init last dc values */
2858         /* note: quant matrix value (8) is implied here */
2859         s->last_dc[i] = 128 << s->intra_dc_precision;
2860
2861         s->current_picture.encoding_error[i] = 0;
2862     }
2863     if(s->codec_id==AV_CODEC_ID_AMV){
2864         s->last_dc[0] = 128*8/13;
2865         s->last_dc[1] = 128*8/14;
2866         s->last_dc[2] = 128*8/14;
2867     }
2868     s->mb_skip_run = 0;
2869     memset(s->last_mv, 0, sizeof(s->last_mv));
2870
2871     s->last_mv_dir = 0;
2872
2873     switch(s->codec_id){
2874     case AV_CODEC_ID_H263:
2875     case AV_CODEC_ID_H263P:
2876     case AV_CODEC_ID_FLV1:
2877         if (CONFIG_H263_ENCODER)
2878             s->gob_index = H263_GOB_HEIGHT(s->height);
2879         break;
2880     case AV_CODEC_ID_MPEG4:
2881         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2882             ff_mpeg4_init_partitions(s);
2883         break;
2884     }
2885
2886     s->resync_mb_x=0;
2887     s->resync_mb_y=0;
2888     s->first_slice_line = 1;
2889     s->ptr_lastgob = s->pb.buf;
2890     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2891         s->mb_x=0;
2892         s->mb_y= mb_y;
2893
2894         ff_set_qscale(s, s->qscale);
2895         ff_init_block_index(s);
2896
2897         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2898             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2899             int mb_type= s->mb_type[xy];
2900 //            int d;
2901             int dmin= INT_MAX;
2902             int dir;
2903             int size_increase =  s->avctx->internal->byte_buffer_size/4
2904                                + s->mb_width*MAX_MB_BYTES;
2905
2906             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2907             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2908                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2909                 return -1;
2910             }
2911             if(s->data_partitioning){
2912                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2913                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2914                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2915                     return -1;
2916                 }
2917             }
2918
2919             s->mb_x = mb_x;
2920             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2921             ff_update_block_index(s);
2922
2923             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2924                 ff_h261_reorder_mb_index(s);
2925                 xy= s->mb_y*s->mb_stride + s->mb_x;
2926                 mb_type= s->mb_type[xy];
2927             }
2928
2929             /* write gob / video packet header  */
2930             if(s->rtp_mode){
2931                 int current_packet_size, is_gob_start;
2932
2933                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2934
2935                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2936
2937                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2938
2939                 switch(s->codec_id){
2940                 case AV_CODEC_ID_H263:
2941                 case AV_CODEC_ID_H263P:
2942                     if(!s->h263_slice_structured)
2943                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2944                     break;
2945                 case AV_CODEC_ID_MPEG2VIDEO:
2946                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
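                    /* fall through */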
2947                 case AV_CODEC_ID_MPEG1VIDEO:
2948                     if(s->mb_skip_run) is_gob_start=0;
2949                     break;
2950                 case AV_CODEC_ID_MJPEG:
2951                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2952                     break;
2953                 }
2954
2955                 if(is_gob_start){
2956                     if(s->start_mb_y != mb_y || mb_x!=0){
2957                         write_slice_end(s);
2958
2959                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2960                             ff_mpeg4_init_partitions(s);
2961                         }
2962                     }
2963
2964                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2965                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2966
2967                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2968                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2969                         int d = 100 / s->error_rate;
2970                         if(r % d == 0){
2971                             current_packet_size=0;
2972                             s->pb.buf_ptr= s->ptr_lastgob;
2973                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2974                         }
2975                     }
2976
2977 #if FF_API_RTP_CALLBACK
2978 FF_DISABLE_DEPRECATION_WARNINGS
2979                     if (s->avctx->rtp_callback){
2980                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2981                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2982                     }
2983 FF_ENABLE_DEPRECATION_WARNINGS
2984 #endif
2985                     update_mb_info(s, 1);
2986
2987                     switch(s->codec_id){
2988                     case AV_CODEC_ID_MPEG4:
2989                         if (CONFIG_MPEG4_ENCODER) {
2990                             ff_mpeg4_encode_video_packet_header(s);
2991                             ff_mpeg4_clean_buffers(s);
2992                         }
2993                     break;
2994                     case AV_CODEC_ID_MPEG1VIDEO:
2995                     case AV_CODEC_ID_MPEG2VIDEO:
2996                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2997                             ff_mpeg1_encode_slice_header(s);
2998                             ff_mpeg1_clean_buffers(s);
2999                         }
3000                     break;
3001                     case AV_CODEC_ID_H263:
3002                     case AV_CODEC_ID_H263P:
3003                         if (CONFIG_H263_ENCODER)
3004                             ff_h263_encode_gob_header(s, mb_y);
3005                     break;
3006                     }
3007
3008                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
3009                         int bits= put_bits_count(&s->pb);
3010                         s->misc_bits+= bits - s->last_bits;
3011                         s->last_bits= bits;
3012                     }
3013
3014                     s->ptr_lastgob += current_packet_size;
3015                     s->first_slice_line=1;
3016                     s->resync_mb_x=mb_x;
3017                     s->resync_mb_y=mb_y;
3018                 }
3019             }
3020
3021             if(  (s->resync_mb_x   == s->mb_x)
3022                && s->resync_mb_y+1 == s->mb_y){
3023                 s->first_slice_line=0;
3024             }
3025
3026             s->mb_skipped=0;
3027             s->dquant=0; //only for QP_RD
3028
3029             update_mb_info(s, 0);
3030
3031             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
3032                 int next_block=0;
3033                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3034
3035                 copy_context_before_encode(&backup_s, s, -1);
3036                 backup_s.pb= s->pb;
3037                 best_s.data_partitioning= s->data_partitioning;
3038                 best_s.partitioned_frame= s->partitioned_frame;
3039                 if(s->data_partitioning){
3040                     backup_s.pb2= s->pb2;
3041                     backup_s.tex_pb= s->tex_pb;
3042                 }
3043
3044                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3045                     s->mv_dir = MV_DIR_FORWARD;
3046                     s->mv_type = MV_TYPE_16X16;
3047                     s->mb_intra= 0;
3048                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3049                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3050                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3051                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3052                 }
3053                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3054                     s->mv_dir = MV_DIR_FORWARD;
3055                     s->mv_type = MV_TYPE_FIELD;
3056                     s->mb_intra= 0;
3057                     for(i=0; i<2; i++){
3058                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3059                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3060                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3061                     }
3062                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3063                                  &dmin, &next_block, 0, 0);
3064                 }
3065                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3066                     s->mv_dir = MV_DIR_FORWARD;
3067                     s->mv_type = MV_TYPE_16X16;
3068                     s->mb_intra= 0;
3069                     s->mv[0][0][0] = 0;
3070                     s->mv[0][0][1] = 0;
3071                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3072                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3073                 }
3074                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3075                     s->mv_dir = MV_DIR_FORWARD;
3076                     s->mv_type = MV_TYPE_8X8;
3077                     s->mb_intra= 0;
3078                     for(i=0; i<4; i++){
3079                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3080                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3081                     }
3082                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3083                                  &dmin, &next_block, 0, 0);
3084                 }
3085                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3086                     s->mv_dir = MV_DIR_FORWARD;
3087                     s->mv_type = MV_TYPE_16X16;
3088                     s->mb_intra= 0;
3089                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3090                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3091                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3092                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3093                 }
3094                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3095                     s->mv_dir = MV_DIR_BACKWARD;
3096                     s->mv_type = MV_TYPE_16X16;
3097                     s->mb_intra= 0;
3098                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3099                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3100                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3101                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3102                 }
3103                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3104                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3105                     s->mv_type = MV_TYPE_16X16;
3106                     s->mb_intra= 0;
3107                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3108                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3109                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3110                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3111                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3112                                  &dmin, &next_block, 0, 0);
3113                 }
3114                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3115                     s->mv_dir = MV_DIR_FORWARD;
3116                     s->mv_type = MV_TYPE_FIELD;
3117                     s->mb_intra= 0;
3118                     for(i=0; i<2; i++){
3119                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3120                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3121                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3122                     }
3123                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3124                                  &dmin, &next_block, 0, 0);
3125                 }
3126                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3127                     s->mv_dir = MV_DIR_BACKWARD;
3128                     s->mv_type = MV_TYPE_FIELD;
3129                     s->mb_intra= 0;
3130                     for(i=0; i<2; i++){
3131                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3132                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3133                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3134                     }
3135                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3136                                  &dmin, &next_block, 0, 0);
3137                 }
3138                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3139                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3140                     s->mv_type = MV_TYPE_FIELD;
3141                     s->mb_intra= 0;
3142                     for(dir=0; dir<2; dir++){
3143                         for(i=0; i<2; i++){
3144                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3145                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3146                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3147                         }
3148                     }
3149                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3150                                  &dmin, &next_block, 0, 0);
3151                 }
3152                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3153                     s->mv_dir = 0;
3154                     s->mv_type = MV_TYPE_16X16;
3155                     s->mb_intra= 1;
3156                     s->mv[0][0][0] = 0;
3157                     s->mv[0][0][1] = 0;
3158                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3159                                  &dmin, &next_block, 0, 0);
3160                     if(s->h263_pred || s->h263_aic){
3161                         if(best_s.mb_intra)
3162                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3163                         else
3164                             ff_clean_intra_table_entries(s); //old mode?
3165                     }
3166                 }
3167
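                /* QP_RD: starting from the best 16x16 choice, additionally try
                 * nearby qscales (dquant +-1, +-2; only +-2 for B-frames) and keep
                 * the cheapest, restoring the DC/AC prediction state when a
                 * trial qscale is rejected. */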
3168                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3169                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3170                         const int last_qp= backup_s.qscale;
3171                         int qpi, qp, dc[6];
3172                         int16_t ac[6][16];
3173                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3174                         static const int dquant_tab[4]={-1,1,-2,2};
3175                         int storecoefs = s->mb_intra && s->dc_val[0];
3176
3177                         av_assert2(backup_s.dquant == 0);
3178
3179                         //FIXME intra
3180                         s->mv_dir= best_s.mv_dir;
3181                         s->mv_type = MV_TYPE_16X16;
3182                         s->mb_intra= best_s.mb_intra;
3183                         s->mv[0][0][0] = best_s.mv[0][0][0];
3184                         s->mv[0][0][1] = best_s.mv[0][0][1];
3185                         s->mv[1][0][0] = best_s.mv[1][0][0];
3186                         s->mv[1][0][1] = best_s.mv[1][0][1];
3187
3188                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3189                         for(; qpi<4; qpi++){
3190                             int dquant= dquant_tab[qpi];
3191                             qp= last_qp + dquant;
3192                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3193                                 continue;
3194                             backup_s.dquant= dquant;
3195                             if(storecoefs){
3196                                 for(i=0; i<6; i++){
3197                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3198                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3199                                 }
3200                             }
3201
3202                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3203                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3204                             if(best_s.qscale != qp){
3205                                 if(storecoefs){
3206                                     for(i=0; i<6; i++){
3207                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3208                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3209                                     }
3210                                 }
3211                             }
3212                         }
3213                     }
3214                 }
3215                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3216                     int mx= s->b_direct_mv_table[xy][0];
3217                     int my= s->b_direct_mv_table[xy][1];
3218
3219                     backup_s.dquant = 0;
3220                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3221                     s->mb_intra= 0;
3222                     ff_mpeg4_set_direct_mv(s, mx, my);
3223                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3224                                  &dmin, &next_block, mx, my);
3225                 }
3226                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3227                     backup_s.dquant = 0;
3228                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3229                     s->mb_intra= 0;
3230                     ff_mpeg4_set_direct_mv(s, 0, 0);
3231                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3232                                  &dmin, &next_block, 0, 0);
3233                 }
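                /* SKIP_RD: if the best mode coded any coefficients, also try
                 * encoding the same motion vectors with the DCT skipped
                 * (all-zero residual) and keep that if it is cheaper. */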
3234                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3235                     int coded=0;
3236                     for(i=0; i<6; i++)
3237                         coded |= s->block_last_index[i];
3238                     if(coded){
3239                         int mx,my;
3240                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3241                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3242                             mx=my=0; //FIXME find the one we actually used
3243                             ff_mpeg4_set_direct_mv(s, mx, my);
3244                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3245                             mx= s->mv[1][0][0];
3246                             my= s->mv[1][0][1];
3247                         }else{
3248                             mx= s->mv[0][0][0];
3249                             my= s->mv[0][0][1];
3250                         }
3251
3252                         s->mv_dir= best_s.mv_dir;
3253                         s->mv_type = best_s.mv_type;
3254                         s->mb_intra= 0;
3255 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3256                         s->mv[0][0][1] = best_s.mv[0][0][1];
3257                         s->mv[1][0][0] = best_s.mv[1][0][0];
3258                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3259                         backup_s.dquant= 0;
3260                         s->skipdct=1;
3261                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3262                                         &dmin, &next_block, mx, my);
3263                         s->skipdct=0;
3264                     }
3265                 }
3266
3267                 s->current_picture.qscale_table[xy] = best_s.qscale;
3268
3269                 copy_context_after_encode(s, &best_s, -1);
3270
3271                 pb_bits_count= put_bits_count(&s->pb);
3272                 flush_put_bits(&s->pb);
3273                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3274                 s->pb= backup_s.pb;
3275
3276                 if(s->data_partitioning){
3277                     pb2_bits_count= put_bits_count(&s->pb2);
3278                     flush_put_bits(&s->pb2);
3279                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3280                     s->pb2= backup_s.pb2;
3281
3282                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3283                     flush_put_bits(&s->tex_pb);
3284                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3285                     s->tex_pb= backup_s.tex_pb;
3286                 }
3287                 s->last_bits= put_bits_count(&s->pb);
3288
3289                 if (CONFIG_H263_ENCODER &&
3290                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3291                     ff_h263_update_motion_val(s);
3292
3293                 if(next_block==0){ //FIXME 16 vs linesize16
3294                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3295                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3296                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3297                 }
3298
3299                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3300                     ff_mpv_decode_mb(s, s->block);
3301             } else {
3302                 int motion_x = 0, motion_y = 0;
3303                 s->mv_type=MV_TYPE_16X16;
3304                 // only one MB-Type possible
3305
3306                 switch(mb_type){
3307                 case CANDIDATE_MB_TYPE_INTRA:
3308                     s->mv_dir = 0;
3309                     s->mb_intra= 1;
3310                     motion_x= s->mv[0][0][0] = 0;
3311                     motion_y= s->mv[0][0][1] = 0;
3312                     break;
3313                 case CANDIDATE_MB_TYPE_INTER:
3314                     s->mv_dir = MV_DIR_FORWARD;
3315                     s->mb_intra= 0;
3316                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3317                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3318                     break;
3319                 case CANDIDATE_MB_TYPE_INTER_I:
3320                     s->mv_dir = MV_DIR_FORWARD;
3321                     s->mv_type = MV_TYPE_FIELD;
3322                     s->mb_intra= 0;
3323                     for(i=0; i<2; i++){
3324                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3325                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3326                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3327                     }
3328                     break;
3329                 case CANDIDATE_MB_TYPE_INTER4V:
3330                     s->mv_dir = MV_DIR_FORWARD;
3331                     s->mv_type = MV_TYPE_8X8;
3332                     s->mb_intra= 0;
3333                     for(i=0; i<4; i++){
3334                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3335                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3336                     }
3337                     break;
3338                 case CANDIDATE_MB_TYPE_DIRECT:
3339                     if (CONFIG_MPEG4_ENCODER) {
3340                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3341                         s->mb_intra= 0;
3342                         motion_x=s->b_direct_mv_table[xy][0];
3343                         motion_y=s->b_direct_mv_table[xy][1];
3344                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3345                     }
3346                     break;
3347                 case CANDIDATE_MB_TYPE_DIRECT0:
3348                     if (CONFIG_MPEG4_ENCODER) {
3349                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3350                         s->mb_intra= 0;
3351                         ff_mpeg4_set_direct_mv(s, 0, 0);
3352                     }
3353                     break;
3354                 case CANDIDATE_MB_TYPE_BIDIR:
3355                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3356                     s->mb_intra= 0;
3357                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3358                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3359                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3360                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3361                     break;
3362                 case CANDIDATE_MB_TYPE_BACKWARD:
3363                     s->mv_dir = MV_DIR_BACKWARD;
3364                     s->mb_intra= 0;
3365                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3366                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3367                     break;
3368                 case CANDIDATE_MB_TYPE_FORWARD:
3369                     s->mv_dir = MV_DIR_FORWARD;
3370                     s->mb_intra= 0;
3371                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3372                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3373                     break;
3374                 case CANDIDATE_MB_TYPE_FORWARD_I:
3375                     s->mv_dir = MV_DIR_FORWARD;
3376                     s->mv_type = MV_TYPE_FIELD;
3377                     s->mb_intra= 0;
3378                     for(i=0; i<2; i++){
3379                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3380                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3381                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3382                     }
3383                     break;
3384                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3385                     s->mv_dir = MV_DIR_BACKWARD;
3386                     s->mv_type = MV_TYPE_FIELD;
3387                     s->mb_intra= 0;
3388                     for(i=0; i<2; i++){
3389                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3390                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3391                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3392                     }
3393                     break;
3394                 case CANDIDATE_MB_TYPE_BIDIR_I:
3395                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3396                     s->mv_type = MV_TYPE_FIELD;
3397                     s->mb_intra= 0;
3398                     for(dir=0; dir<2; dir++){
3399                         for(i=0; i<2; i++){
3400                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3401                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3402                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3403                         }
3404                     }
3405                     break;
3406                 default:
3407                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3408                 }
3409
3410                 encode_mb(s, motion_x, motion_y);
3411
3412                 // RAL: remember the MV direction of the last macroblock
3413                 s->last_mv_dir = s->mv_dir;
3414
3415                 if (CONFIG_H263_ENCODER &&
3416                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3417                     ff_h263_update_motion_val(s);
3418
3419                 ff_mpv_decode_mb(s, s->block);
3420             }
3421
3422             /* clean the MV table in I-, P- and S-frames for direct mode in B-frames */
3423             if(s->mb_intra /* && I,P,S_TYPE */){
3424                 s->p_mv_table[xy][0]=0;
3425                 s->p_mv_table[xy][1]=0;
3426             }
3427
3428             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3429                 int w= 16;
3430                 int h= 16;
3431
3432                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3433                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3434
3435                 s->current_picture.encoding_error[0] += sse(
3436                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3437                     s->dest[0], w, h, s->linesize);
3438                 s->current_picture.encoding_error[1] += sse(
3439                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3440                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3441                 s->current_picture.encoding_error[2] += sse(
3442                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3443                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3444             }
3445             if(s->loop_filter){
3446                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3447                     ff_h263_loop_filter(s);
3448             }
3449             ff_dlog(s->avctx, "MB %d %d bits\n",
3450                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3451         }
3452     }
3453
3454     // not pretty, but the extension header must be written before the final flush, so it has to be here
3455     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3456         ff_msmpeg4_encode_ext_header(s);
3457
3458     write_slice_end(s);
3459
3460 #if FF_API_RTP_CALLBACK
3461 FF_DISABLE_DEPRECATION_WARNINGS
3462     /* Send the last GOB if RTP */
3463     if (s->avctx->rtp_callback) {
3464         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3465         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3466         /* Call the RTP callback to send the last GOB */
3467         emms_c();
3468         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3469     }
3470 FF_ENABLE_DEPRECATION_WARNINGS
3471 #endif
3472
3473     return 0;
3474 }
3475
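/* MERGE() adds a field of a slice context into the main context and clears the
 * source; used below to combine per-thread statistics after ME and encoding. */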
3476 #define MERGE(field) dst->field += src->field; src->field=0
3477 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3478     MERGE(me.scene_change_score);
3479     MERGE(me.mc_mb_var_sum_temp);
3480     MERGE(me.mb_var_sum_temp);
3481 }
3482
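/* Combine the bit counts, DCT error sums, encoding-error totals and the actual
 * slice bitstream of a worker context into the main context. */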
3483 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3484     int i;
3485
3486     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3487     MERGE(dct_count[1]);
3488     MERGE(mv_bits);
3489     MERGE(i_tex_bits);
3490     MERGE(p_tex_bits);
3491     MERGE(i_count);
3492     MERGE(f_count);
3493     MERGE(b_count);
3494     MERGE(skip_count);
3495     MERGE(misc_bits);
3496     MERGE(er.error_count);
3497     MERGE(padding_bug_score);
3498     MERGE(current_picture.encoding_error[0]);
3499     MERGE(current_picture.encoding_error[1]);
3500     MERGE(current_picture.encoding_error[2]);
3501
3502     if(dst->avctx->noise_reduction){
3503         for(i=0; i<64; i++){
3504             MERGE(dct_error_sum[0][i]);
3505             MERGE(dct_error_sum[1][i]);
3506         }
3507     }
3508
3509     assert(put_bits_count(&src->pb) % 8 ==0);
3510     assert(put_bits_count(&dst->pb) % 8 ==0);
3511     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3512     flush_put_bits(&dst->pb);
3513 }
3514
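/* Pick the frame quality: a pending next_lambda if one was queued, otherwise
 * the rate controller's estimate (unless a fixed qscale is used); with
 * adaptive quantization the per-MB qscale table is cleaned up for codecs with
 * a restricted dquant range, and finally lambda/qscale are updated. */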
3515 static int estimate_qp(MpegEncContext *s, int dry_run){
3516     if (s->next_lambda){
3517         s->current_picture_ptr->f->quality =
3518         s->current_picture.f->quality = s->next_lambda;
3519         if(!dry_run) s->next_lambda= 0;
3520     } else if (!s->fixed_qscale) {
3521         s->current_picture_ptr->f->quality =
3522         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3523         if (s->current_picture.f->quality < 0)
3524             return -1;
3525     }
3526
3527     if(s->adaptive_quant){
3528         switch(s->codec_id){
3529         case AV_CODEC_ID_MPEG4:
3530             if (CONFIG_MPEG4_ENCODER)
3531                 ff_clean_mpeg4_qscales(s);
3532             break;
3533         case AV_CODEC_ID_H263:
3534         case AV_CODEC_ID_H263P:
3535         case AV_CODEC_ID_FLV1:
3536             if (CONFIG_H263_ENCODER)
3537                 ff_clean_h263_qscales(s);
3538             break;
3539         default:
3540             ff_init_qscale_tab(s);
3541         }
3542
3543         s->lambda= s->lambda_table[0];
3544         //FIXME broken
3545     }else
3546         s->lambda = s->current_picture.f->quality;
3547     update_qscale(s);
3548     return 0;
3549 }
3550
3551 /* must be called before writing the header */
3552 static void set_frame_distances(MpegEncContext * s){
3553     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3554     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3555
3556     if(s->pict_type==AV_PICTURE_TYPE_B){
3557         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3558         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3559     }else{
3560         s->pp_time= s->time - s->last_non_b_time;
3561         s->last_non_b_time= s->time;
3562         assert(s->picture_number==0 || s->pp_time > 0);
3563     }
3564 }
3565
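/* Encode one frame: set frame distances, run motion estimation (or the MB
 * variance pass for I-frames) across the slice threads, possibly promote the
 * frame to an I-frame on a scene change, pick f_code/b_code and clamp long
 * MVs, set up the quantization matrices, write the picture header and run
 * encode_thread() on all slice contexts. */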
3566 static int encode_picture(MpegEncContext *s, int picture_number)
3567 {
3568     int i, ret;
3569     int bits;
3570     int context_count = s->slice_context_count;
3571
3572     s->picture_number = picture_number;
3573
3574     /* Reset the average MB variance */
3575     s->me.mb_var_sum_temp    =
3576     s->me.mc_mb_var_sum_temp = 0;
3577
3578     /* we need to initialize some time vars before we can encode b-frames */
3579     // RAL: Condition added for MPEG1VIDEO
3580     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3581         set_frame_distances(s);
3582     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3583         ff_set_mpeg4_time(s);
3584
3585     s->me.scene_change_score=0;
3586
3587 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3588
3589     if(s->pict_type==AV_PICTURE_TYPE_I){
3590         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3591         else                        s->no_rounding=0;
3592     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3593         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3594             s->no_rounding ^= 1;
3595     }
3596
3597     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3598         if (estimate_qp(s,1) < 0)
3599             return -1;
3600         ff_get_2pass_fcode(s);
3601     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3602         if(s->pict_type==AV_PICTURE_TYPE_B)
3603             s->lambda= s->last_lambda_for[s->pict_type];
3604         else
3605             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3606         update_qscale(s);
3607     }
3608
3609     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3610         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3611         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3612         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3613         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3614     }
3615
3616     s->mb_intra=0; //for the rate distortion & bit compare functions
3617     for(i=1; i<context_count; i++){
3618         ret = ff_update_duplicate_context(s->thread_context[i], s);
3619         if (ret < 0)
3620             return ret;
3621     }
3622
3623     if(ff_init_me(s)<0)
3624         return -1;
3625
3626     /* Estimate motion for every MB */
3627     if(s->pict_type != AV_PICTURE_TYPE_I){
3628         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3629         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3630         if (s->pict_type != AV_PICTURE_TYPE_B) {
3631             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3632                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3633             }
3634         }
3635
3636         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3637     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3638         /* I-Frame */
3639         for(i=0; i<s->mb_stride*s->mb_height; i++)
3640             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3641
3642         if(!s->fixed_qscale){
3643             /* finding spatial complexity for I-frame rate control */
3644             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3645         }
3646     }
3647     for(i=1; i<context_count; i++){
3648         merge_context_after_me(s, s->thread_context[i]);
3649     }
3650     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3651     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3652     emms_c();
3653
3654     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3655         s->pict_type= AV_PICTURE_TYPE_I;
3656         for(i=0; i<s->mb_stride*s->mb_height; i++)
3657             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3658         if(s->msmpeg4_version >= 3)
3659             s->no_rounding=1;
3660         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3661                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3662     }
3663
3664     if(!s->umvplus){
3665         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3666             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3667
3668             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3669                 int a,b;
3670                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3671                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3672                 s->f_code= FFMAX3(s->f_code, a, b);
3673             }
3674
3675             ff_fix_long_p_mvs(s);
3676             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3677             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3678                 int j;
3679                 for(i=0; i<2; i++){
3680                     for(j=0; j<2; j++)
3681                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3682                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3683                 }
3684             }
3685         }
3686
3687         if(s->pict_type==AV_PICTURE_TYPE_B){
3688             int a, b;
3689
3690             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3691             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3692             s->f_code = FFMAX(a, b);
3693
3694             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3695             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3696             s->b_code = FFMAX(a, b);
3697
3698             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3699             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3700             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3701             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3702             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3703                 int dir, j;
3704                 for(dir=0; dir<2; dir++){
3705                     for(i=0; i<2; i++){
3706                         for(j=0; j<2; j++){
3707                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3708                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3709                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3710                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3711                         }
3712                     }
3713                 }
3714             }
3715         }
3716     }
3717
3718     if (estimate_qp(s, 0) < 0)
3719         return -1;
3720
3721     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3722         s->pict_type == AV_PICTURE_TYPE_I &&
3723         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3724         s->qscale= 3; //reduce clipping problems
3725
3726     if (s->out_format == FMT_MJPEG) {
3727         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3728         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3729
3730         if (s->avctx->intra_matrix) {
3731             chroma_matrix =
3732             luma_matrix = s->avctx->intra_matrix;
3733         }
3734         if (s->avctx->chroma_intra_matrix)
3735             chroma_matrix = s->avctx->chroma_intra_matrix;
3736
3737         /* for mjpeg, we do include qscale in the matrix */
3738         for(i=1;i<64;i++){
3739             int j = s->idsp.idct_permutation[i];
3740
3741             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3742             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3743         }
3744         s->y_dc_scale_table=
3745         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3746         s->chroma_intra_matrix[0] =
3747         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3748         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3749                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3750         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3751                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3752         s->qscale= 8;
3753     }
3754     if(s->codec_id == AV_CODEC_ID_AMV){
3755         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3756         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3757         for(i=1;i<64;i++){
3758             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3759
3760             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3761             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3762         }
3763         s->y_dc_scale_table= y;
3764         s->c_dc_scale_table= c;
3765         s->intra_matrix[0] = 13;
3766         s->chroma_intra_matrix[0] = 14;
3767         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3768                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3769         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3770                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3771         s->qscale= 8;
3772     }
3773
3774     //FIXME var duplication
3775     s->current_picture_ptr->f->key_frame =
3776     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3777     s->current_picture_ptr->f->pict_type =
3778     s->current_picture.f->pict_type = s->pict_type;
3779
3780     if (s->current_picture.f->key_frame)
3781         s->picture_in_gop_number=0;
3782
3783     s->mb_x = s->mb_y = 0;
3784     s->last_bits= put_bits_count(&s->pb);
3785     switch(s->out_format) {
3786     case FMT_MJPEG:
3787         if (CONFIG_MJPEG_ENCODER)
3788             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3789                                            s->intra_matrix, s->chroma_intra_matrix);
3790         break;
3791     case FMT_H261:
3792         if (CONFIG_H261_ENCODER)
3793             ff_h261_encode_picture_header(s, picture_number);
3794         break;
3795     case FMT_H263:
3796         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3797             ff_wmv2_encode_picture_header(s, picture_number);
3798         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3799             ff_msmpeg4_encode_picture_header(s, picture_number);
3800         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3801             ff_mpeg4_encode_picture_header(s, picture_number);
3802         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3803             ret = ff_rv10_encode_picture_header(s, picture_number);
3804             if (ret < 0)
3805                 return ret;
3806         }
3807         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3808             ff_rv20_encode_picture_header(s, picture_number);
3809         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3810             ff_flv_encode_picture_header(s, picture_number);
3811         else if (CONFIG_H263_ENCODER)
3812             ff_h263_encode_picture_header(s, picture_number);
3813         break;
3814     case FMT_MPEG1:
3815         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3816             ff_mpeg1_encode_picture_header(s, picture_number);
3817         break;
3818     default:
3819         av_assert0(0);
3820     }
3821     bits= put_bits_count(&s->pb);
3822     s->header_bits= bits - s->last_bits;
3823
3824     for(i=1; i<context_count; i++){
3825         update_duplicate_context_after_me(s->thread_context[i], s);
3826     }
3827     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3828     for(i=1; i<context_count; i++){
3829         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3830             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3831         merge_context_after_encode(s, s->thread_context[i]);
3832     }
3833     emms_c();
3834     return 0;
3835 }
3836
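/* C reference for s->denoise_dct: accumulate per-coefficient statistics and
 * shrink every nonzero DCT coefficient towards zero by the running
 * dct_offset, as part of the noise-reduction feature. */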
3837 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3838     const int intra= s->mb_intra;
3839     int i;
3840
3841     s->dct_count[intra]++;
3842
3843     for(i=0; i<64; i++){
3844         int level= block[i];
3845
3846         if(level){
3847             if(level>0){
3848                 s->dct_error_sum[intra][i] += level;
3849                 level -= s->dct_offset[intra][i];
3850                 if(level<0) level=0;
3851             }else{
3852                 s->dct_error_sum[intra][i] -= level;
3853                 level += s->dct_offset[intra][i];
3854                 if(level>0) level=0;
3855             }
3856             block[i]= level;
3857         }
3858     }
3859 }
3860
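/* Trellis (rate-distortion optimal) quantization: after the forward DCT, a
 * dynamic program over the scan positions tracks surviving run/level paths
 * and picks, for each coefficient, the quantized level that minimizes
 * distortion + lambda * VLC bits for the target codec. */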
3861 static int dct_quantize_trellis_c(MpegEncContext *s,
3862                                   int16_t *block, int n,
3863                                   int qscale, int *overflow){
3864     const int *qmat;
3865     const uint16_t *matrix;
3866     const uint8_t *scantable= s->intra_scantable.scantable;
3867     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3868     int max=0;
3869     unsigned int threshold1, threshold2;
3870     int bias=0;
3871     int run_tab[65];
3872     int level_tab[65];
3873     int score_tab[65];
3874     int survivor[65];
3875     int survivor_count;
3876     int last_run=0;
3877     int last_level=0;
3878     int last_score= 0;
3879     int last_i;
3880     int coeff[2][64];
3881     int coeff_count[64];
3882     int qmul, qadd, start_i, last_non_zero, i, dc;
3883     const int esc_length= s->ac_esc_length;
3884     uint8_t * length;
3885     uint8_t * last_length;
3886     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3887     int mpeg2_qscale;
3888
3889     s->fdsp.fdct(block);
3890
3891     if(s->dct_error_sum)
3892         s->denoise_dct(s, block);
3893     qmul= qscale*16;
3894     qadd= ((qscale-1)|1)*8;
3895
3896     if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3897     else                 mpeg2_qscale = qscale << 1;
3898
3899     if (s->mb_intra) {
3900         int q;
3901         if (!s->h263_aic) {
3902             if (n < 4)
3903                 q = s->y_dc_scale;
3904             else
3905                 q = s->c_dc_scale;
3906             q = q << 3;
3907         } else{
3908             /* For AIC we skip quant/dequant of INTRADC */
3909             q = 1 << 3;
3910             qadd=0;
3911         }
3912
3913         /* note: block[0] is assumed to be positive */
3914         block[0] = (block[0] + (q >> 1)) / q;
3915         start_i = 1;
3916         last_non_zero = 0;
3917         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3918         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3919         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3920             bias= 1<<(QMAT_SHIFT-1);
3921
3922         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3923             length     = s->intra_chroma_ac_vlc_length;
3924             last_length= s->intra_chroma_ac_vlc_last_length;
3925         } else {
3926             length     = s->intra_ac_vlc_length;
3927             last_length= s->intra_ac_vlc_last_length;
3928         }
3929     } else {
3930         start_i = 0;
3931         last_non_zero = -1;
3932         qmat = s->q_inter_matrix[qscale];
3933         matrix = s->inter_matrix;
3934         length     = s->inter_ac_vlc_length;
3935         last_length= s->inter_ac_vlc_last_length;
3936     }
3937     last_i= start_i;
3938
3939     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3940     threshold2= (threshold1<<1);
3941
3942     for(i=63; i>=start_i; i--) {
3943         const int j = scantable[i];
3944         int level = block[j] * qmat[j];
3945
3946         if(((unsigned)(level+threshold1))>threshold2){
3947             last_non_zero = i;
3948             break;
3949         }
3950     }
3951
3952     for(i=start_i; i<=last_non_zero; i++) {
3953         const int j = scantable[i];
3954         int level = block[j] * qmat[j];
3955
3956 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3957 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3958         if(((unsigned)(level+threshold1))>threshold2){
3959             if(level>0){
3960                 level= (bias + level)>>QMAT_SHIFT;
3961                 coeff[0][i]= level;
3962                 coeff[1][i]= level-1;
3963 //                coeff[2][k]= level-2;
3964             }else{
3965                 level= (bias - level)>>QMAT_SHIFT;
3966                 coeff[0][i]= -level;
3967                 coeff[1][i]= -level+1;
3968 //                coeff[2][k]= -level+2;
3969             }
3970             coeff_count[i]= FFMIN(level, 2);
3971             av_assert2(coeff_count[i]);
3972             max |=level;
3973         }else{
3974             coeff[0][i]= (level>>31)|1;
3975             coeff_count[i]= 1;
3976         }
3977     }
3978
3979     *overflow= s->max_qcoeff < max; //overflow might have happened
3980
3981     if(last_non_zero < start_i){
3982         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3983         return last_non_zero;
3984     }
3985
3986     score_tab[start_i]= 0;
3987     survivor[0]= start_i;
3988     survivor_count= 1;
3989
3990     for(i=start_i; i<=last_non_zero; i++){
3991         int level_index, j, zero_distortion;
3992         int dct_coeff= FFABS(block[ scantable[i] ]);
3993         int best_score=256*256*256*120;
3994
3995         if (s->fdsp.fdct == ff_fdct_ifast)
3996             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3997         zero_distortion= dct_coeff*dct_coeff;
3998
3999         for(level_index=0; level_index < coeff_count[i]; level_index++){
4000             int distortion;
4001             int level= coeff[level_index][i];
4002             const int alevel= FFABS(level);
4003             int unquant_coeff;
4004
4005             av_assert2(level);
4006
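            /* Reconstruct the value the decoder would produce for this
             * candidate level, so distortion can be measured against the
             * original coefficient: H.261/H.263 use alevel*qmul + qadd, MJPEG
             * scales by the quantization matrix, and the MPEG-1/2 path applies
             * the matrix and qscale and then forces the result odd. */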
4007             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4008                 unquant_coeff= alevel*qmul + qadd;
4009             } else if(s->out_format == FMT_MJPEG) {
4010                 j = s->idsp.idct_permutation[scantable[i]];
4011                 unquant_coeff = alevel * matrix[j] * 8;
4012             }else{ //MPEG1
4013                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
4014                 if(s->mb_intra){
4015                         unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
4016                         unquant_coeff =   (unquant_coeff - 1) | 1;
4017                 }else{
4018                         unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
4019                         unquant_coeff =   (unquant_coeff - 1) | 1;
4020                 }
4021                 unquant_coeff<<= 3;
4022             }
4023
4024             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
4025             level+=64;
4026             if((level&(~127)) == 0){
4027                 for(j=survivor_count-1; j>=0; j--){
4028                     int run= i - survivor[j];
4029                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4030                     score += score_tab[i-run];
4031
4032                     if(score < best_score){
4033                         best_score= score;
4034                         run_tab[i+1]= run;
4035                         level_tab[i+1]= level-64;
4036                     }
4037                 }
4038
4039                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4040                     for(j=survivor_count-1; j>=0; j--){
4041                         int run= i - survivor[j];
4042                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4043                         score += score_tab[i-run];
4044                         if(score < last_score){
4045                             last_score= score;
4046                             last_run= run;
4047                             last_level= level-64;
4048                             last_i= i+1;
4049                         }
4050                     }
4051                 }
4052             }else{
4053                 distortion += esc_length*lambda;
4054                 for(j=survivor_count-1; j>=0; j--){
4055                     int run= i - survivor[j];
4056                     int score= distortion + score_tab[i-run];
4057
4058                     if(score < best_score){
4059                         best_score= score;
4060                         run_tab[i+1]= run;
4061                         level_tab[i+1]= level-64;
4062                     }
4063                 }
4064
4065                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4066                   for(j=survivor_count-1; j>=0; j--){
4067                         int run= i - survivor[j];
4068                         int score= distortion + score_tab[i-run];
4069                         if(score < last_score){
4070                             last_score= score;
4071                             last_run= run;
4072                             last_level= level-64;
4073                             last_i= i+1;
4074                         }
4075                     }
4076                 }
4077             }
4078         }
4079
4080         score_tab[i+1]= best_score;
4081
4082         // Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
4083         if(last_non_zero <= 27){
4084             for(; survivor_count; survivor_count--){
4085                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4086                     break;
4087             }
4088         }else{
4089             for(; survivor_count; survivor_count--){
4090                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4091                     break;
4092             }
4093         }
4094
4095         survivor[ survivor_count++ ]= i+1;
4096     }
4097
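    /* For formats without a "last" flag in the AC VLC (MPEG-1/2, MJPEG) the
     * best position to end the block is chosen here by scanning score_tab[]
     * for the lowest total cost, with a small extra penalty standing in for
     * the end-of-block code. */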
4098     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4099         last_score= 256*256*256*120;
4100         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4101             int score= score_tab[i];
4102             if(i) score += lambda*2; //FIXME: could this be more exact?
4103
4104             if(score < last_score){
4105                 last_score= score;
4106                 last_i= i;
4107                 last_level= level_tab[i];
4108                 last_run= run_tab[i];
4109             }
4110         }
4111     }
4112
4113     s->coded_score[n] = last_score;
4114
4115     dc= FFABS(block[0]);
4116     last_non_zero= last_i - 1;
4117     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4118
4119     if(last_non_zero < start_i)
4120         return last_non_zero;
4121
4122     if(last_non_zero == 0 && start_i == 0){
4123         int best_level= 0;
4124         int best_score= dc * dc;
4125
4126         for(i=0; i<coeff_count[0]; i++){
4127             int level= coeff[i][0];
4128             int alevel= FFABS(level);
4129             int unquant_coeff, score, distortion;
4130
4131             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4132                     unquant_coeff= (alevel*qmul + qadd)>>3;
4133             }else{ //MPEG1
4134                     unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4135                     unquant_coeff =   (unquant_coeff - 1) | 1;
4136             }
4137             unquant_coeff = (unquant_coeff + 4) >> 3;
4138             unquant_coeff<<= 3 + 3;
4139
4140             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4141             level+=64;
4142             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4143             else                    score= distortion + esc_length*lambda;
4144
4145             if(score < best_score){
4146                 best_score= score;
4147                 best_level= level - 64;
4148             }
4149         }
4150         block[0]= best_level;
4151         s->coded_score[n] = best_score - dc*dc;
4152         if(best_level == 0) return -1;
4153         else                return last_non_zero;
4154     }
4155
4156     i= last_i;
4157     av_assert2(last_level);
4158
4159     block[ perm_scantable[last_non_zero] ]= last_level;
4160     i -= last_run + 1;
4161
4162     for(; i>start_i; i -= run_tab[i] + 1){
4163         block[ perm_scantable[i-1] ]= level_tab[i];
4164     }
4165
4166     return last_non_zero;
4167 }
4168
4169 //#define REFINE_STATS 1
4170 static int16_t basis[64][64];
4171
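/* build_basis() fills the table above with the 64 two-dimensional DCT basis
 * functions, scaled by 1 << BASIS_SHIFT and stored in IDCT-permuted order.
 * dct_quantize_refine() below uses them through mpvencdsp.try_8x8basis() /
 * add_8x8basis() to evaluate how changing a single quantized coefficient
 * affects the spatial-domain reconstruction error without running a full
 * IDCT. */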
4172 static void build_basis(uint8_t *perm){
4173     int i, j, x, y;
4174     emms_c();
4175     for(i=0; i<8; i++){
4176         for(j=0; j<8; j++){
4177             for(y=0; y<8; y++){
4178                 for(x=0; x<8; x++){
4179                     double s= 0.25*(1<<BASIS_SHIFT);
4180                     int index= 8*i + j;
4181                     int perm_index= perm[index];
4182                     if(i==0) s*= sqrt(0.5);
4183                     if(j==0) s*= sqrt(0.5);
4184                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4185                 }
4186             }
4187         }
4188     }
4189 }
4190
4191 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4192                         int16_t *block, int16_t *weight, int16_t *orig,
4193                         int n, int qscale){
4194     int16_t rem[64];
4195     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4196     const uint8_t *scantable= s->intra_scantable.scantable;
4197     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4198 //    unsigned int threshold1, threshold2;
4199 //    int bias=0;
4200     int run_tab[65];
4201     int prev_run=0;
4202     int prev_level=0;
4203     int qmul, qadd, start_i, last_non_zero, i, dc;
4204     uint8_t * length;
4205     uint8_t * last_length;
4206     int lambda;
4207     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4208 #ifdef REFINE_STATS
4209 static int count=0;
4210 static int after_last=0;
4211 static int to_zero=0;
4212 static int from_zero=0;
4213 static int raise=0;
4214 static int lower=0;
4215 static int messed_sign=0;
4216 #endif
4217
4218     if(basis[0][0] == 0)
4219         build_basis(s->idsp.idct_permutation);
4220
4221     qmul= qscale*2;
4222     qadd= (qscale-1)|1;
4223     if (s->mb_intra) {
4224         if (!s->h263_aic) {
4225             if (n < 4)
4226                 q = s->y_dc_scale;
4227             else
4228                 q = s->c_dc_scale;
4229         } else{
4230             /* For AIC we skip quant/dequant of INTRADC */
4231             q = 1;
4232             qadd=0;
4233         }
4234         q <<= RECON_SHIFT-3;
4235         /* note: block[0] is assumed to be positive */
4236         dc= block[0]*q;
4237 //        block[0] = (block[0] + (q >> 1)) / q;
4238         start_i = 1;
4239 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4240 //            bias= 1<<(QMAT_SHIFT-1);
4241         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4242             length     = s->intra_chroma_ac_vlc_length;
4243             last_length= s->intra_chroma_ac_vlc_last_length;
4244         } else {
4245             length     = s->intra_ac_vlc_length;
4246             last_length= s->intra_ac_vlc_last_length;
4247         }
4248     } else {
4249         dc= 0;
4250         start_i = 0;
4251         length     = s->inter_ac_vlc_length;
4252         last_length= s->inter_ac_vlc_last_length;
4253     }
4254     last_non_zero = s->block_last_index[n];
4255
4256 #ifdef REFINE_STATS
4257 {START_TIMER
4258 #endif
4259     dc += (1<<(RECON_SHIFT-1));
4260     for(i=0; i<64; i++){
4261         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
4262     }
4263 #ifdef REFINE_STATS
4264 STOP_TIMER("memset rem[]")}
4265 #endif
4266     sum=0;
4267     for(i=0; i<64; i++){
4268         int one= 36;
4269         int qns=4;
4270         int w;
4271
4272         w= FFABS(weight[i]) + qns*one;
4273         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4274
4275         weight[i] = w;
4276 //        w=weight[i] = (63*qns + (w/2)) / w;
4277
4278         av_assert2(w>0);
4279         av_assert2(w<(1<<6));
4280         sum += w*w;
4281     }
4282     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
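    /* The loop above maps each noise-shaping weight into the 16..63 range
     * (larger incoming weights give smaller mapped values), and lambda is then
     * rescaled by the sum of the squared weights so that the bit-cost term
     * stays comparable to the weighted distortion returned by try_8x8basis(). */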
4283 #ifdef REFINE_STATS
4284 {START_TIMER
4285 #endif
4286     run=0;
4287     rle_index=0;
4288     for(i=start_i; i<=last_non_zero; i++){
4289         int j= perm_scantable[i];
4290         const int level= block[j];
4291         int coeff;
4292
4293         if(level){
4294             if(level<0) coeff= qmul*level - qadd;
4295             else        coeff= qmul*level + qadd;
4296             run_tab[rle_index++]=run;
4297             run=0;
4298
4299             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4300         }else{
4301             run++;
4302         }
4303     }
4304 #ifdef REFINE_STATS
4305 if(last_non_zero>0){
4306 STOP_TIMER("init rem[]")
4307 }
4308 }
4309
4310 {START_TIMER
4311 #endif
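    /* Greedy refinement loop: each pass tries changing every coefficient (and,
     * for intra blocks, the DC term) by +/-1, scores the change as weighted
     * spatial-domain distortion via try_8x8basis() plus the VLC bit-cost
     * difference times lambda, and then applies the single best improvement.
     * The loop terminates once no change lowers the score.  When
     * analyze_gradient is set, a DCT of the weighted residual is used to skip
     * candidate coefficients whose sign would increase the error. */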
4312     for(;;){
4313         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4314         int best_coeff=0;
4315         int best_change=0;
4316         int run2, best_unquant_change=0, analyze_gradient;
4317 #ifdef REFINE_STATS
4318 {START_TIMER
4319 #endif
4320         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4321
4322         if(analyze_gradient){
4323 #ifdef REFINE_STATS
4324 {START_TIMER
4325 #endif
4326             for(i=0; i<64; i++){
4327                 int w= weight[i];
4328
4329                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4330             }
4331 #ifdef REFINE_STATS
4332 STOP_TIMER("rem*w*w")}
4333 {START_TIMER
4334 #endif
4335             s->fdsp.fdct(d1);
4336 #ifdef REFINE_STATS
4337 STOP_TIMER("dct")}
4338 #endif
4339         }
4340
4341         if(start_i){
4342             const int level= block[0];
4343             int change, old_coeff;
4344
4345             av_assert2(s->mb_intra);
4346
4347             old_coeff= q*level;
4348
4349             for(change=-1; change<=1; change+=2){
4350                 int new_level= level + change;
4351                 int score, new_coeff;
4352
4353                 new_coeff= q*new_level;
4354                 if(new_coeff >= 2048 || new_coeff < 0)
4355                     continue;
4356
4357                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4358                                                   new_coeff - old_coeff);
4359                 if(score<best_score){
4360                     best_score= score;
4361                     best_coeff= 0;
4362                     best_change= change;
4363                     best_unquant_change= new_coeff - old_coeff;
4364                 }
4365             }
4366         }
4367
4368         run=0;
4369         rle_index=0;
4370         run2= run_tab[rle_index++];
4371         prev_level=0;
4372         prev_run=0;
4373
4374         for(i=start_i; i<64; i++){
4375             int j= perm_scantable[i];
4376             const int level= block[j];
4377             int change, old_coeff;
4378
4379             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4380                 break;
4381
4382             if(level){
4383                 if(level<0) old_coeff= qmul*level - qadd;
4384                 else        old_coeff= qmul*level + qadd;
4385                 run2= run_tab[rle_index++]; //FIXME: may read past the last run_tab entry
4386             }else{
4387                 old_coeff=0;
4388                 run2--;
4389                 av_assert2(run2>=0 || i >= last_non_zero );
4390             }
4391
4392             for(change=-1; change<=1; change+=2){
4393                 int new_level= level + change;
4394                 int score, new_coeff, unquant_change;
4395
4396                 score=0;
4397                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4398                    continue;
4399
4400                 if(new_level){
4401                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4402                     else            new_coeff= qmul*new_level + qadd;
4403                     if(new_coeff >= 2048 || new_coeff <= -2048)
4404                         continue;
4405                     //FIXME check for overflow
4406
4407                     if(level){
4408                         if(level < 63 && level > -63){
4409                             if(i < last_non_zero)
4410                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4411                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4412                             else
4413                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4414                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4415                         }
4416                     }else{
4417                         av_assert2(FFABS(new_level)==1);
4418
4419                         if(analyze_gradient){
4420                             int g= d1[ scantable[i] ];
4421                             if(g && (g^new_level) >= 0)
4422                                 continue;
4423                         }
4424
4425                         if(i < last_non_zero){
4426                             int next_i= i + run2 + 1;
4427                             int next_level= block[ perm_scantable[next_i] ] + 64;
4428
4429                             if(next_level&(~127))
4430                                 next_level= 0;
4431
4432                             if(next_i < last_non_zero)
4433                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4434                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4435                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4436                             else
4437                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4438                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4439                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4440                         }else{
4441                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4442                             if(prev_level){
4443                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4444                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4445                             }
4446                         }
4447                     }
4448                 }else{
4449                     new_coeff=0;
4450                     av_assert2(FFABS(level)==1);
4451
4452                     if(i < last_non_zero){
4453                         int next_i= i + run2 + 1;
4454                         int next_level= block[ perm_scantable[next_i] ] + 64;
4455
4456                         if(next_level&(~127))
4457                             next_level= 0;
4458
4459                         if(next_i < last_non_zero)
4460                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4461                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4462                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4463                         else
4464                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4465                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4466                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4467                     }else{
4468                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4469                         if(prev_level){
4470                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4471                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4472                         }
4473                     }
4474                 }
4475
4476                 score *= lambda;
4477
4478                 unquant_change= new_coeff - old_coeff;
4479                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4480
4481                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4482                                                    unquant_change);
4483                 if(score<best_score){
4484                     best_score= score;
4485                     best_coeff= i;
4486                     best_change= change;
4487                     best_unquant_change= unquant_change;
4488                 }
4489             }
4490             if(level){
4491                 prev_level= level + 64;
4492                 if(prev_level&(~127))
4493                     prev_level= 0;
4494                 prev_run= run;
4495                 run=0;
4496             }else{
4497                 run++;
4498             }
4499         }
4500 #ifdef REFINE_STATS
4501 STOP_TIMER("iterative step")}
4502 #endif
4503
4504         if(best_change){
4505             int j= perm_scantable[ best_coeff ];
4506
4507             block[j] += best_change;
4508
4509             if(best_coeff > last_non_zero){
4510                 last_non_zero= best_coeff;
4511                 av_assert2(block[j]);
4512 #ifdef REFINE_STATS
4513 after_last++;
4514 #endif
4515             }else{
4516 #ifdef REFINE_STATS
4517 if(block[j]){
4518     if(block[j] - best_change){
4519         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4520             raise++;
4521         }else{
4522             lower++;
4523         }
4524     }else{
4525         from_zero++;
4526     }
4527 }else{
4528     to_zero++;
4529 }
4530 #endif
4531                 for(; last_non_zero>=start_i; last_non_zero--){
4532                     if(block[perm_scantable[last_non_zero]])
4533                         break;
4534                 }
4535             }
4536 #ifdef REFINE_STATS
4537 count++;
4538 if(256*256*256*64 % count == 0){
4539     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4540 }
4541 #endif
4542             run=0;
4543             rle_index=0;
4544             for(i=start_i; i<=last_non_zero; i++){
4545                 int j= perm_scantable[i];
4546                 const int level= block[j];
4547
4548                  if(level){
4549                      run_tab[rle_index++]=run;
4550                      run=0;
4551                  }else{
4552                      run++;
4553                  }
4554             }
4555
4556             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4557         }else{
4558             break;
4559         }
4560     }
4561 #ifdef REFINE_STATS
4562 if(last_non_zero>0){
4563 STOP_TIMER("iterative search")
4564 }
4565 }
4566 #endif
4567
4568     return last_non_zero;
4569 }
4570
4571 /**
4572  * Permute an 8x8 block according to the given permutation.
4573  * @param block the block which will be permuted according to
4574  *              the given permutation vector
4575  * @param permutation the permutation vector
4576  * @param last the last non-zero coefficient in scantable order, used to
4577  *             speed up the permutation
4578  * @param scantable the scantable that was used; it is only needed to speed up
4579  *                  the permutation, the block is not (inverse) permuted
4580  *                  to scantable order!
4581  */
4582 void ff_block_permute(int16_t *block, uint8_t *permutation,
4583                       const uint8_t *scantable, int last)
4584 {
4585     int i;
4586     int16_t temp[64];
4587
4588     if (last <= 0)
4589         return;
4590     //FIXME this is OK, but not clean, and might fail for some permutations
4591     // if (permutation[1] == 1)
4592     // return;
4593
4594     for (i = 0; i <= last; i++) {
4595         const int j = scantable[i];
4596         temp[j] = block[j];
4597         block[j] = 0;
4598     }
4599
4600     for (i = 0; i <= last; i++) {
4601         const int j = scantable[i];
4602         const int perm_j = permutation[j];
4603         block[perm_j] = temp[j];
4604     }
4605 }
4606
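/* Plain (non-trellis) quantizer: forward DCT, optional DCT-domain denoising,
 * then dead-zone quantization.  For intra blocks the DC coefficient is divided
 * by the DC scaler separately; each AC coefficient is scaled by the
 * precomputed qmat[] and reduced to (bias + |level|) >> QMAT_SHIFT with the
 * sign restored.  As in the trellis version, the unsigned compare against
 * threshold2 == 2*threshold1 implements the |level| > threshold1 test in a
 * single branch.  Finally, if the IDCT permutation is not the identity, the
 * non-zero coefficients are reordered with ff_block_permute(). */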
4607 int ff_dct_quantize_c(MpegEncContext *s,
4608                         int16_t *block, int n,
4609                         int qscale, int *overflow)
4610 {
4611     int i, j, level, last_non_zero, q, start_i;
4612     const int *qmat;
4613     const uint8_t *scantable= s->intra_scantable.scantable;
4614     int bias;
4615     int max=0;
4616     unsigned int threshold1, threshold2;
4617
4618     s->fdsp.fdct(block);
4619
4620     if(s->dct_error_sum)
4621         s->denoise_dct(s, block);
4622
4623     if (s->mb_intra) {
4624         if (!s->h263_aic) {
4625             if (n < 4)
4626                 q = s->y_dc_scale;
4627             else
4628                 q = s->c_dc_scale;
4629             q = q << 3;
4630         } else
4631             /* For AIC we skip quant/dequant of INTRADC */
4632             q = 1 << 3;
4633
4634         /* note: block[0] is assumed to be positive */
4635         block[0] = (block[0] + (q >> 1)) / q;
4636         start_i = 1;
4637         last_non_zero = 0;
4638         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4639         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4640     } else {
4641         start_i = 0;
4642         last_non_zero = -1;
4643         qmat = s->q_inter_matrix[qscale];
4644         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4645     }
4646     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4647     threshold2= (threshold1<<1);
4648     for(i=63;i>=start_i;i--) {
4649         j = scantable[i];
4650         level = block[j] * qmat[j];
4651
4652         if(((unsigned)(level+threshold1))>threshold2){
4653             last_non_zero = i;
4654             break;
4655         }else{
4656             block[j]=0;
4657         }
4658     }
4659     for(i=start_i; i<=last_non_zero; i++) {
4660         j = scantable[i];
4661         level = block[j] * qmat[j];
4662
4663 //        if(   bias+level >= (1<<QMAT_SHIFT)
4664 //           || bias-level >= (1<<QMAT_SHIFT)){
4665         if(((unsigned)(level+threshold1))>threshold2){
4666             if(level>0){
4667                 level= (bias + level)>>QMAT_SHIFT;
4668                 block[j]= level;
4669             }else{
4670                 level= (bias - level)>>QMAT_SHIFT;
4671                 block[j]= -level;
4672             }
4673             max |=level;
4674         }else{
4675             block[j]=0;
4676         }
4677     }
4678     *overflow= s->max_qcoeff < max; //overflow might have happened
4679
4680     /* This permutation is needed so that the coefficients match the IDCT's input order; only the non-zero elements are permuted. */
4681     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4682         ff_block_permute(block, s->idsp.idct_permutation,
4683                       scantable, last_non_zero);
4684
4685     return last_non_zero;
4686 }
4687
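/* The option tables and AVCodec definitions below are thin registration
 * wrappers: all encoders in this family (H.263, H.263+, MSMPEG4v2/v3, WMV1)
 * share the ff_mpv_encode_init/ff_mpv_encode_picture/ff_mpv_encode_end entry
 * points and differ only in codec id, private options and capabilities. */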
4688 #define OFFSET(x) offsetof(MpegEncContext, x)
4689 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4690 static const AVOption h263_options[] = {
4691     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4692     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4693     FF_MPV_COMMON_OPTS
4694     { NULL },
4695 };
4696
4697 static const AVClass h263_class = {
4698     .class_name = "H.263 encoder",
4699     .item_name  = av_default_item_name,
4700     .option     = h263_options,
4701     .version    = LIBAVUTIL_VERSION_INT,
4702 };
4703
4704 AVCodec ff_h263_encoder = {
4705     .name           = "h263",
4706     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4707     .type           = AVMEDIA_TYPE_VIDEO,
4708     .id             = AV_CODEC_ID_H263,
4709     .priv_data_size = sizeof(MpegEncContext),
4710     .init           = ff_mpv_encode_init,
4711     .encode2        = ff_mpv_encode_picture,
4712     .close          = ff_mpv_encode_end,
4713     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4714     .priv_class     = &h263_class,
4715 };
4716
4717 static const AVOption h263p_options[] = {
4718     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus),       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4719     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4720     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4721     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE},
4722     FF_MPV_COMMON_OPTS
4723     { NULL },
4724 };
4725 static const AVClass h263p_class = {
4726     .class_name = "H.263p encoder",
4727     .item_name  = av_default_item_name,
4728     .option     = h263p_options,
4729     .version    = LIBAVUTIL_VERSION_INT,
4730 };
4731
4732 AVCodec ff_h263p_encoder = {
4733     .name           = "h263p",
4734     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4735     .type           = AVMEDIA_TYPE_VIDEO,
4736     .id             = AV_CODEC_ID_H263P,
4737     .priv_data_size = sizeof(MpegEncContext),
4738     .init           = ff_mpv_encode_init,
4739     .encode2        = ff_mpv_encode_picture,
4740     .close          = ff_mpv_encode_end,
4741     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4742     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4743     .priv_class     = &h263p_class,
4744 };
4745
4746 static const AVClass msmpeg4v2_class = {
4747     .class_name = "msmpeg4v2 encoder",
4748     .item_name  = av_default_item_name,
4749     .option     = ff_mpv_generic_options,
4750     .version    = LIBAVUTIL_VERSION_INT,
4751 };
4752
4753 AVCodec ff_msmpeg4v2_encoder = {
4754     .name           = "msmpeg4v2",
4755     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4756     .type           = AVMEDIA_TYPE_VIDEO,
4757     .id             = AV_CODEC_ID_MSMPEG4V2,
4758     .priv_data_size = sizeof(MpegEncContext),
4759     .init           = ff_mpv_encode_init,
4760     .encode2        = ff_mpv_encode_picture,
4761     .close          = ff_mpv_encode_end,
4762     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4763     .priv_class     = &msmpeg4v2_class,
4764 };
4765
4766 static const AVClass msmpeg4v3_class = {
4767     .class_name = "msmpeg4v3 encoder",
4768     .item_name  = av_default_item_name,
4769     .option     = ff_mpv_generic_options,
4770     .version    = LIBAVUTIL_VERSION_INT,
4771 };
4772
4773 AVCodec ff_msmpeg4v3_encoder = {
4774     .name           = "msmpeg4",
4775     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4776     .type           = AVMEDIA_TYPE_VIDEO,
4777     .id             = AV_CODEC_ID_MSMPEG4V3,
4778     .priv_data_size = sizeof(MpegEncContext),
4779     .init           = ff_mpv_encode_init,
4780     .encode2        = ff_mpv_encode_picture,
4781     .close          = ff_mpv_encode_end,
4782     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4783     .priv_class     = &msmpeg4v3_class,
4784 };
4785
4786 static const AVClass wmv1_class = {
4787     .class_name = "wmv1 encoder",
4788     .item_name  = av_default_item_name,
4789     .option     = ff_mpv_generic_options,
4790     .version    = LIBAVUTIL_VERSION_INT,
4791 };
4792
4793 AVCodec ff_wmv1_encoder = {
4794     .name           = "wmv1",
4795     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4796     .type           = AVMEDIA_TYPE_VIDEO,
4797     .id             = AV_CODEC_ID_WMV1,
4798     .priv_data_size = sizeof(MpegEncContext),
4799     .init           = ff_mpv_encode_init,
4800     .encode2        = ff_mpv_encode_picture,
4801     .close          = ff_mpv_encode_end,
4802     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4803     .priv_class     = &wmv1_class,
4804 };