]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit '8ad5124b7ecf7f727724e270a7b4bb8c7bcbf6a4'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /*
26  * non linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
27  */
28
29 /**
30  * @file
31  * The simplest mpeg encoder (well, it was the simplest!).
32  */
33
34 #include <stdint.h>
35
36 #include "libavutil/internal.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/pixdesc.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/timer.h"
42 #include "avcodec.h"
43 #include "dct.h"
44 #include "idctdsp.h"
45 #include "mpeg12.h"
46 #include "mpegvideo.h"
47 #include "mpegvideodata.h"
48 #include "h261.h"
49 #include "h263.h"
50 #include "h263data.h"
51 #include "mjpegenc_common.h"
52 #include "mathops.h"
53 #include "mpegutils.h"
54 #include "mjpegenc.h"
55 #include "msmpeg4.h"
56 #include "pixblockdsp.h"
57 #include "qpeldsp.h"
58 #include "faandct.h"
59 #include "thread.h"
60 #include "aandcttab.h"
61 #include "flv.h"
62 #include "mpeg4video.h"
63 #include "internal.h"
64 #include "bytestream.h"
65 #include "wmv2.h"
66 #include "rv10.h"
67 #include <limits.h>
68 #include "sp5x.h"
69
/* Shift used when scaling quantizer bias values; see the bias handling in
 * ff_convert_matrix() and the intra/inter bias defaults in
 * ff_mpv_encode_init(). */
#define QUANT_BIAS_SHIFT 8

/* Shift used by ff_convert_matrix() for the 16-bit qmat16 table. */
#define QMAT_SHIFT_MMX 16
/* Shift used by ff_convert_matrix() for the int qmat table. */
#define QMAT_SHIFT 21

/* Prototypes for static helpers that are referenced before their
 * definitions in this file. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Shared default tables; mpv_encode_defaults() installs them as
 * s->me.mv_penalty and s->fcode_tab respectively. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
83
/* Option table consisting solely of the entries expanded from
 * FF_MPV_COMMON_OPTS, followed by the terminating { NULL } element. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
88
89 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
90                        uint16_t (*qmat16)[2][64],
91                        const uint16_t *quant_matrix,
92                        int bias, int qmin, int qmax, int intra)
93 {
94     FDCTDSPContext *fdsp = &s->fdsp;
95     int qscale;
96     int shift = 0;
97
98     for (qscale = qmin; qscale <= qmax; qscale++) {
99         int i;
100         int qscale2;
101
102         if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
103         else                 qscale2 = qscale << 1;
104
105         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
106 #if CONFIG_FAANDCT
107             fdsp->fdct == ff_faandct            ||
108 #endif /* CONFIG_FAANDCT */
109             fdsp->fdct == ff_jpeg_fdct_islow_10) {
110             for (i = 0; i < 64; i++) {
111                 const int j = s->idsp.idct_permutation[i];
112                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
113                 /* 16 <= qscale * quant_matrix[i] <= 7905
114                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
115                  *             19952 <=              x  <= 249205026
116                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
117                  *           3444240 >= (1 << 36) / (x) >= 275 */
118
119                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
120             }
121         } else if (fdsp->fdct == ff_fdct_ifast) {
122             for (i = 0; i < 64; i++) {
123                 const int j = s->idsp.idct_permutation[i];
124                 int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
125                 /* 16 <= qscale * quant_matrix[i] <= 7905
126                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
127                  *             19952 <=              x  <= 249205026
128                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
129                  *           3444240 >= (1 << 36) / (x) >= 275 */
130
131                 qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
132             }
133         } else {
134             for (i = 0; i < 64; i++) {
135                 const int j = s->idsp.idct_permutation[i];
136                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
137                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
138                  * Assume x = qscale * quant_matrix[i]
139                  * So             16 <=              x  <= 7905
140                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
141                  * so          32768 >= (1 << 19) / (x) >= 67 */
142                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
143                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
144                 //                    (qscale * quant_matrix[i]);
145                 qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
146
147                 if (qmat16[qscale][0][i] == 0 ||
148                     qmat16[qscale][0][i] == 128 * 256)
149                     qmat16[qscale][0][i] = 128 * 256 - 1;
150                 qmat16[qscale][1][i] =
151                     ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
152                                 qmat16[qscale][0][i]);
153             }
154         }
155
156         for (i = intra; i < 64; i++) {
157             int64_t max = 8191;
158             if (fdsp->fdct == ff_fdct_ifast) {
159                 max = (8191LL * ff_aanscales[i]) >> 14;
160             }
161             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
162                 shift++;
163             }
164         }
165     }
166     if (shift) {
167         av_log(NULL, AV_LOG_INFO,
168                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
169                QMAT_SHIFT - shift);
170     }
171 }
172
173 static inline void update_qscale(MpegEncContext *s)
174 {
175     if (s->q_scale_type == 1 && 0) {
176         int i;
177         int bestdiff=INT_MAX;
178         int best = 1;
179
180         for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
181             int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
182             if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
183                 (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
184                 continue;
185             if (diff < bestdiff) {
186                 bestdiff = diff;
187                 best = i;
188             }
189         }
190         s->qscale = best;
191     } else {
192         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
193                     (FF_LAMBDA_SHIFT + 7);
194         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
195     }
196
197     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
198                  FF_LAMBDA_SHIFT;
199 }
200
201 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
202 {
203     int i;
204
205     if (matrix) {
206         put_bits(pb, 1, 1);
207         for (i = 0; i < 64; i++) {
208             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
209         }
210     } else
211         put_bits(pb, 1, 0);
212 }
213
214 /**
215  * init s->current_picture.qscale_table from s->lambda_table
216  */
217 void ff_init_qscale_tab(MpegEncContext *s)
218 {
219     int8_t * const qscale_table = s->current_picture.qscale_table;
220     int i;
221
222     for (i = 0; i < s->mb_num; i++) {
223         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
224         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
225         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
226                                                   s->avctx->qmax);
227     }
228 }
229
230 static void update_duplicate_context_after_me(MpegEncContext *dst,
231                                               MpegEncContext *src)
232 {
233 #define COPY(a) dst->a= src->a
234     COPY(pict_type);
235     COPY(current_picture);
236     COPY(f_code);
237     COPY(b_code);
238     COPY(qscale);
239     COPY(lambda);
240     COPY(lambda2);
241     COPY(picture_in_gop_number);
242     COPY(gop_picture_number);
243     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
244     COPY(progressive_frame);    // FIXME don't set in encode_header
245     COPY(partitioned_frame);    // FIXME don't set in encode_header
246 #undef COPY
247 }
248
249 /**
250  * Set the given MpegEncContext to defaults for encoding.
251  * the changed fields will not depend upon the prior state of the MpegEncContext.
252  */
253 static void mpv_encode_defaults(MpegEncContext *s)
254 {
255     int i;
256     ff_mpv_common_defaults(s);
257
258     for (i = -16; i < 16; i++) {
259         default_fcode_tab[i + MAX_MV] = 1;
260     }
261     s->me.mv_penalty = default_mv_penalty;
262     s->fcode_tab     = default_fcode_tab;
263
264     s->input_picture_number  = 0;
265     s->picture_in_gop_number = 0;
266 }
267
268 av_cold int ff_dct_encode_init(MpegEncContext *s) {
269     if (ARCH_X86)
270         ff_dct_encode_init_x86(s);
271
272     if (CONFIG_H263_ENCODER)
273         ff_h263dsp_init(&s->h263dsp);
274     if (!s->dct_quantize)
275         s->dct_quantize = ff_dct_quantize_c;
276     if (!s->denoise_dct)
277         s->denoise_dct  = denoise_dct_c;
278     s->fast_dct_quantize = s->dct_quantize;
279     if (s->avctx->trellis)
280         s->dct_quantize  = dct_quantize_trellis_c;
281
282     return 0;
283 }
284
285 /* init video encoder */
286 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
287 {
288     MpegEncContext *s = avctx->priv_data;
289     int i, ret, format_supported;
290
291     mpv_encode_defaults(s);
292
293     switch (avctx->codec_id) {
294     case AV_CODEC_ID_MPEG2VIDEO:
295         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
296             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
297             av_log(avctx, AV_LOG_ERROR,
298                    "only YUV420 and YUV422 are supported\n");
299             return -1;
300         }
301         break;
302     case AV_CODEC_ID_MJPEG:
303     case AV_CODEC_ID_AMV:
304         format_supported = 0;
305         /* JPEG color space */
306         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
307             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
308             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
309             (avctx->color_range == AVCOL_RANGE_JPEG &&
310              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
311               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
312               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
313             format_supported = 1;
314         /* MPEG color space */
315         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
316                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
317                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
318                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
319             format_supported = 1;
320
321         if (!format_supported) {
322             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
323             return -1;
324         }
325         break;
326     default:
327         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
328             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
329             return -1;
330         }
331     }
332
333     switch (avctx->pix_fmt) {
334     case AV_PIX_FMT_YUVJ444P:
335     case AV_PIX_FMT_YUV444P:
336         s->chroma_format = CHROMA_444;
337         break;
338     case AV_PIX_FMT_YUVJ422P:
339     case AV_PIX_FMT_YUV422P:
340         s->chroma_format = CHROMA_422;
341         break;
342     case AV_PIX_FMT_YUVJ420P:
343     case AV_PIX_FMT_YUV420P:
344     default:
345         s->chroma_format = CHROMA_420;
346         break;
347     }
348
349     s->bit_rate = avctx->bit_rate;
350     s->width    = avctx->width;
351     s->height   = avctx->height;
352     if (avctx->gop_size > 600 &&
353         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
354         av_log(avctx, AV_LOG_WARNING,
355                "keyframe interval too large!, reducing it from %d to %d\n",
356                avctx->gop_size, 600);
357         avctx->gop_size = 600;
358     }
359     s->gop_size     = avctx->gop_size;
360     s->avctx        = avctx;
361     if (avctx->max_b_frames > MAX_B_FRAMES) {
362         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
363                "is %d.\n", MAX_B_FRAMES);
364         avctx->max_b_frames = MAX_B_FRAMES;
365     }
366     s->max_b_frames = avctx->max_b_frames;
367     s->codec_id     = avctx->codec->id;
368     s->strict_std_compliance = avctx->strict_std_compliance;
369     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
370     s->mpeg_quant         = avctx->mpeg_quant;
371     s->rtp_mode           = !!avctx->rtp_payload_size;
372     s->intra_dc_precision = avctx->intra_dc_precision;
373
374     // workaround some differences between how applications specify dc precision
375     if (s->intra_dc_precision < 0) {
376         s->intra_dc_precision += 8;
377     } else if (s->intra_dc_precision >= 8)
378         s->intra_dc_precision -= 8;
379
380     if (s->intra_dc_precision < 0) {
381         av_log(avctx, AV_LOG_ERROR,
382                 "intra dc precision must be positive, note some applications use"
383                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
384         return AVERROR(EINVAL);
385     }
386
387     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
388         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
389         return AVERROR(EINVAL);
390     }
391     s->user_specified_pts = AV_NOPTS_VALUE;
392
393     if (s->gop_size <= 1) {
394         s->intra_only = 1;
395         s->gop_size   = 12;
396     } else {
397         s->intra_only = 0;
398     }
399
400 #if FF_API_MOTION_EST
401 FF_DISABLE_DEPRECATION_WARNINGS
402     s->me_method = avctx->me_method;
403 FF_ENABLE_DEPRECATION_WARNINGS
404 #endif
405
406     /* Fixed QSCALE */
407     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
408
409 #if FF_API_MPV_OPT
410     FF_DISABLE_DEPRECATION_WARNINGS
411     if (avctx->border_masking != 0.0)
412         s->border_masking = avctx->border_masking;
413     FF_ENABLE_DEPRECATION_WARNINGS
414 #endif
415
416     s->adaptive_quant = (s->avctx->lumi_masking ||
417                          s->avctx->dark_masking ||
418                          s->avctx->temporal_cplx_masking ||
419                          s->avctx->spatial_cplx_masking  ||
420                          s->avctx->p_masking      ||
421                          s->border_masking ||
422                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
423                         !s->fixed_qscale;
424
425     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
426
427     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
428         switch(avctx->codec_id) {
429         case AV_CODEC_ID_MPEG1VIDEO:
430         case AV_CODEC_ID_MPEG2VIDEO:
431             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
432             break;
433         case AV_CODEC_ID_MPEG4:
434         case AV_CODEC_ID_MSMPEG4V1:
435         case AV_CODEC_ID_MSMPEG4V2:
436         case AV_CODEC_ID_MSMPEG4V3:
437             if       (avctx->rc_max_rate >= 15000000) {
438                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
439             } else if(avctx->rc_max_rate >=  2000000) {
440                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
441             } else if(avctx->rc_max_rate >=   384000) {
442                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
443             } else
444                 avctx->rc_buffer_size = 40;
445             avctx->rc_buffer_size *= 16384;
446             break;
447         }
448         if (avctx->rc_buffer_size) {
449             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
450         }
451     }
452
453     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
454         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
455         return -1;
456     }
457
458     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
459         av_log(avctx, AV_LOG_INFO,
460                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
461     }
462
463     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
464         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
465         return -1;
466     }
467
468     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
469         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
470         return -1;
471     }
472
473     if (avctx->rc_max_rate &&
474         avctx->rc_max_rate == avctx->bit_rate &&
475         avctx->rc_max_rate != avctx->rc_min_rate) {
476         av_log(avctx, AV_LOG_INFO,
477                "impossible bitrate constraints, this will fail\n");
478     }
479
480     if (avctx->rc_buffer_size &&
481         avctx->bit_rate * (int64_t)avctx->time_base.num >
482             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
483         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
484         return -1;
485     }
486
487     if (!s->fixed_qscale &&
488         avctx->bit_rate * av_q2d(avctx->time_base) >
489             avctx->bit_rate_tolerance) {
490         av_log(avctx, AV_LOG_WARNING,
491                "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, (int64_t)avctx->bit_rate);
492         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
493     }
494
495     if (s->avctx->rc_max_rate &&
496         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
497         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
498          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
499         90000LL * (avctx->rc_buffer_size - 1) >
500             s->avctx->rc_max_rate * 0xFFFFLL) {
501         av_log(avctx, AV_LOG_INFO,
502                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
503                "specified vbv buffer is too large for the given bitrate!\n");
504     }
505
506     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
507         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
508         s->codec_id != AV_CODEC_ID_FLV1) {
509         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
510         return -1;
511     }
512
513     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
514         av_log(avctx, AV_LOG_ERROR,
515                "OBMC is only supported with simple mb decision\n");
516         return -1;
517     }
518
519     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
520         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
521         return -1;
522     }
523
524     if (s->max_b_frames                    &&
525         s->codec_id != AV_CODEC_ID_MPEG4      &&
526         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
527         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
528         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
529         return -1;
530     }
531     if (s->max_b_frames < 0) {
532         av_log(avctx, AV_LOG_ERROR,
533                "max b frames must be 0 or positive for mpegvideo based encoders\n");
534         return -1;
535     }
536
537     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
538          s->codec_id == AV_CODEC_ID_H263  ||
539          s->codec_id == AV_CODEC_ID_H263P) &&
540         (avctx->sample_aspect_ratio.num > 255 ||
541          avctx->sample_aspect_ratio.den > 255)) {
542         av_log(avctx, AV_LOG_WARNING,
543                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
544                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
545         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
546                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
547     }
548
549     if ((s->codec_id == AV_CODEC_ID_H263  ||
550          s->codec_id == AV_CODEC_ID_H263P) &&
551         (avctx->width  > 2048 ||
552          avctx->height > 1152 )) {
553         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
554         return -1;
555     }
556     if ((s->codec_id == AV_CODEC_ID_H263  ||
557          s->codec_id == AV_CODEC_ID_H263P) &&
558         ((avctx->width &3) ||
559          (avctx->height&3) )) {
560         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
561         return -1;
562     }
563
564     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
565         (avctx->width  > 4095 ||
566          avctx->height > 4095 )) {
567         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
568         return -1;
569     }
570
571     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
572         (avctx->width  > 16383 ||
573          avctx->height > 16383 )) {
574         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
575         return -1;
576     }
577
578     if (s->codec_id == AV_CODEC_ID_RV10 &&
579         (avctx->width &15 ||
580          avctx->height&15 )) {
581         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
582         return AVERROR(EINVAL);
583     }
584
585     if (s->codec_id == AV_CODEC_ID_RV20 &&
586         (avctx->width &3 ||
587          avctx->height&3 )) {
588         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
589         return AVERROR(EINVAL);
590     }
591
592     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
593          s->codec_id == AV_CODEC_ID_WMV2) &&
594          avctx->width & 1) {
595          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
596          return -1;
597     }
598
599     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
600         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
601         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
602         return -1;
603     }
604
605     // FIXME mpeg2 uses that too
606     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
607                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
608         av_log(avctx, AV_LOG_ERROR,
609                "mpeg2 style quantization not supported by codec\n");
610         return -1;
611     }
612
613     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
614         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
615         return -1;
616     }
617
618     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
619         s->avctx->mb_decision != FF_MB_DECISION_RD) {
620         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
621         return -1;
622     }
623
624     if (s->avctx->scenechange_threshold < 1000000000 &&
625         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
626         av_log(avctx, AV_LOG_ERROR,
627                "closed gop with scene change detection are not supported yet, "
628                "set threshold to 1000000000\n");
629         return -1;
630     }
631
632     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
633         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
634             av_log(avctx, AV_LOG_ERROR,
635                   "low delay forcing is only available for mpeg2\n");
636             return -1;
637         }
638         if (s->max_b_frames != 0) {
639             av_log(avctx, AV_LOG_ERROR,
640                    "b frames cannot be used with low delay\n");
641             return -1;
642         }
643     }
644
645     if (s->q_scale_type == 1) {
646         if (avctx->qmax > 28) {
647             av_log(avctx, AV_LOG_ERROR,
648                    "non linear quant only supports qmax <= 28 currently\n");
649             return -1;
650         }
651     }
652
653     if (s->avctx->thread_count > 1         &&
654         s->codec_id != AV_CODEC_ID_MPEG4      &&
655         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
656         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
657         s->codec_id != AV_CODEC_ID_MJPEG      &&
658         (s->codec_id != AV_CODEC_ID_H263P)) {
659         av_log(avctx, AV_LOG_ERROR,
660                "multi threaded encoding not supported by codec\n");
661         return -1;
662     }
663
664     if (s->avctx->thread_count < 1) {
665         av_log(avctx, AV_LOG_ERROR,
666                "automatic thread number detection not supported by codec, "
667                "patch welcome\n");
668         return -1;
669     }
670
671     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
672         s->rtp_mode = 1;
673
674     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
675         s->h263_slice_structured = 1;
676
677     if (!avctx->time_base.den || !avctx->time_base.num) {
678         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
679         return -1;
680     }
681
682     if (avctx->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
683         av_log(avctx, AV_LOG_INFO,
684                "notice: b_frame_strategy only affects the first pass\n");
685         avctx->b_frame_strategy = 0;
686     }
687
688     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
689     if (i > 1) {
690         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
691         avctx->time_base.den /= i;
692         avctx->time_base.num /= i;
693         //return -1;
694     }
695
696     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
697         // (a + x * 3 / 8) / x
698         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
699         s->inter_quant_bias = 0;
700     } else {
701         s->intra_quant_bias = 0;
702         // (a - x / 4) / x
703         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
704     }
705
706     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
707         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
708         return AVERROR(EINVAL);
709     }
710
711 #if FF_API_QUANT_BIAS
712 FF_DISABLE_DEPRECATION_WARNINGS
713     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
714         s->intra_quant_bias = avctx->intra_quant_bias;
715     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
716         s->inter_quant_bias = avctx->inter_quant_bias;
717 FF_ENABLE_DEPRECATION_WARNINGS
718 #endif
719
720     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
721
722     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
723         s->avctx->time_base.den > (1 << 16) - 1) {
724         av_log(avctx, AV_LOG_ERROR,
725                "timebase %d/%d not supported by MPEG 4 standard, "
726                "the maximum admitted value for the timebase denominator "
727                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
728                (1 << 16) - 1);
729         return -1;
730     }
731     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
732
733     switch (avctx->codec->id) {
734     case AV_CODEC_ID_MPEG1VIDEO:
735         s->out_format = FMT_MPEG1;
736         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
737         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
738         break;
739     case AV_CODEC_ID_MPEG2VIDEO:
740         s->out_format = FMT_MPEG1;
741         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
742         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
743         s->rtp_mode   = 1;
744         break;
745     case AV_CODEC_ID_MJPEG:
746     case AV_CODEC_ID_AMV:
747         s->out_format = FMT_MJPEG;
748         s->intra_only = 1; /* force intra only for jpeg */
749         if (!CONFIG_MJPEG_ENCODER ||
750             ff_mjpeg_encode_init(s) < 0)
751             return -1;
752         avctx->delay = 0;
753         s->low_delay = 1;
754         break;
755     case AV_CODEC_ID_H261:
756         if (!CONFIG_H261_ENCODER)
757             return -1;
758         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
759             av_log(avctx, AV_LOG_ERROR,
760                    "The specified picture size of %dx%d is not valid for the "
761                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
762                     s->width, s->height);
763             return -1;
764         }
765         s->out_format = FMT_H261;
766         avctx->delay  = 0;
767         s->low_delay  = 1;
768         s->rtp_mode   = 0; /* Sliced encoding not supported */
769         break;
770     case AV_CODEC_ID_H263:
771         if (!CONFIG_H263_ENCODER)
772             return -1;
773         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
774                              s->width, s->height) == 8) {
775             av_log(avctx, AV_LOG_ERROR,
776                    "The specified picture size of %dx%d is not valid for "
777                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
778                    "352x288, 704x576, and 1408x1152. "
779                    "Try H.263+.\n", s->width, s->height);
780             return -1;
781         }
782         s->out_format = FMT_H263;
783         avctx->delay  = 0;
784         s->low_delay  = 1;
785         break;
786     case AV_CODEC_ID_H263P:
787         s->out_format = FMT_H263;
788         s->h263_plus  = 1;
789         /* Fx */
790         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
791         s->modified_quant  = s->h263_aic;
792         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
793         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
794
795         /* /Fx */
796         /* These are just to be sure */
797         avctx->delay = 0;
798         s->low_delay = 1;
799         break;
800     case AV_CODEC_ID_FLV1:
801         s->out_format      = FMT_H263;
802         s->h263_flv        = 2; /* format = 1; 11-bit codes */
803         s->unrestricted_mv = 1;
804         s->rtp_mode  = 0; /* don't allow GOB */
805         avctx->delay = 0;
806         s->low_delay = 1;
807         break;
808     case AV_CODEC_ID_RV10:
809         s->out_format = FMT_H263;
810         avctx->delay  = 0;
811         s->low_delay  = 1;
812         break;
813     case AV_CODEC_ID_RV20:
814         s->out_format      = FMT_H263;
815         avctx->delay       = 0;
816         s->low_delay       = 1;
817         s->modified_quant  = 1;
818         s->h263_aic        = 1;
819         s->h263_plus       = 1;
820         s->loop_filter     = 1;
821         s->unrestricted_mv = 0;
822         break;
823     case AV_CODEC_ID_MPEG4:
824         s->out_format      = FMT_H263;
825         s->h263_pred       = 1;
826         s->unrestricted_mv = 1;
827         s->low_delay       = s->max_b_frames ? 0 : 1;
828         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
829         break;
830     case AV_CODEC_ID_MSMPEG4V2:
831         s->out_format      = FMT_H263;
832         s->h263_pred       = 1;
833         s->unrestricted_mv = 1;
834         s->msmpeg4_version = 2;
835         avctx->delay       = 0;
836         s->low_delay       = 1;
837         break;
838     case AV_CODEC_ID_MSMPEG4V3:
839         s->out_format        = FMT_H263;
840         s->h263_pred         = 1;
841         s->unrestricted_mv   = 1;
842         s->msmpeg4_version   = 3;
843         s->flipflop_rounding = 1;
844         avctx->delay         = 0;
845         s->low_delay         = 1;
846         break;
847     case AV_CODEC_ID_WMV1:
848         s->out_format        = FMT_H263;
849         s->h263_pred         = 1;
850         s->unrestricted_mv   = 1;
851         s->msmpeg4_version   = 4;
852         s->flipflop_rounding = 1;
853         avctx->delay         = 0;
854         s->low_delay         = 1;
855         break;
856     case AV_CODEC_ID_WMV2:
857         s->out_format        = FMT_H263;
858         s->h263_pred         = 1;
859         s->unrestricted_mv   = 1;
860         s->msmpeg4_version   = 5;
861         s->flipflop_rounding = 1;
862         avctx->delay         = 0;
863         s->low_delay         = 1;
864         break;
865     default:
866         return -1;
867     }
868
869     avctx->has_b_frames = !s->low_delay;
870
871     s->encoding = 1;
872
873     s->progressive_frame    =
874     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
875                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
876                                 s->alternate_scan);
877
878     /* init */
879     ff_mpv_idct_init(s);
880     if (ff_mpv_common_init(s) < 0)
881         return -1;
882
883     ff_fdctdsp_init(&s->fdsp, avctx);
884     ff_me_cmp_init(&s->mecc, avctx);
885     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
886     ff_pixblockdsp_init(&s->pdsp, avctx);
887     ff_qpeldsp_init(&s->qdsp);
888
889     if (s->msmpeg4_version) {
890         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
891                           2 * 2 * (MAX_LEVEL + 1) *
892                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
893     }
894     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
895
896     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
897     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
898     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
899     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
900     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
901     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
902     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
903                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
904     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
905                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
906
907     if (s->avctx->noise_reduction) {
908         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
909                           2 * 64 * sizeof(uint16_t), fail);
910     }
911
912     ff_dct_encode_init(s);
913
914     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
915         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
916
917     s->quant_precision = 5;
918
919     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
920     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
921
922     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
923         ff_h261_encode_init(s);
924     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
925         ff_h263_encode_init(s);
926     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
927         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
928             return ret;
929     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
930         && s->out_format == FMT_MPEG1)
931         ff_mpeg1_encode_init(s);
932
933     /* init q matrix */
934     for (i = 0; i < 64; i++) {
935         int j = s->idsp.idct_permutation[i];
936         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
937             s->mpeg_quant) {
938             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
939             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
940         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
941             s->intra_matrix[j] =
942             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
943         } else {
944             /* mpeg1/2 */
945             s->chroma_intra_matrix[j] =
946             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
947             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
948         }
949         if (s->avctx->intra_matrix)
950             s->intra_matrix[j] = s->avctx->intra_matrix[i];
951         if (s->avctx->inter_matrix)
952             s->inter_matrix[j] = s->avctx->inter_matrix[i];
953     }
954
955     /* precompute matrix */
956     /* for mjpeg, we do include qscale in the matrix */
957     if (s->out_format != FMT_MJPEG) {
958         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
959                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
960                           31, 1);
961         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
962                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
963                           31, 0);
964     }
965
966     if (ff_rate_control_init(s) < 0)
967         return -1;
968
969 #if FF_API_ERROR_RATE
970     FF_DISABLE_DEPRECATION_WARNINGS
971     if (avctx->error_rate)
972         s->error_rate = avctx->error_rate;
973     FF_ENABLE_DEPRECATION_WARNINGS;
974 #endif
975
976 #if FF_API_NORMALIZE_AQP
977     FF_DISABLE_DEPRECATION_WARNINGS
978     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
979         s->mpv_flags |= FF_MPV_FLAG_NAQ;
980     FF_ENABLE_DEPRECATION_WARNINGS;
981 #endif
982
983 #if FF_API_MV0
984     FF_DISABLE_DEPRECATION_WARNINGS
985     if (avctx->flags & CODEC_FLAG_MV0)
986         s->mpv_flags |= FF_MPV_FLAG_MV0;
987     FF_ENABLE_DEPRECATION_WARNINGS
988 #endif
989
990 #if FF_API_MPV_OPT
991     FF_DISABLE_DEPRECATION_WARNINGS
992     if (avctx->rc_qsquish != 0.0)
993         s->rc_qsquish = avctx->rc_qsquish;
994     if (avctx->rc_qmod_amp != 0.0)
995         s->rc_qmod_amp = avctx->rc_qmod_amp;
996     if (avctx->rc_qmod_freq)
997         s->rc_qmod_freq = avctx->rc_qmod_freq;
998     if (avctx->rc_buffer_aggressivity != 1.0)
999         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
1000     if (avctx->rc_initial_cplx != 0.0)
1001         s->rc_initial_cplx = avctx->rc_initial_cplx;
1002     if (avctx->lmin)
1003         s->lmin = avctx->lmin;
1004     if (avctx->lmax)
1005         s->lmax = avctx->lmax;
1006
1007     if (avctx->rc_eq) {
1008         av_freep(&s->rc_eq);
1009         s->rc_eq = av_strdup(avctx->rc_eq);
1010         if (!s->rc_eq)
1011             return AVERROR(ENOMEM);
1012     }
1013     FF_ENABLE_DEPRECATION_WARNINGS
1014 #endif
1015
1016     if (avctx->b_frame_strategy == 2) {
1017         for (i = 0; i < s->max_b_frames + 2; i++) {
1018             s->tmp_frames[i] = av_frame_alloc();
1019             if (!s->tmp_frames[i])
1020                 return AVERROR(ENOMEM);
1021
1022             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
1023             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
1024             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
1025
1026             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
1027             if (ret < 0)
1028                 return ret;
1029         }
1030     }
1031
1032     return 0;
1033 fail:
1034     ff_mpv_encode_end(avctx);
1035     return AVERROR_UNKNOWN;
1036 }
1037
1038 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1039 {
1040     MpegEncContext *s = avctx->priv_data;
1041     int i;
1042
1043     ff_rate_control_uninit(s);
1044
1045     ff_mpv_common_end(s);
1046     if (CONFIG_MJPEG_ENCODER &&
1047         s->out_format == FMT_MJPEG)
1048         ff_mjpeg_encode_close(s);
1049
1050     av_freep(&avctx->extradata);
1051
1052     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1053         av_frame_free(&s->tmp_frames[i]);
1054
1055     ff_free_picture_tables(&s->new_picture);
1056     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1057
1058     av_freep(&s->avctx->stats_out);
1059     av_freep(&s->ac_stats);
1060
1061     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1062     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1063     s->q_chroma_intra_matrix=   NULL;
1064     s->q_chroma_intra_matrix16= NULL;
1065     av_freep(&s->q_intra_matrix);
1066     av_freep(&s->q_inter_matrix);
1067     av_freep(&s->q_intra_matrix16);
1068     av_freep(&s->q_inter_matrix16);
1069     av_freep(&s->input_picture);
1070     av_freep(&s->reordered_input_picture);
1071     av_freep(&s->dct_offset);
1072
1073     return 0;
1074 }
1075
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block; only read, never written
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between the starts of two rows
 * @return sum over the 16x16 samples of |sample - ref|
 */
static int get_sae(const uint8_t *src, int ref, int stride)
{
    int acc = 0;
    int x, y;

    for (y = 0; y < 16; y++) {
        const uint8_t *row = src + y * stride;

        for (x = 0; x < 16; x++) {
            const int diff = row[x] - ref;

            acc += diff >= 0 ? diff : -diff;
        }
    }

    return acc;
}
1089
1090 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1091                            uint8_t *ref, int stride)
1092 {
1093     int x, y, w, h;
1094     int acc = 0;
1095
1096     w = s->width  & ~15;
1097     h = s->height & ~15;
1098
1099     for (y = 0; y < h; y += 16) {
1100         for (x = 0; x < w; x += 16) {
1101             int offset = x + y * stride;
1102             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1103                                       stride, 16);
1104             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1105             int sae  = get_sae(src + offset, mean, stride);
1106
1107             acc += sae + 500 < sad;
1108         }
1109     }
1110     return acc;
1111 }
1112
1113 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1114 {
1115     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1116                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1117                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1118                             &s->linesize, &s->uvlinesize);
1119 }
1120
1121 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1122 {
1123     Picture *pic = NULL;
1124     int64_t pts;
1125     int i, display_picture_number = 0, ret;
1126     int encoding_delay = s->max_b_frames ? s->max_b_frames
1127                                          : (s->low_delay ? 0 : 1);
1128     int flush_offset = 1;
1129     int direct = 1;
1130
1131     if (pic_arg) {
1132         pts = pic_arg->pts;
1133         display_picture_number = s->input_picture_number++;
1134
1135         if (pts != AV_NOPTS_VALUE) {
1136             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1137                 int64_t last = s->user_specified_pts;
1138
1139                 if (pts <= last) {
1140                     av_log(s->avctx, AV_LOG_ERROR,
1141                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1142                            pts, last);
1143                     return AVERROR(EINVAL);
1144                 }
1145
1146                 if (!s->low_delay && display_picture_number == 1)
1147                     s->dts_delta = pts - last;
1148             }
1149             s->user_specified_pts = pts;
1150         } else {
1151             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1152                 s->user_specified_pts =
1153                 pts = s->user_specified_pts + 1;
1154                 av_log(s->avctx, AV_LOG_INFO,
1155                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1156                        pts);
1157             } else {
1158                 pts = display_picture_number;
1159             }
1160         }
1161
1162         if (!pic_arg->buf[0] ||
1163             pic_arg->linesize[0] != s->linesize ||
1164             pic_arg->linesize[1] != s->uvlinesize ||
1165             pic_arg->linesize[2] != s->uvlinesize)
1166             direct = 0;
1167         if ((s->width & 15) || (s->height & 15))
1168             direct = 0;
1169         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1170             direct = 0;
1171         if (s->linesize & (STRIDE_ALIGN-1))
1172             direct = 0;
1173
1174         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1175                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1176
1177         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1178         if (i < 0)
1179             return i;
1180
1181         pic = &s->picture[i];
1182         pic->reference = 3;
1183
1184         if (direct) {
1185             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1186                 return ret;
1187         }
1188         ret = alloc_picture(s, pic, direct);
1189         if (ret < 0)
1190             return ret;
1191
1192         if (!direct) {
1193             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1194                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1195                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1196                 // empty
1197             } else {
1198                 int h_chroma_shift, v_chroma_shift;
1199                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1200                                                  &h_chroma_shift,
1201                                                  &v_chroma_shift);
1202
1203                 for (i = 0; i < 3; i++) {
1204                     int src_stride = pic_arg->linesize[i];
1205                     int dst_stride = i ? s->uvlinesize : s->linesize;
1206                     int h_shift = i ? h_chroma_shift : 0;
1207                     int v_shift = i ? v_chroma_shift : 0;
1208                     int w = s->width  >> h_shift;
1209                     int h = s->height >> v_shift;
1210                     uint8_t *src = pic_arg->data[i];
1211                     uint8_t *dst = pic->f->data[i];
1212                     int vpad = 16;
1213
1214                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1215                         && !s->progressive_sequence
1216                         && FFALIGN(s->height, 32) - s->height > 16)
1217                         vpad = 32;
1218
1219                     if (!s->avctx->rc_buffer_size)
1220                         dst += INPLACE_OFFSET;
1221
1222                     if (src_stride == dst_stride)
1223                         memcpy(dst, src, src_stride * h);
1224                     else {
1225                         int h2 = h;
1226                         uint8_t *dst2 = dst;
1227                         while (h2--) {
1228                             memcpy(dst2, src, w);
1229                             dst2 += dst_stride;
1230                             src += src_stride;
1231                         }
1232                     }
1233                     if ((s->width & 15) || (s->height & (vpad-1))) {
1234                         s->mpvencdsp.draw_edges(dst, dst_stride,
1235                                                 w, h,
1236                                                 16 >> h_shift,
1237                                                 vpad >> v_shift,
1238                                                 EDGE_BOTTOM);
1239                     }
1240                 }
1241             }
1242         }
1243         ret = av_frame_copy_props(pic->f, pic_arg);
1244         if (ret < 0)
1245             return ret;
1246
1247         pic->f->display_picture_number = display_picture_number;
1248         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1249     } else {
1250         /* Flushing: When we have not received enough input frames,
1251          * ensure s->input_picture[0] contains the first picture */
1252         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1253             if (s->input_picture[flush_offset])
1254                 break;
1255
1256         if (flush_offset <= 1)
1257             flush_offset = 1;
1258         else
1259             encoding_delay = encoding_delay - flush_offset + 1;
1260     }
1261
1262     /* shift buffer entries */
1263     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1264         s->input_picture[i - flush_offset] = s->input_picture[i];
1265
1266     s->input_picture[encoding_delay] = (Picture*) pic;
1267
1268     return 0;
1269 }
1270
1271 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1272 {
1273     int x, y, plane;
1274     int score = 0;
1275     int64_t score64 = 0;
1276
1277     for (plane = 0; plane < 3; plane++) {
1278         const int stride = p->f->linesize[plane];
1279         const int bw = plane ? 1 : 2;
1280         for (y = 0; y < s->mb_height * bw; y++) {
1281             for (x = 0; x < s->mb_width * bw; x++) {
1282                 int off = p->shared ? 0 : 16;
1283                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1284                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1285                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1286
1287                 switch (FFABS(s->avctx->frame_skip_exp)) {
1288                 case 0: score    =  FFMAX(score, v);          break;
1289                 case 1: score   += FFABS(v);                  break;
1290                 case 2: score64 += v * (int64_t)v;                       break;
1291                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1292                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1293                 }
1294             }
1295         }
1296     }
1297     emms_c();
1298
1299     if (score)
1300         score64 = score;
1301     if (s->avctx->frame_skip_exp < 0)
1302         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1303                       -1.0/s->avctx->frame_skip_exp);
1304
1305     if (score64 < s->avctx->frame_skip_threshold)
1306         return 1;
1307     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1308         return 1;
1309     return 0;
1310 }
1311
1312 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1313 {
1314     AVPacket pkt = { 0 };
1315     int ret, got_output;
1316
1317     av_init_packet(&pkt);
1318     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1319     if (ret < 0)
1320         return ret;
1321
1322     ret = pkt.size;
1323     av_packet_unref(&pkt);
1324     return ret;
1325 }
1326
1327 static int estimate_best_b_count(MpegEncContext *s)
1328 {
1329     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1330     AVCodecContext *c = avcodec_alloc_context3(NULL);
1331     const int scale = s->avctx->brd_scale;
1332     int i, j, out_size, p_lambda, b_lambda, lambda2;
1333     int64_t best_rd  = INT64_MAX;
1334     int best_b_count = -1;
1335
1336     if (!c)
1337         return AVERROR(ENOMEM);
1338     av_assert0(scale >= 0 && scale <= 3);
1339
1340     //emms_c();
1341     //s->next_picture_ptr->quality;
1342     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1343     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1344     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1345     if (!b_lambda) // FIXME we should do this somewhere else
1346         b_lambda = p_lambda;
1347     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1348                FF_LAMBDA_SHIFT;
1349
1350     c->width        = s->width  >> scale;
1351     c->height       = s->height >> scale;
1352     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1353     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1354     c->mb_decision  = s->avctx->mb_decision;
1355     c->me_cmp       = s->avctx->me_cmp;
1356     c->mb_cmp       = s->avctx->mb_cmp;
1357     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1358     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1359     c->time_base    = s->avctx->time_base;
1360     c->max_b_frames = s->max_b_frames;
1361
1362     if (avcodec_open2(c, codec, NULL) < 0)
1363         return -1;
1364
1365     for (i = 0; i < s->max_b_frames + 2; i++) {
1366         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1367                                                 s->next_picture_ptr;
1368         uint8_t *data[4];
1369
1370         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1371             pre_input = *pre_input_ptr;
1372             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1373
1374             if (!pre_input.shared && i) {
1375                 data[0] += INPLACE_OFFSET;
1376                 data[1] += INPLACE_OFFSET;
1377                 data[2] += INPLACE_OFFSET;
1378             }
1379
1380             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1381                                        s->tmp_frames[i]->linesize[0],
1382                                        data[0],
1383                                        pre_input.f->linesize[0],
1384                                        c->width, c->height);
1385             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1386                                        s->tmp_frames[i]->linesize[1],
1387                                        data[1],
1388                                        pre_input.f->linesize[1],
1389                                        c->width >> 1, c->height >> 1);
1390             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1391                                        s->tmp_frames[i]->linesize[2],
1392                                        data[2],
1393                                        pre_input.f->linesize[2],
1394                                        c->width >> 1, c->height >> 1);
1395         }
1396     }
1397
1398     for (j = 0; j < s->max_b_frames + 1; j++) {
1399         int64_t rd = 0;
1400
1401         if (!s->input_picture[j])
1402             break;
1403
1404         c->error[0] = c->error[1] = c->error[2] = 0;
1405
1406         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1407         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1408
1409         out_size = encode_frame(c, s->tmp_frames[0]);
1410
1411         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1412
1413         for (i = 0; i < s->max_b_frames + 1; i++) {
1414             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1415
1416             s->tmp_frames[i + 1]->pict_type = is_p ?
1417                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1418             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1419
1420             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1421
1422             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1423         }
1424
1425         /* get the delayed frames */
1426         while (out_size) {
1427             out_size = encode_frame(c, NULL);
1428             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1429         }
1430
1431         rd += c->error[0] + c->error[1] + c->error[2];
1432
1433         if (rd < best_rd) {
1434             best_rd = rd;
1435             best_b_count = j;
1436         }
1437     }
1438
1439     avcodec_close(c);
1440     av_freep(&c);
1441
1442     return best_b_count;
1443 }
1444
1445 static int select_input_picture(MpegEncContext *s)
1446 {
1447     int i, ret;
1448
1449     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1450         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1451     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1452
1453     /* set next picture type & ordering */
1454     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1455         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1456             if (s->picture_in_gop_number < s->gop_size &&
1457                 s->next_picture_ptr &&
1458                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1459                 // FIXME check that te gop check above is +-1 correct
1460                 av_frame_unref(s->input_picture[0]->f);
1461
1462                 ff_vbv_update(s, 0);
1463
1464                 goto no_output_pic;
1465             }
1466         }
1467
1468         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1469             !s->next_picture_ptr || s->intra_only) {
1470             s->reordered_input_picture[0] = s->input_picture[0];
1471             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1472             s->reordered_input_picture[0]->f->coded_picture_number =
1473                 s->coded_picture_number++;
1474         } else {
1475             int b_frames;
1476
1477             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1478                 for (i = 0; i < s->max_b_frames + 1; i++) {
1479                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1480
1481                     if (pict_num >= s->rc_context.num_entries)
1482                         break;
1483                     if (!s->input_picture[i]) {
1484                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1485                         break;
1486                     }
1487
1488                     s->input_picture[i]->f->pict_type =
1489                         s->rc_context.entry[pict_num].new_pict_type;
1490                 }
1491             }
1492
1493             if (s->avctx->b_frame_strategy == 0) {
1494                 b_frames = s->max_b_frames;
1495                 while (b_frames && !s->input_picture[b_frames])
1496                     b_frames--;
1497             } else if (s->avctx->b_frame_strategy == 1) {
1498                 for (i = 1; i < s->max_b_frames + 1; i++) {
1499                     if (s->input_picture[i] &&
1500                         s->input_picture[i]->b_frame_score == 0) {
1501                         s->input_picture[i]->b_frame_score =
1502                             get_intra_count(s,
1503                                             s->input_picture[i    ]->f->data[0],
1504                                             s->input_picture[i - 1]->f->data[0],
1505                                             s->linesize) + 1;
1506                     }
1507                 }
1508                 for (i = 0; i < s->max_b_frames + 1; i++) {
1509                     if (!s->input_picture[i] ||
1510                         s->input_picture[i]->b_frame_score - 1 >
1511                             s->mb_num / s->avctx->b_sensitivity)
1512                         break;
1513                 }
1514
1515                 b_frames = FFMAX(0, i - 1);
1516
1517                 /* reset scores */
1518                 for (i = 0; i < b_frames + 1; i++) {
1519                     s->input_picture[i]->b_frame_score = 0;
1520                 }
1521             } else if (s->avctx->b_frame_strategy == 2) {
1522                 b_frames = estimate_best_b_count(s);
1523             } else {
1524                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1525                 b_frames = 0;
1526             }
1527
1528             emms_c();
1529
1530             for (i = b_frames - 1; i >= 0; i--) {
1531                 int type = s->input_picture[i]->f->pict_type;
1532                 if (type && type != AV_PICTURE_TYPE_B)
1533                     b_frames = i;
1534             }
1535             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1536                 b_frames == s->max_b_frames) {
1537                 av_log(s->avctx, AV_LOG_ERROR,
1538                        "warning, too many b frames in a row\n");
1539             }
1540
1541             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1542                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1543                     s->gop_size > s->picture_in_gop_number) {
1544                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1545                 } else {
1546                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1547                         b_frames = 0;
1548                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1549                 }
1550             }
1551
1552             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1553                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1554                 b_frames--;
1555
1556             s->reordered_input_picture[0] = s->input_picture[b_frames];
1557             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1558                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1559             s->reordered_input_picture[0]->f->coded_picture_number =
1560                 s->coded_picture_number++;
1561             for (i = 0; i < b_frames; i++) {
1562                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1563                 s->reordered_input_picture[i + 1]->f->pict_type =
1564                     AV_PICTURE_TYPE_B;
1565                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1566                     s->coded_picture_number++;
1567             }
1568         }
1569     }
1570 no_output_pic:
1571     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1572
1573     if (s->reordered_input_picture[0]) {
1574         s->reordered_input_picture[0]->reference =
1575            s->reordered_input_picture[0]->f->pict_type !=
1576                AV_PICTURE_TYPE_B ? 3 : 0;
1577
1578         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1579             return ret;
1580
1581         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1582             // input is a shared pix, so we can't modifiy it -> alloc a new
1583             // one & ensure that the shared one is reuseable
1584
1585             Picture *pic;
1586             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1587             if (i < 0)
1588                 return i;
1589             pic = &s->picture[i];
1590
1591             pic->reference = s->reordered_input_picture[0]->reference;
1592             if (alloc_picture(s, pic, 0) < 0) {
1593                 return -1;
1594             }
1595
1596             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1597             if (ret < 0)
1598                 return ret;
1599
1600             /* mark us unused / free shared pic */
1601             av_frame_unref(s->reordered_input_picture[0]->f);
1602             s->reordered_input_picture[0]->shared = 0;
1603
1604             s->current_picture_ptr = pic;
1605         } else {
1606             // input is not a shared pix -> reuse buffer for current_pix
1607             s->current_picture_ptr = s->reordered_input_picture[0];
1608             for (i = 0; i < 4; i++) {
1609                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1610             }
1611         }
1612         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1613         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1614                                        s->current_picture_ptr)) < 0)
1615             return ret;
1616
1617         s->picture_number = s->new_picture.f->display_picture_number;
1618     }
1619     return 0;
1620 }
1621
1622 static void frame_end(MpegEncContext *s)
1623 {
1624     if (s->unrestricted_mv &&
1625         s->current_picture.reference &&
1626         !s->intra_only) {
1627         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1628         int hshift = desc->log2_chroma_w;
1629         int vshift = desc->log2_chroma_h;
1630         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1631                                 s->current_picture.f->linesize[0],
1632                                 s->h_edge_pos, s->v_edge_pos,
1633                                 EDGE_WIDTH, EDGE_WIDTH,
1634                                 EDGE_TOP | EDGE_BOTTOM);
1635         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1636                                 s->current_picture.f->linesize[1],
1637                                 s->h_edge_pos >> hshift,
1638                                 s->v_edge_pos >> vshift,
1639                                 EDGE_WIDTH >> hshift,
1640                                 EDGE_WIDTH >> vshift,
1641                                 EDGE_TOP | EDGE_BOTTOM);
1642         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1643                                 s->current_picture.f->linesize[2],
1644                                 s->h_edge_pos >> hshift,
1645                                 s->v_edge_pos >> vshift,
1646                                 EDGE_WIDTH >> hshift,
1647                                 EDGE_WIDTH >> vshift,
1648                                 EDGE_TOP | EDGE_BOTTOM);
1649     }
1650
1651     emms_c();
1652
1653     s->last_pict_type                 = s->pict_type;
1654     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1655     if (s->pict_type!= AV_PICTURE_TYPE_B)
1656         s->last_non_b_pict_type = s->pict_type;
1657
1658 #if FF_API_CODED_FRAME
1659 FF_DISABLE_DEPRECATION_WARNINGS
1660     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1661 FF_ENABLE_DEPRECATION_WARNINGS
1662 #endif
1663 #if FF_API_ERROR_FRAME
1664 FF_DISABLE_DEPRECATION_WARNINGS
1665     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1666            sizeof(s->current_picture.encoding_error));
1667 FF_ENABLE_DEPRECATION_WARNINGS
1668 #endif
1669 }
1670
1671 static void update_noise_reduction(MpegEncContext *s)
1672 {
1673     int intra, i;
1674
1675     for (intra = 0; intra < 2; intra++) {
1676         if (s->dct_count[intra] > (1 << 16)) {
1677             for (i = 0; i < 64; i++) {
1678                 s->dct_error_sum[intra][i] >>= 1;
1679             }
1680             s->dct_count[intra] >>= 1;
1681         }
1682
1683         for (i = 0; i < 64; i++) {
1684             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1685                                        s->dct_count[intra] +
1686                                        s->dct_error_sum[intra][i] / 2) /
1687                                       (s->dct_error_sum[intra][i] + 1);
1688         }
1689     }
1690 }
1691
1692 static int frame_start(MpegEncContext *s)
1693 {
1694     int ret;
1695
1696     /* mark & release old frames */
1697     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1698         s->last_picture_ptr != s->next_picture_ptr &&
1699         s->last_picture_ptr->f->buf[0]) {
1700         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1701     }
1702
1703     s->current_picture_ptr->f->pict_type = s->pict_type;
1704     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1705
1706     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1707     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1708                                    s->current_picture_ptr)) < 0)
1709         return ret;
1710
1711     if (s->pict_type != AV_PICTURE_TYPE_B) {
1712         s->last_picture_ptr = s->next_picture_ptr;
1713         if (!s->droppable)
1714             s->next_picture_ptr = s->current_picture_ptr;
1715     }
1716
1717     if (s->last_picture_ptr) {
1718         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1719         if (s->last_picture_ptr->f->buf[0] &&
1720             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1721                                        s->last_picture_ptr)) < 0)
1722             return ret;
1723     }
1724     if (s->next_picture_ptr) {
1725         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1726         if (s->next_picture_ptr->f->buf[0] &&
1727             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1728                                        s->next_picture_ptr)) < 0)
1729             return ret;
1730     }
1731
1732     if (s->picture_structure!= PICT_FRAME) {
1733         int i;
1734         for (i = 0; i < 4; i++) {
1735             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1736                 s->current_picture.f->data[i] +=
1737                     s->current_picture.f->linesize[i];
1738             }
1739             s->current_picture.f->linesize[i] *= 2;
1740             s->last_picture.f->linesize[i]    *= 2;
1741             s->next_picture.f->linesize[i]    *= 2;
1742         }
1743     }
1744
1745     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1746         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1747         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1748     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1749         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1750         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1751     } else {
1752         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1753         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1754     }
1755
1756     if (s->dct_error_sum) {
1757         av_assert2(s->avctx->noise_reduction && s->encoding);
1758         update_noise_reduction(s);
1759     }
1760
1761     return 0;
1762 }
1763
1764 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1765                           const AVFrame *pic_arg, int *got_packet)
1766 {
1767     MpegEncContext *s = avctx->priv_data;
1768     int i, stuffing_count, ret;
1769     int context_count = s->slice_context_count;
1770
1771     s->vbv_ignore_qmax = 0;
1772
1773     s->picture_in_gop_number++;
1774
1775     if (load_input_picture(s, pic_arg) < 0)
1776         return -1;
1777
1778     if (select_input_picture(s) < 0) {
1779         return -1;
1780     }
1781
1782     /* output? */
1783     if (s->new_picture.f->data[0]) {
1784         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1785         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1786                                               :
1787                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1788         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1789             return ret;
1790         if (s->mb_info) {
1791             s->mb_info_ptr = av_packet_new_side_data(pkt,
1792                                  AV_PKT_DATA_H263_MB_INFO,
1793                                  s->mb_width*s->mb_height*12);
1794             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1795         }
1796
1797         for (i = 0; i < context_count; i++) {
1798             int start_y = s->thread_context[i]->start_mb_y;
1799             int   end_y = s->thread_context[i]->  end_mb_y;
1800             int h       = s->mb_height;
1801             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1802             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1803
1804             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1805         }
1806
1807         s->pict_type = s->new_picture.f->pict_type;
1808         //emms_c();
1809         ret = frame_start(s);
1810         if (ret < 0)
1811             return ret;
1812 vbv_retry:
1813         ret = encode_picture(s, s->picture_number);
1814         if (growing_buffer) {
1815             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1816             pkt->data = s->pb.buf;
1817             pkt->size = avctx->internal->byte_buffer_size;
1818         }
1819         if (ret < 0)
1820             return -1;
1821
1822         avctx->header_bits = s->header_bits;
1823         avctx->mv_bits     = s->mv_bits;
1824         avctx->misc_bits   = s->misc_bits;
1825         avctx->i_tex_bits  = s->i_tex_bits;
1826         avctx->p_tex_bits  = s->p_tex_bits;
1827         avctx->i_count     = s->i_count;
1828         // FIXME f/b_count in avctx
1829         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1830         avctx->skip_count  = s->skip_count;
1831
1832         frame_end(s);
1833
1834         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1835             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1836
1837         if (avctx->rc_buffer_size) {
1838             RateControlContext *rcc = &s->rc_context;
1839             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1840             int hq = (s->avctx->mb_decision == FF_MB_DECISION_RD || s->avctx->trellis);
1841             int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1842
1843             if (put_bits_count(&s->pb) > max_size &&
1844                 s->lambda < s->lmax) {
1845                 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1846                                        (s->qscale + 1) / s->qscale);
1847                 if (s->adaptive_quant) {
1848                     int i;
1849                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1850                         s->lambda_table[i] =
1851                             FFMAX(s->lambda_table[i] + min_step,
1852                                   s->lambda_table[i] * (s->qscale + 1) /
1853                                   s->qscale);
1854                 }
1855                 s->mb_skipped = 0;        // done in frame_start()
1856                 // done in encode_picture() so we must undo it
1857                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1858                     if (s->flipflop_rounding          ||
1859                         s->codec_id == AV_CODEC_ID_H263P ||
1860                         s->codec_id == AV_CODEC_ID_MPEG4)
1861                         s->no_rounding ^= 1;
1862                 }
1863                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1864                     s->time_base       = s->last_time_base;
1865                     s->last_non_b_time = s->time - s->pp_time;
1866                 }
1867                 for (i = 0; i < context_count; i++) {
1868                     PutBitContext *pb = &s->thread_context[i]->pb;
1869                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1870                 }
1871                 s->vbv_ignore_qmax = 1;
1872                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1873                 goto vbv_retry;
1874             }
1875
1876             av_assert0(s->avctx->rc_max_rate);
1877         }
1878
1879         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1880             ff_write_pass1_stats(s);
1881
1882         for (i = 0; i < 4; i++) {
1883             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1884             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1885         }
1886         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1887                                        s->current_picture_ptr->encoding_error,
1888                                        (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1889                                        s->pict_type);
1890
1891         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1892             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1893                    avctx->i_tex_bits + avctx->p_tex_bits ==
1894                        put_bits_count(&s->pb));
1895         flush_put_bits(&s->pb);
1896         s->frame_bits  = put_bits_count(&s->pb);
1897
1898         stuffing_count = ff_vbv_update(s, s->frame_bits);
1899         s->stuffing_bits = 8*stuffing_count;
1900         if (stuffing_count) {
1901             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1902                     stuffing_count + 50) {
1903                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1904                 return -1;
1905             }
1906
1907             switch (s->codec_id) {
1908             case AV_CODEC_ID_MPEG1VIDEO:
1909             case AV_CODEC_ID_MPEG2VIDEO:
1910                 while (stuffing_count--) {
1911                     put_bits(&s->pb, 8, 0);
1912                 }
1913             break;
1914             case AV_CODEC_ID_MPEG4:
1915                 put_bits(&s->pb, 16, 0);
1916                 put_bits(&s->pb, 16, 0x1C3);
1917                 stuffing_count -= 4;
1918                 while (stuffing_count--) {
1919                     put_bits(&s->pb, 8, 0xFF);
1920                 }
1921             break;
1922             default:
1923                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1924             }
1925             flush_put_bits(&s->pb);
1926             s->frame_bits  = put_bits_count(&s->pb);
1927         }
1928
1929         /* update mpeg1/2 vbv_delay for CBR */
1930         if (s->avctx->rc_max_rate                          &&
1931             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1932             s->out_format == FMT_MPEG1                     &&
1933             90000LL * (avctx->rc_buffer_size - 1) <=
1934                 s->avctx->rc_max_rate * 0xFFFFLL) {
1935             int vbv_delay, min_delay;
1936             double inbits  = s->avctx->rc_max_rate *
1937                              av_q2d(s->avctx->time_base);
1938             int    minbits = s->frame_bits - 8 *
1939                              (s->vbv_delay_ptr - s->pb.buf - 1);
1940             double bits    = s->rc_context.buffer_index + minbits - inbits;
1941
1942             if (bits < 0)
1943                 av_log(s->avctx, AV_LOG_ERROR,
1944                        "Internal error, negative bits\n");
1945
1946             assert(s->repeat_first_field == 0);
1947
1948             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1949             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1950                         s->avctx->rc_max_rate;
1951
1952             vbv_delay = FFMAX(vbv_delay, min_delay);
1953
1954             av_assert0(vbv_delay < 0xFFFF);
1955
1956             s->vbv_delay_ptr[0] &= 0xF8;
1957             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1958             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1959             s->vbv_delay_ptr[2] &= 0x07;
1960             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1961             avctx->vbv_delay     = vbv_delay * 300;
1962         }
1963         s->total_bits     += s->frame_bits;
1964         avctx->frame_bits  = s->frame_bits;
1965
1966         pkt->pts = s->current_picture.f->pts;
1967         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1968             if (!s->current_picture.f->coded_picture_number)
1969                 pkt->dts = pkt->pts - s->dts_delta;
1970             else
1971                 pkt->dts = s->reordered_pts;
1972             s->reordered_pts = pkt->pts;
1973         } else
1974             pkt->dts = pkt->pts;
1975         if (s->current_picture.f->key_frame)
1976             pkt->flags |= AV_PKT_FLAG_KEY;
1977         if (s->mb_info)
1978             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1979     } else {
1980         s->frame_bits = 0;
1981     }
1982
1983     /* release non-reference frames */
1984     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1985         if (!s->picture[i].reference)
1986             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1987     }
1988
1989     av_assert1((s->frame_bits & 7) == 0);
1990
1991     pkt->size = s->frame_bits / 8;
1992     *got_packet = !!pkt->size;
1993     return 0;
1994 }
1995
1996 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1997                                                 int n, int threshold)
1998 {
1999     static const char tab[64] = {
2000         3, 2, 2, 1, 1, 1, 1, 1,
2001         1, 1, 1, 1, 1, 1, 1, 1,
2002         1, 1, 1, 1, 1, 1, 1, 1,
2003         0, 0, 0, 0, 0, 0, 0, 0,
2004         0, 0, 0, 0, 0, 0, 0, 0,
2005         0, 0, 0, 0, 0, 0, 0, 0,
2006         0, 0, 0, 0, 0, 0, 0, 0,
2007         0, 0, 0, 0, 0, 0, 0, 0
2008     };
2009     int score = 0;
2010     int run = 0;
2011     int i;
2012     int16_t *block = s->block[n];
2013     const int last_index = s->block_last_index[n];
2014     int skip_dc;
2015
2016     if (threshold < 0) {
2017         skip_dc = 0;
2018         threshold = -threshold;
2019     } else
2020         skip_dc = 1;
2021
2022     /* Are all we could set to zero already zero? */
2023     if (last_index <= skip_dc - 1)
2024         return;
2025
2026     for (i = 0; i <= last_index; i++) {
2027         const int j = s->intra_scantable.permutated[i];
2028         const int level = FFABS(block[j]);
2029         if (level == 1) {
2030             if (skip_dc && i == 0)
2031                 continue;
2032             score += tab[run];
2033             run = 0;
2034         } else if (level > 1) {
2035             return;
2036         } else {
2037             run++;
2038         }
2039     }
2040     if (score >= threshold)
2041         return;
2042     for (i = skip_dc; i <= last_index; i++) {
2043         const int j = s->intra_scantable.permutated[i];
2044         block[j] = 0;
2045     }
2046     if (block[0])
2047         s->block_last_index[n] = 0;
2048     else
2049         s->block_last_index[n] = -1;
2050 }
2051
2052 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2053                                int last_index)
2054 {
2055     int i;
2056     const int maxlevel = s->max_qcoeff;
2057     const int minlevel = s->min_qcoeff;
2058     int overflow = 0;
2059
2060     if (s->mb_intra) {
2061         i = 1; // skip clipping of intra dc
2062     } else
2063         i = 0;
2064
2065     for (; i <= last_index; i++) {
2066         const int j = s->intra_scantable.permutated[i];
2067         int level = block[j];
2068
2069         if (level > maxlevel) {
2070             level = maxlevel;
2071             overflow++;
2072         } else if (level < minlevel) {
2073             level = minlevel;
2074             overflow++;
2075         }
2076
2077         block[j] = level;
2078     }
2079
2080     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2081         av_log(s->avctx, AV_LOG_INFO,
2082                "warning, clipping %d dct coefficients to %d..%d\n",
2083                overflow, minlevel, maxlevel);
2084 }
2085
/**
 * Compute a per-pixel weight for an 8x8 block from its local activity.
 *
 * For every pixel the samples in the up to 3x3 neighbourhood (cut off at
 * the block border) are gathered; the weight is
 *   36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output array of 64 weights, stored row by row
 * @param ptr    top left sample of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* number of samples inside the clipped window */
            const int count = (bottom - top) * (right - left);
            int total    = 0;
            int total_sq = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int sample = ptr[nx + ny * stride];

                    total    += sample;
                    total_sq += sample * sample;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * total_sq - total * total)) / count;
        }
    }
}
2109
2110 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2111                                                 int motion_x, int motion_y,
2112                                                 int mb_block_height,
2113                                                 int mb_block_width,
2114                                                 int mb_block_count)
2115 {
2116     int16_t weight[12][64];
2117     int16_t orig[12][64];
2118     const int mb_x = s->mb_x;
2119     const int mb_y = s->mb_y;
2120     int i;
2121     int skip_dct[12];
2122     int dct_offset = s->linesize * 8; // default for progressive frames
2123     int uv_dct_offset = s->uvlinesize * 8;
2124     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2125     ptrdiff_t wrap_y, wrap_c;
2126
2127     for (i = 0; i < mb_block_count; i++)
2128         skip_dct[i] = s->skipdct;
2129
2130     if (s->adaptive_quant) {
2131         const int last_qp = s->qscale;
2132         const int mb_xy = mb_x + mb_y * s->mb_stride;
2133
2134         s->lambda = s->lambda_table[mb_xy];
2135         update_qscale(s);
2136
2137         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2138             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2139             s->dquant = s->qscale - last_qp;
2140
2141             if (s->out_format == FMT_H263) {
2142                 s->dquant = av_clip(s->dquant, -2, 2);
2143
2144                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2145                     if (!s->mb_intra) {
2146                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2147                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2148                                 s->dquant = 0;
2149                         }
2150                         if (s->mv_type == MV_TYPE_8X8)
2151                             s->dquant = 0;
2152                     }
2153                 }
2154             }
2155         }
2156         ff_set_qscale(s, last_qp + s->dquant);
2157     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2158         ff_set_qscale(s, s->qscale + s->dquant);
2159
2160     wrap_y = s->linesize;
2161     wrap_c = s->uvlinesize;
2162     ptr_y  = s->new_picture.f->data[0] +
2163              (mb_y * 16 * wrap_y)              + mb_x * 16;
2164     ptr_cb = s->new_picture.f->data[1] +
2165              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2166     ptr_cr = s->new_picture.f->data[2] +
2167              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2168
2169     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2170         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2171         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2172         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2173         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2174                                  wrap_y, wrap_y,
2175                                  16, 16, mb_x * 16, mb_y * 16,
2176                                  s->width, s->height);
2177         ptr_y = ebuf;
2178         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2179                                  wrap_c, wrap_c,
2180                                  mb_block_width, mb_block_height,
2181                                  mb_x * mb_block_width, mb_y * mb_block_height,
2182                                  cw, ch);
2183         ptr_cb = ebuf + 16 * wrap_y;
2184         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2185                                  wrap_c, wrap_c,
2186                                  mb_block_width, mb_block_height,
2187                                  mb_x * mb_block_width, mb_y * mb_block_height,
2188                                  cw, ch);
2189         ptr_cr = ebuf + 16 * wrap_y + 16;
2190     }
2191
2192     if (s->mb_intra) {
2193         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2194             int progressive_score, interlaced_score;
2195
2196             s->interlaced_dct = 0;
2197             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2198                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2199                                                      NULL, wrap_y, 8) - 400;
2200
2201             if (progressive_score > 0) {
2202                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2203                                                         NULL, wrap_y * 2, 8) +
2204                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2205                                                         NULL, wrap_y * 2, 8);
2206                 if (progressive_score > interlaced_score) {
2207                     s->interlaced_dct = 1;
2208
2209                     dct_offset = wrap_y;
2210                     uv_dct_offset = wrap_c;
2211                     wrap_y <<= 1;
2212                     if (s->chroma_format == CHROMA_422 ||
2213                         s->chroma_format == CHROMA_444)
2214                         wrap_c <<= 1;
2215                 }
2216             }
2217         }
2218
2219         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2220         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2221         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2222         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2223
2224         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2225             skip_dct[4] = 1;
2226             skip_dct[5] = 1;
2227         } else {
2228             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2229             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2230             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2231                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2232                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2233             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2234                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2235                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2236                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2237                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2238                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2239                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2240             }
2241         }
2242     } else {
2243         op_pixels_func (*op_pix)[4];
2244         qpel_mc_func (*op_qpix)[16];
2245         uint8_t *dest_y, *dest_cb, *dest_cr;
2246
2247         dest_y  = s->dest[0];
2248         dest_cb = s->dest[1];
2249         dest_cr = s->dest[2];
2250
2251         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2252             op_pix  = s->hdsp.put_pixels_tab;
2253             op_qpix = s->qdsp.put_qpel_pixels_tab;
2254         } else {
2255             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2256             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2257         }
2258
2259         if (s->mv_dir & MV_DIR_FORWARD) {
2260             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2261                           s->last_picture.f->data,
2262                           op_pix, op_qpix);
2263             op_pix  = s->hdsp.avg_pixels_tab;
2264             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2265         }
2266         if (s->mv_dir & MV_DIR_BACKWARD) {
2267             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2268                           s->next_picture.f->data,
2269                           op_pix, op_qpix);
2270         }
2271
2272         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2273             int progressive_score, interlaced_score;
2274
2275             s->interlaced_dct = 0;
2276             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2277                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2278                                                      ptr_y + wrap_y * 8,
2279                                                      wrap_y, 8) - 400;
2280
2281             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2282                 progressive_score -= 400;
2283
2284             if (progressive_score > 0) {
2285                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2286                                                         wrap_y * 2, 8) +
2287                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2288                                                         ptr_y + wrap_y,
2289                                                         wrap_y * 2, 8);
2290
2291                 if (progressive_score > interlaced_score) {
2292                     s->interlaced_dct = 1;
2293
2294                     dct_offset = wrap_y;
2295                     uv_dct_offset = wrap_c;
2296                     wrap_y <<= 1;
2297                     if (s->chroma_format == CHROMA_422)
2298                         wrap_c <<= 1;
2299                 }
2300             }
2301         }
2302
2303         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2304         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2305         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2306                             dest_y + dct_offset, wrap_y);
2307         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2308                             dest_y + dct_offset + 8, wrap_y);
2309
2310         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2311             skip_dct[4] = 1;
2312             skip_dct[5] = 1;
2313         } else {
2314             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2315             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2316             if (!s->chroma_y_shift) { /* 422 */
2317                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2318                                     dest_cb + uv_dct_offset, wrap_c);
2319                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2320                                     dest_cr + uv_dct_offset, wrap_c);
2321             }
2322         }
2323         /* pre quantization */
2324         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2325                 2 * s->qscale * s->qscale) {
2326             // FIXME optimize
2327             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2328                 skip_dct[0] = 1;
2329             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2330                 skip_dct[1] = 1;
2331             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2332                                wrap_y, 8) < 20 * s->qscale)
2333                 skip_dct[2] = 1;
2334             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2335                                wrap_y, 8) < 20 * s->qscale)
2336                 skip_dct[3] = 1;
2337             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2338                 skip_dct[4] = 1;
2339             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2340                 skip_dct[5] = 1;
2341             if (!s->chroma_y_shift) { /* 422 */
2342                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2343                                    dest_cb + uv_dct_offset,
2344                                    wrap_c, 8) < 20 * s->qscale)
2345                     skip_dct[6] = 1;
2346                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2347                                    dest_cr + uv_dct_offset,
2348                                    wrap_c, 8) < 20 * s->qscale)
2349                     skip_dct[7] = 1;
2350             }
2351         }
2352     }
2353
2354     if (s->quantizer_noise_shaping) {
2355         if (!skip_dct[0])
2356             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2357         if (!skip_dct[1])
2358             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2359         if (!skip_dct[2])
2360             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2361         if (!skip_dct[3])
2362             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2363         if (!skip_dct[4])
2364             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2365         if (!skip_dct[5])
2366             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2367         if (!s->chroma_y_shift) { /* 422 */
2368             if (!skip_dct[6])
2369                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2370                                   wrap_c);
2371             if (!skip_dct[7])
2372                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2373                                   wrap_c);
2374         }
2375         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2376     }
2377
2378     /* DCT & quantize */
2379     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2380     {
2381         for (i = 0; i < mb_block_count; i++) {
2382             if (!skip_dct[i]) {
2383                 int overflow;
2384                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2385                 // FIXME we could decide to change to quantizer instead of
2386                 // clipping
2387                 // JS: I don't think that would be a good idea it could lower
2388                 //     quality instead of improve it. Just INTRADC clipping
2389                 //     deserves changes in quantizer
2390                 if (overflow)
2391                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2392             } else
2393                 s->block_last_index[i] = -1;
2394         }
2395         if (s->quantizer_noise_shaping) {
2396             for (i = 0; i < mb_block_count; i++) {
2397                 if (!skip_dct[i]) {
2398                     s->block_last_index[i] =
2399                         dct_quantize_refine(s, s->block[i], weight[i],
2400                                             orig[i], i, s->qscale);
2401                 }
2402             }
2403         }
2404
2405         if (s->luma_elim_threshold && !s->mb_intra)
2406             for (i = 0; i < 4; i++)
2407                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2408         if (s->chroma_elim_threshold && !s->mb_intra)
2409             for (i = 4; i < mb_block_count; i++)
2410                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2411
2412         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2413             for (i = 0; i < mb_block_count; i++) {
2414                 if (s->block_last_index[i] == -1)
2415                     s->coded_score[i] = INT_MAX / 256;
2416             }
2417         }
2418     }
2419
2420     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2421         s->block_last_index[4] =
2422         s->block_last_index[5] = 0;
2423         s->block[4][0] =
2424         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2425         if (!s->chroma_y_shift) { /* 422 / 444 */
2426             for (i=6; i<12; i++) {
2427                 s->block_last_index[i] = 0;
2428                 s->block[i][0] = s->block[4][0];
2429             }
2430         }
2431     }
2432
2433     // non c quantize code returns incorrect block_last_index FIXME
2434     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2435         for (i = 0; i < mb_block_count; i++) {
2436             int j;
2437             if (s->block_last_index[i] > 0) {
2438                 for (j = 63; j > 0; j--) {
2439                     if (s->block[i][s->intra_scantable.permutated[j]])
2440                         break;
2441                 }
2442                 s->block_last_index[i] = j;
2443             }
2444         }
2445     }
2446
2447     /* huffman encode */
2448     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2449     case AV_CODEC_ID_MPEG1VIDEO:
2450     case AV_CODEC_ID_MPEG2VIDEO:
2451         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2452             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2453         break;
2454     case AV_CODEC_ID_MPEG4:
2455         if (CONFIG_MPEG4_ENCODER)
2456             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2457         break;
2458     case AV_CODEC_ID_MSMPEG4V2:
2459     case AV_CODEC_ID_MSMPEG4V3:
2460     case AV_CODEC_ID_WMV1:
2461         if (CONFIG_MSMPEG4_ENCODER)
2462             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2463         break;
2464     case AV_CODEC_ID_WMV2:
2465         if (CONFIG_WMV2_ENCODER)
2466             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2467         break;
2468     case AV_CODEC_ID_H261:
2469         if (CONFIG_H261_ENCODER)
2470             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2471         break;
2472     case AV_CODEC_ID_H263:
2473     case AV_CODEC_ID_H263P:
2474     case AV_CODEC_ID_FLV1:
2475     case AV_CODEC_ID_RV10:
2476     case AV_CODEC_ID_RV20:
2477         if (CONFIG_H263_ENCODER)
2478             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2479         break;
2480     case AV_CODEC_ID_MJPEG:
2481     case AV_CODEC_ID_AMV:
2482         if (CONFIG_MJPEG_ENCODER)
2483             ff_mjpeg_encode_mb(s, s->block);
2484         break;
2485     default:
2486         av_assert1(0);
2487     }
2488 }
2489
2490 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2491 {
2492     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2493     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2494     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2495 }
2496
2497 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2498     int i;
2499
2500     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2501
2502     /* mpeg1 */
2503     d->mb_skip_run= s->mb_skip_run;
2504     for(i=0; i<3; i++)
2505         d->last_dc[i] = s->last_dc[i];
2506
2507     /* statistics */
2508     d->mv_bits= s->mv_bits;
2509     d->i_tex_bits= s->i_tex_bits;
2510     d->p_tex_bits= s->p_tex_bits;
2511     d->i_count= s->i_count;
2512     d->f_count= s->f_count;
2513     d->b_count= s->b_count;
2514     d->skip_count= s->skip_count;
2515     d->misc_bits= s->misc_bits;
2516     d->last_bits= 0;
2517
2518     d->mb_skipped= 0;
2519     d->qscale= s->qscale;
2520     d->dquant= s->dquant;
2521
2522     d->esc3_level_length= s->esc3_level_length;
2523 }
2524
2525 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2526     int i;
2527
2528     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2529     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2530
2531     /* mpeg1 */
2532     d->mb_skip_run= s->mb_skip_run;
2533     for(i=0; i<3; i++)
2534         d->last_dc[i] = s->last_dc[i];
2535
2536     /* statistics */
2537     d->mv_bits= s->mv_bits;
2538     d->i_tex_bits= s->i_tex_bits;
2539     d->p_tex_bits= s->p_tex_bits;
2540     d->i_count= s->i_count;
2541     d->f_count= s->f_count;
2542     d->b_count= s->b_count;
2543     d->skip_count= s->skip_count;
2544     d->misc_bits= s->misc_bits;
2545
2546     d->mb_intra= s->mb_intra;
2547     d->mb_skipped= s->mb_skipped;
2548     d->mv_type= s->mv_type;
2549     d->mv_dir= s->mv_dir;
2550     d->pb= s->pb;
2551     if(s->data_partitioning){
2552         d->pb2= s->pb2;
2553         d->tex_pb= s->tex_pb;
2554     }
2555     d->block= s->block;
2556     for(i=0; i<8; i++)
2557         d->block_last_index[i]= s->block_last_index[i];
2558     d->interlaced_dct= s->interlaced_dct;
2559     d->qscale= s->qscale;
2560
2561     d->esc3_level_length= s->esc3_level_length;
2562 }
2563
2564 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2565                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2566                            int *dmin, int *next_block, int motion_x, int motion_y)
2567 {
2568     int score;
2569     uint8_t *dest_backup[3];
2570
2571     copy_context_before_encode(s, backup, type);
2572
2573     s->block= s->blocks[*next_block];
2574     s->pb= pb[*next_block];
2575     if(s->data_partitioning){
2576         s->pb2   = pb2   [*next_block];
2577         s->tex_pb= tex_pb[*next_block];
2578     }
2579
2580     if(*next_block){
2581         memcpy(dest_backup, s->dest, sizeof(s->dest));
2582         s->dest[0] = s->sc.rd_scratchpad;
2583         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2584         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2585         av_assert0(s->linesize >= 32); //FIXME
2586     }
2587
2588     encode_mb(s, motion_x, motion_y);
2589
2590     score= put_bits_count(&s->pb);
2591     if(s->data_partitioning){
2592         score+= put_bits_count(&s->pb2);
2593         score+= put_bits_count(&s->tex_pb);
2594     }
2595
2596     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2597         ff_mpv_decode_mb(s, s->block);
2598
2599         score *= s->lambda2;
2600         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2601     }
2602
2603     if(*next_block){
2604         memcpy(s->dest, dest_backup, sizeof(s->dest));
2605     }
2606
2607     if(score<*dmin){
2608         *dmin= score;
2609         *next_block^=1;
2610
2611         copy_context_after_encode(best, s, type);
2612     }
2613 }
2614
2615 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2616     uint32_t *sq = ff_square_tab + 256;
2617     int acc=0;
2618     int x,y;
2619
2620     if(w==16 && h==16)
2621         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2622     else if(w==8 && h==8)
2623         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2624
2625     for(y=0; y<h; y++){
2626         for(x=0; x<w; x++){
2627             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2628         }
2629     }
2630
2631     av_assert2(acc>=0);
2632
2633     return acc;
2634 }
2635
2636 static int sse_mb(MpegEncContext *s){
2637     int w= 16;
2638     int h= 16;
2639
2640     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2641     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2642
2643     if(w==16 && h==16)
2644       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2645         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2646                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2647                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2648       }else{
2649         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2650                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2651                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2652       }
2653     else
2654         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2655                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2656                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2657 }
2658
2659 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2660     MpegEncContext *s= *(void**)arg;
2661
2662
2663     s->me.pre_pass=1;
2664     s->me.dia_size= s->avctx->pre_dia_size;
2665     s->first_slice_line=1;
2666     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2667         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2668             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2669         }
2670         s->first_slice_line=0;
2671     }
2672
2673     s->me.pre_pass=0;
2674
2675     return 0;
2676 }
2677
2678 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2679     MpegEncContext *s= *(void**)arg;
2680
2681     ff_check_alignment();
2682
2683     s->me.dia_size= s->avctx->dia_size;
2684     s->first_slice_line=1;
2685     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2686         s->mb_x=0; //for block init below
2687         ff_init_block_index(s);
2688         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2689             s->block_index[0]+=2;
2690             s->block_index[1]+=2;
2691             s->block_index[2]+=2;
2692             s->block_index[3]+=2;
2693
2694             /* compute motion vector & mb_type and store in context */
2695             if(s->pict_type==AV_PICTURE_TYPE_B)
2696                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2697             else
2698                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2699         }
2700         s->first_slice_line=0;
2701     }
2702     return 0;
2703 }
2704
2705 static int mb_var_thread(AVCodecContext *c, void *arg){
2706     MpegEncContext *s= *(void**)arg;
2707     int mb_x, mb_y;
2708
2709     ff_check_alignment();
2710
2711     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2712         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2713             int xx = mb_x * 16;
2714             int yy = mb_y * 16;
2715             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2716             int varc;
2717             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2718
2719             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2720                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2721
2722             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2723             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2724             s->me.mb_var_sum_temp    += varc;
2725         }
2726     }
2727     return 0;
2728 }
2729
2730 static void write_slice_end(MpegEncContext *s){
2731     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2732         if(s->partitioned_frame){
2733             ff_mpeg4_merge_partitions(s);
2734         }
2735
2736         ff_mpeg4_stuffing(&s->pb);
2737     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2738         ff_mjpeg_encode_stuffing(s);
2739     }
2740
2741     avpriv_align_put_bits(&s->pb);
2742     flush_put_bits(&s->pb);
2743
2744     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2745         s->misc_bits+= get_bits_diff(s);
2746 }
2747
2748 static void write_mb_info(MpegEncContext *s)
2749 {
2750     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2751     int offset = put_bits_count(&s->pb);
2752     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2753     int gobn = s->mb_y / s->gob_index;
2754     int pred_x, pred_y;
2755     if (CONFIG_H263_ENCODER)
2756         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2757     bytestream_put_le32(&ptr, offset);
2758     bytestream_put_byte(&ptr, s->qscale);
2759     bytestream_put_byte(&ptr, gobn);
2760     bytestream_put_le16(&ptr, mba);
2761     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2762     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2763     /* 4MV not implemented */
2764     bytestream_put_byte(&ptr, 0); /* hmv2 */
2765     bytestream_put_byte(&ptr, 0); /* vmv2 */
2766 }
2767
2768 static void update_mb_info(MpegEncContext *s, int startcode)
2769 {
2770     if (!s->mb_info)
2771         return;
2772     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2773         s->mb_info_size += 12;
2774         s->prev_mb_info = s->last_mb_info;
2775     }
2776     if (startcode) {
2777         s->prev_mb_info = put_bits_count(&s->pb)/8;
2778         /* This might have incremented mb_info_size above, and we return without
2779          * actually writing any info into that slot yet. But in that case,
2780          * this will be called again at the start of the after writing the
2781          * start code, actually writing the mb info. */
2782         return;
2783     }
2784
2785     s->last_mb_info = put_bits_count(&s->pb)/8;
2786     if (!s->mb_info_size)
2787         s->mb_info_size += 12;
2788     write_mb_info(s);
2789 }
2790
2791 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2792 {
2793     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2794         && s->slice_context_count == 1
2795         && s->pb.buf == s->avctx->internal->byte_buffer) {
2796         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2797         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2798
2799         uint8_t *new_buffer = NULL;
2800         int new_buffer_size = 0;
2801
2802         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2803                               s->avctx->internal->byte_buffer_size + size_increase);
2804         if (!new_buffer)
2805             return AVERROR(ENOMEM);
2806
2807         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2808         av_free(s->avctx->internal->byte_buffer);
2809         s->avctx->internal->byte_buffer      = new_buffer;
2810         s->avctx->internal->byte_buffer_size = new_buffer_size;
2811         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2812         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2813         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2814     }
2815     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2816         return AVERROR(EINVAL);
2817     return 0;
2818 }
2819
2820 static int encode_thread(AVCodecContext *c, void *arg){
2821     MpegEncContext *s= *(void**)arg;
2822     int mb_x, mb_y, pdif = 0;
2823     int chr_h= 16>>s->chroma_y_shift;
2824     int i, j;
2825     MpegEncContext best_s = { 0 }, backup_s;
2826     uint8_t bit_buf[2][MAX_MB_BYTES];
2827     uint8_t bit_buf2[2][MAX_MB_BYTES];
2828     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2829     PutBitContext pb[2], pb2[2], tex_pb[2];
2830
2831     ff_check_alignment();
2832
2833     for(i=0; i<2; i++){
2834         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2835         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2836         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2837     }
2838
2839     s->last_bits= put_bits_count(&s->pb);
2840     s->mv_bits=0;
2841     s->misc_bits=0;
2842     s->i_tex_bits=0;
2843     s->p_tex_bits=0;
2844     s->i_count=0;
2845     s->f_count=0;
2846     s->b_count=0;
2847     s->skip_count=0;
2848
2849     for(i=0; i<3; i++){
2850         /* init last dc values */
2851         /* note: quant matrix value (8) is implied here */
2852         s->last_dc[i] = 128 << s->intra_dc_precision;
2853
2854         s->current_picture.encoding_error[i] = 0;
2855     }
2856     if(s->codec_id==AV_CODEC_ID_AMV){
2857         s->last_dc[0] = 128*8/13;
2858         s->last_dc[1] = 128*8/14;
2859         s->last_dc[2] = 128*8/14;
2860     }
2861     s->mb_skip_run = 0;
2862     memset(s->last_mv, 0, sizeof(s->last_mv));
2863
2864     s->last_mv_dir = 0;
2865
2866     switch(s->codec_id){
2867     case AV_CODEC_ID_H263:
2868     case AV_CODEC_ID_H263P:
2869     case AV_CODEC_ID_FLV1:
2870         if (CONFIG_H263_ENCODER)
2871             s->gob_index = H263_GOB_HEIGHT(s->height);
2872         break;
2873     case AV_CODEC_ID_MPEG4:
2874         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2875             ff_mpeg4_init_partitions(s);
2876         break;
2877     }
2878
2879     s->resync_mb_x=0;
2880     s->resync_mb_y=0;
2881     s->first_slice_line = 1;
2882     s->ptr_lastgob = s->pb.buf;
2883     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2884         s->mb_x=0;
2885         s->mb_y= mb_y;
2886
2887         ff_set_qscale(s, s->qscale);
2888         ff_init_block_index(s);
2889
2890         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2891             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2892             int mb_type= s->mb_type[xy];
2893 //            int d;
2894             int dmin= INT_MAX;
2895             int dir;
2896             int size_increase =  s->avctx->internal->byte_buffer_size/4
2897                                + s->mb_width*MAX_MB_BYTES;
2898
2899             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2900             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2901                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2902                 return -1;
2903             }
2904             if(s->data_partitioning){
2905                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2906                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2907                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2908                     return -1;
2909                 }
2910             }
2911
2912             s->mb_x = mb_x;
2913             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2914             ff_update_block_index(s);
2915
2916             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2917                 ff_h261_reorder_mb_index(s);
2918                 xy= s->mb_y*s->mb_stride + s->mb_x;
2919                 mb_type= s->mb_type[xy];
2920             }
2921
2922             /* write gob / video packet header  */
2923             if(s->rtp_mode){
2924                 int current_packet_size, is_gob_start;
2925
2926                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2927
2928                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2929
2930                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2931
2932                 switch(s->codec_id){
2933                 case AV_CODEC_ID_H263:
2934                 case AV_CODEC_ID_H263P:
2935                     if(!s->h263_slice_structured)
2936                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2937                     break;
2938                 case AV_CODEC_ID_MPEG2VIDEO:
2939                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2940                 case AV_CODEC_ID_MPEG1VIDEO:
2941                     if(s->mb_skip_run) is_gob_start=0;
2942                     break;
2943                 case AV_CODEC_ID_MJPEG:
2944                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2945                     break;
2946                 }
2947
2948                 if(is_gob_start){
2949                     if(s->start_mb_y != mb_y || mb_x!=0){
2950                         write_slice_end(s);
2951
2952                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2953                             ff_mpeg4_init_partitions(s);
2954                         }
2955                     }
2956
2957                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2958                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2959
2960                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2961                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2962                         int d = 100 / s->error_rate;
2963                         if(r % d == 0){
2964                             current_packet_size=0;
2965                             s->pb.buf_ptr= s->ptr_lastgob;
2966                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2967                         }
2968                     }
2969
2970                     if (s->avctx->rtp_callback){
2971                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2972                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2973                     }
2974                     update_mb_info(s, 1);
2975
2976                     switch(s->codec_id){
2977                     case AV_CODEC_ID_MPEG4:
2978                         if (CONFIG_MPEG4_ENCODER) {
2979                             ff_mpeg4_encode_video_packet_header(s);
2980                             ff_mpeg4_clean_buffers(s);
2981                         }
2982                     break;
2983                     case AV_CODEC_ID_MPEG1VIDEO:
2984                     case AV_CODEC_ID_MPEG2VIDEO:
2985                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2986                             ff_mpeg1_encode_slice_header(s);
2987                             ff_mpeg1_clean_buffers(s);
2988                         }
2989                     break;
2990                     case AV_CODEC_ID_H263:
2991                     case AV_CODEC_ID_H263P:
2992                         if (CONFIG_H263_ENCODER)
2993                             ff_h263_encode_gob_header(s, mb_y);
2994                     break;
2995                     }
2996
2997                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2998                         int bits= put_bits_count(&s->pb);
2999                         s->misc_bits+= bits - s->last_bits;
3000                         s->last_bits= bits;
3001                     }
3002
3003                     s->ptr_lastgob += current_packet_size;
3004                     s->first_slice_line=1;
3005                     s->resync_mb_x=mb_x;
3006                     s->resync_mb_y=mb_y;
3007                 }
3008             }
3009
3010             if(  (s->resync_mb_x   == s->mb_x)
3011                && s->resync_mb_y+1 == s->mb_y){
3012                 s->first_slice_line=0;
3013             }
3014
3015             s->mb_skipped=0;
3016             s->dquant=0; //only for QP_RD
3017
3018             update_mb_info(s, 0);
3019
3020             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
3021                 int next_block=0;
3022                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3023
3024                 copy_context_before_encode(&backup_s, s, -1);
3025                 backup_s.pb= s->pb;
3026                 best_s.data_partitioning= s->data_partitioning;
3027                 best_s.partitioned_frame= s->partitioned_frame;
3028                 if(s->data_partitioning){
3029                     backup_s.pb2= s->pb2;
3030                     backup_s.tex_pb= s->tex_pb;
3031                 }
3032
3033                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3034                     s->mv_dir = MV_DIR_FORWARD;
3035                     s->mv_type = MV_TYPE_16X16;
3036                     s->mb_intra= 0;
3037                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3038                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3039                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3040                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3041                 }
3042                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3043                     s->mv_dir = MV_DIR_FORWARD;
3044                     s->mv_type = MV_TYPE_FIELD;
3045                     s->mb_intra= 0;
3046                     for(i=0; i<2; i++){
3047                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3048                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3049                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3050                     }
3051                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3052                                  &dmin, &next_block, 0, 0);
3053                 }
3054                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3055                     s->mv_dir = MV_DIR_FORWARD;
3056                     s->mv_type = MV_TYPE_16X16;
3057                     s->mb_intra= 0;
3058                     s->mv[0][0][0] = 0;
3059                     s->mv[0][0][1] = 0;
3060                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3061                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3062                 }
3063                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3064                     s->mv_dir = MV_DIR_FORWARD;
3065                     s->mv_type = MV_TYPE_8X8;
3066                     s->mb_intra= 0;
3067                     for(i=0; i<4; i++){
3068                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3069                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3070                     }
3071                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3072                                  &dmin, &next_block, 0, 0);
3073                 }
3074                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3075                     s->mv_dir = MV_DIR_FORWARD;
3076                     s->mv_type = MV_TYPE_16X16;
3077                     s->mb_intra= 0;
3078                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3079                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3080                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3081                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3082                 }
3083                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3084                     s->mv_dir = MV_DIR_BACKWARD;
3085                     s->mv_type = MV_TYPE_16X16;
3086                     s->mb_intra= 0;
3087                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3088                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3089                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3090                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3091                 }
3092                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3093                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3094                     s->mv_type = MV_TYPE_16X16;
3095                     s->mb_intra= 0;
3096                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3097                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3098                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3099                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3100                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3101                                  &dmin, &next_block, 0, 0);
3102                 }
3103                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3104                     s->mv_dir = MV_DIR_FORWARD;
3105                     s->mv_type = MV_TYPE_FIELD;
3106                     s->mb_intra= 0;
3107                     for(i=0; i<2; i++){
3108                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3109                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3110                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3111                     }
3112                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3113                                  &dmin, &next_block, 0, 0);
3114                 }
3115                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3116                     s->mv_dir = MV_DIR_BACKWARD;
3117                     s->mv_type = MV_TYPE_FIELD;
3118                     s->mb_intra= 0;
3119                     for(i=0; i<2; i++){
3120                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3121                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3122                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3123                     }
3124                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3125                                  &dmin, &next_block, 0, 0);
3126                 }
3127                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3128                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3129                     s->mv_type = MV_TYPE_FIELD;
3130                     s->mb_intra= 0;
3131                     for(dir=0; dir<2; dir++){
3132                         for(i=0; i<2; i++){
3133                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3134                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3135                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3136                         }
3137                     }
3138                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3139                                  &dmin, &next_block, 0, 0);
3140                 }
3141                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3142                     s->mv_dir = 0;
3143                     s->mv_type = MV_TYPE_16X16;
3144                     s->mb_intra= 1;
3145                     s->mv[0][0][0] = 0;
3146                     s->mv[0][0][1] = 0;
3147                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3148                                  &dmin, &next_block, 0, 0);
3149                     if(s->h263_pred || s->h263_aic){
3150                         if(best_s.mb_intra)
3151                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3152                         else
3153                             ff_clean_intra_table_entries(s); //old mode?
3154                     }
3155                 }
3156
3157                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3158                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3159                         const int last_qp= backup_s.qscale;
3160                         int qpi, qp, dc[6];
3161                         int16_t ac[6][16];
3162                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3163                         static const int dquant_tab[4]={-1,1,-2,2};
3164                         int storecoefs = s->mb_intra && s->dc_val[0];
3165
3166                         av_assert2(backup_s.dquant == 0);
3167
3168                         //FIXME intra
3169                         s->mv_dir= best_s.mv_dir;
3170                         s->mv_type = MV_TYPE_16X16;
3171                         s->mb_intra= best_s.mb_intra;
3172                         s->mv[0][0][0] = best_s.mv[0][0][0];
3173                         s->mv[0][0][1] = best_s.mv[0][0][1];
3174                         s->mv[1][0][0] = best_s.mv[1][0][0];
3175                         s->mv[1][0][1] = best_s.mv[1][0][1];
3176
3177                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3178                         for(; qpi<4; qpi++){
3179                             int dquant= dquant_tab[qpi];
3180                             qp= last_qp + dquant;
3181                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3182                                 continue;
3183                             backup_s.dquant= dquant;
3184                             if(storecoefs){
3185                                 for(i=0; i<6; i++){
3186                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3187                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3188                                 }
3189                             }
3190
3191                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3192                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3193                             if(best_s.qscale != qp){
3194                                 if(storecoefs){
3195                                     for(i=0; i<6; i++){
3196                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3197                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3198                                     }
3199                                 }
3200                             }
3201                         }
3202                     }
3203                 }
3204                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3205                     int mx= s->b_direct_mv_table[xy][0];
3206                     int my= s->b_direct_mv_table[xy][1];
3207
3208                     backup_s.dquant = 0;
3209                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3210                     s->mb_intra= 0;
3211                     ff_mpeg4_set_direct_mv(s, mx, my);
3212                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3213                                  &dmin, &next_block, mx, my);
3214                 }
3215                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3216                     backup_s.dquant = 0;
3217                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3218                     s->mb_intra= 0;
3219                     ff_mpeg4_set_direct_mv(s, 0, 0);
3220                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3221                                  &dmin, &next_block, 0, 0);
3222                 }
3223                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3224                     int coded=0;
3225                     for(i=0; i<6; i++)
3226                         coded |= s->block_last_index[i];
3227                     if(coded){
3228                         int mx,my;
3229                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3230                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3231                             mx=my=0; //FIXME find the one we actually used
3232                             ff_mpeg4_set_direct_mv(s, mx, my);
3233                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3234                             mx= s->mv[1][0][0];
3235                             my= s->mv[1][0][1];
3236                         }else{
3237                             mx= s->mv[0][0][0];
3238                             my= s->mv[0][0][1];
3239                         }
3240
3241                         s->mv_dir= best_s.mv_dir;
3242                         s->mv_type = best_s.mv_type;
3243                         s->mb_intra= 0;
3244 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3245                         s->mv[0][0][1] = best_s.mv[0][0][1];
3246                         s->mv[1][0][0] = best_s.mv[1][0][0];
3247                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3248                         backup_s.dquant= 0;
3249                         s->skipdct=1;
3250                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3251                                         &dmin, &next_block, mx, my);
3252                         s->skipdct=0;
3253                     }
3254                 }
3255
3256                 s->current_picture.qscale_table[xy] = best_s.qscale;
3257
3258                 copy_context_after_encode(s, &best_s, -1);
3259
3260                 pb_bits_count= put_bits_count(&s->pb);
3261                 flush_put_bits(&s->pb);
3262                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3263                 s->pb= backup_s.pb;
3264
3265                 if(s->data_partitioning){
3266                     pb2_bits_count= put_bits_count(&s->pb2);
3267                     flush_put_bits(&s->pb2);
3268                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3269                     s->pb2= backup_s.pb2;
3270
3271                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3272                     flush_put_bits(&s->tex_pb);
3273                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3274                     s->tex_pb= backup_s.tex_pb;
3275                 }
3276                 s->last_bits= put_bits_count(&s->pb);
3277
3278                 if (CONFIG_H263_ENCODER &&
3279                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3280                     ff_h263_update_motion_val(s);
3281
3282                 if(next_block==0){ //FIXME 16 vs linesize16
3283                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3284                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3285                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3286                 }
3287
3288                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3289                     ff_mpv_decode_mb(s, s->block);
3290             } else {
3291                 int motion_x = 0, motion_y = 0;
3292                 s->mv_type=MV_TYPE_16X16;
3293                 // only one MB-Type possible
3294
3295                 switch(mb_type){
3296                 case CANDIDATE_MB_TYPE_INTRA:
3297                     s->mv_dir = 0;
3298                     s->mb_intra= 1;
3299                     motion_x= s->mv[0][0][0] = 0;
3300                     motion_y= s->mv[0][0][1] = 0;
3301                     break;
3302                 case CANDIDATE_MB_TYPE_INTER:
3303                     s->mv_dir = MV_DIR_FORWARD;
3304                     s->mb_intra= 0;
3305                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3306                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3307                     break;
3308                 case CANDIDATE_MB_TYPE_INTER_I:
3309                     s->mv_dir = MV_DIR_FORWARD;
3310                     s->mv_type = MV_TYPE_FIELD;
3311                     s->mb_intra= 0;
3312                     for(i=0; i<2; i++){
3313                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3314                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3315                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3316                     }
3317                     break;
3318                 case CANDIDATE_MB_TYPE_INTER4V:
3319                     s->mv_dir = MV_DIR_FORWARD;
3320                     s->mv_type = MV_TYPE_8X8;
3321                     s->mb_intra= 0;
3322                     for(i=0; i<4; i++){
3323                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3324                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3325                     }
3326                     break;
3327                 case CANDIDATE_MB_TYPE_DIRECT:
3328                     if (CONFIG_MPEG4_ENCODER) {
3329                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3330                         s->mb_intra= 0;
3331                         motion_x=s->b_direct_mv_table[xy][0];
3332                         motion_y=s->b_direct_mv_table[xy][1];
3333                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3334                     }
3335                     break;
3336                 case CANDIDATE_MB_TYPE_DIRECT0:
3337                     if (CONFIG_MPEG4_ENCODER) {
3338                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3339                         s->mb_intra= 0;
3340                         ff_mpeg4_set_direct_mv(s, 0, 0);
3341                     }
3342                     break;
3343                 case CANDIDATE_MB_TYPE_BIDIR:
3344                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3345                     s->mb_intra= 0;
3346                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3347                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3348                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3349                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3350                     break;
3351                 case CANDIDATE_MB_TYPE_BACKWARD:
3352                     s->mv_dir = MV_DIR_BACKWARD;
3353                     s->mb_intra= 0;
3354                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3355                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3356                     break;
3357                 case CANDIDATE_MB_TYPE_FORWARD:
3358                     s->mv_dir = MV_DIR_FORWARD;
3359                     s->mb_intra= 0;
3360                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3361                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3362                     break;
3363                 case CANDIDATE_MB_TYPE_FORWARD_I:
3364                     s->mv_dir = MV_DIR_FORWARD;
3365                     s->mv_type = MV_TYPE_FIELD;
3366                     s->mb_intra= 0;
3367                     for(i=0; i<2; i++){
3368                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3369                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3370                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3371                     }
3372                     break;
3373                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3374                     s->mv_dir = MV_DIR_BACKWARD;
3375                     s->mv_type = MV_TYPE_FIELD;
3376                     s->mb_intra= 0;
3377                     for(i=0; i<2; i++){
3378                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3379                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3380                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3381                     }
3382                     break;
3383                 case CANDIDATE_MB_TYPE_BIDIR_I:
3384                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3385                     s->mv_type = MV_TYPE_FIELD;
3386                     s->mb_intra= 0;
3387                     for(dir=0; dir<2; dir++){
3388                         for(i=0; i<2; i++){
3389                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3390                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3391                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3392                         }
3393                     }
3394                     break;
3395                 default:
3396                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3397                 }
3398
3399                 encode_mb(s, motion_x, motion_y);
3400
3401                 // RAL: Update last macroblock type
3402                 s->last_mv_dir = s->mv_dir;
3403
3404                 if (CONFIG_H263_ENCODER &&
3405                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3406                     ff_h263_update_motion_val(s);
3407
3408                 ff_mpv_decode_mb(s, s->block);
3409             }
3410
3411             /* clean the MV table in IPS frames for direct mode in B frames */
3412             if(s->mb_intra /* && I,P,S_TYPE */){
3413                 s->p_mv_table[xy][0]=0;
3414                 s->p_mv_table[xy][1]=0;
3415             }
3416
3417             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3418                 int w= 16;
3419                 int h= 16;
3420
3421                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3422                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3423
3424                 s->current_picture.encoding_error[0] += sse(
3425                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3426                     s->dest[0], w, h, s->linesize);
3427                 s->current_picture.encoding_error[1] += sse(
3428                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3429                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3430                 s->current_picture.encoding_error[2] += sse(
3431                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3432                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3433             }
3434             if(s->loop_filter){
3435                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3436                     ff_h263_loop_filter(s);
3437             }
3438             ff_dlog(s->avctx, "MB %d %d bits\n",
3439                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3440         }
3441     }
3442
3443     //not beautiful here but we must write it before flushing so it has to be here
3444     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3445         ff_msmpeg4_encode_ext_header(s);
3446
3447     write_slice_end(s);
3448
3449     /* Send the last GOB if RTP */
3450     if (s->avctx->rtp_callback) {
3451         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3452         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3453         /* Call the RTP callback to send the last GOB */
3454         emms_c();
3455         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3456     }
3457
3458     return 0;
3459 }
3460
/*
 * Add src->field to dst->field and reset src->field to zero.
 * Expects pointers named dst and src to be in scope at the point of use.
 * Wrapped in do { } while (0) so that the two statements always act as a
 * single statement, e.g. as the body of an unbraced if or for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3462 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3463     MERGE(me.scene_change_score);
3464     MERGE(me.mc_mb_var_sum_temp);
3465     MERGE(me.mb_var_sum_temp);
3466 }
3467
3468 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3469     int i;
3470
3471     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3472     MERGE(dct_count[1]);
3473     MERGE(mv_bits);
3474     MERGE(i_tex_bits);
3475     MERGE(p_tex_bits);
3476     MERGE(i_count);
3477     MERGE(f_count);
3478     MERGE(b_count);
3479     MERGE(skip_count);
3480     MERGE(misc_bits);
3481     MERGE(er.error_count);
3482     MERGE(padding_bug_score);
3483     MERGE(current_picture.encoding_error[0]);
3484     MERGE(current_picture.encoding_error[1]);
3485     MERGE(current_picture.encoding_error[2]);
3486
3487     if(dst->avctx->noise_reduction){
3488         for(i=0; i<64; i++){
3489             MERGE(dct_error_sum[0][i]);
3490             MERGE(dct_error_sum[1][i]);
3491         }
3492     }
3493
3494     assert(put_bits_count(&src->pb) % 8 ==0);
3495     assert(put_bits_count(&dst->pb) % 8 ==0);
3496     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3497     flush_put_bits(&dst->pb);
3498 }
3499
3500 static int estimate_qp(MpegEncContext *s, int dry_run){
3501     if (s->next_lambda){
3502         s->current_picture_ptr->f->quality =
3503         s->current_picture.f->quality = s->next_lambda;
3504         if(!dry_run) s->next_lambda= 0;
3505     } else if (!s->fixed_qscale) {
3506         s->current_picture_ptr->f->quality =
3507         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3508         if (s->current_picture.f->quality < 0)
3509             return -1;
3510     }
3511
3512     if(s->adaptive_quant){
3513         switch(s->codec_id){
3514         case AV_CODEC_ID_MPEG4:
3515             if (CONFIG_MPEG4_ENCODER)
3516                 ff_clean_mpeg4_qscales(s);
3517             break;
3518         case AV_CODEC_ID_H263:
3519         case AV_CODEC_ID_H263P:
3520         case AV_CODEC_ID_FLV1:
3521             if (CONFIG_H263_ENCODER)
3522                 ff_clean_h263_qscales(s);
3523             break;
3524         default:
3525             ff_init_qscale_tab(s);
3526         }
3527
3528         s->lambda= s->lambda_table[0];
3529         //FIXME broken
3530     }else
3531         s->lambda = s->current_picture.f->quality;
3532     update_qscale(s);
3533     return 0;
3534 }
3535
3536 /* must be called before writing the header */
3537 static void set_frame_distances(MpegEncContext * s){
3538     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3539     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3540
3541     if(s->pict_type==AV_PICTURE_TYPE_B){
3542         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3543         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3544     }else{
3545         s->pp_time= s->time - s->last_non_b_time;
3546         s->last_non_b_time= s->time;
3547         assert(s->picture_number==0 || s->pp_time > 0);
3548     }
3549 }
3550
3551 static int encode_picture(MpegEncContext *s, int picture_number)
3552 {
3553     int i, ret;
3554     int bits;
3555     int context_count = s->slice_context_count;
3556
3557     s->picture_number = picture_number;
3558
3559     /* Reset the average MB variance */
3560     s->me.mb_var_sum_temp    =
3561     s->me.mc_mb_var_sum_temp = 0;
3562
3563     /* we need to initialize some time vars before we can encode b-frames */
3564     // RAL: Condition added for MPEG1VIDEO
3565     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3566         set_frame_distances(s);
3567     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3568         ff_set_mpeg4_time(s);
3569
3570     s->me.scene_change_score=0;
3571
3572 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3573
3574     if(s->pict_type==AV_PICTURE_TYPE_I){
3575         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3576         else                        s->no_rounding=0;
3577     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3578         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3579             s->no_rounding ^= 1;
3580     }
3581
3582     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3583         if (estimate_qp(s,1) < 0)
3584             return -1;
3585         ff_get_2pass_fcode(s);
3586     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3587         if(s->pict_type==AV_PICTURE_TYPE_B)
3588             s->lambda= s->last_lambda_for[s->pict_type];
3589         else
3590             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3591         update_qscale(s);
3592     }
3593
3594     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3595         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3596         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3597         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3598         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3599     }
3600
3601     s->mb_intra=0; //for the rate distortion & bit compare functions
3602     for(i=1; i<context_count; i++){
3603         ret = ff_update_duplicate_context(s->thread_context[i], s);
3604         if (ret < 0)
3605             return ret;
3606     }
3607
3608     if(ff_init_me(s)<0)
3609         return -1;
3610
3611     /* Estimate motion for every MB */
3612     if(s->pict_type != AV_PICTURE_TYPE_I){
3613         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3614         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3615         if (s->pict_type != AV_PICTURE_TYPE_B) {
3616             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3617                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3618             }
3619         }
3620
3621         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3622     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3623         /* I-Frame */
3624         for(i=0; i<s->mb_stride*s->mb_height; i++)
3625             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3626
3627         if(!s->fixed_qscale){
3628             /* finding spatial complexity for I-frame rate control */
3629             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3630         }
3631     }
3632     for(i=1; i<context_count; i++){
3633         merge_context_after_me(s, s->thread_context[i]);
3634     }
3635     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3636     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3637     emms_c();
3638
3639     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3640         s->pict_type= AV_PICTURE_TYPE_I;
3641         for(i=0; i<s->mb_stride*s->mb_height; i++)
3642             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3643         if(s->msmpeg4_version >= 3)
3644             s->no_rounding=1;
3645         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3646                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3647     }
3648
3649     if(!s->umvplus){
3650         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3651             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3652
3653             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3654                 int a,b;
3655                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3656                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3657                 s->f_code= FFMAX3(s->f_code, a, b);
3658             }
3659
3660             ff_fix_long_p_mvs(s);
3661             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3662             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3663                 int j;
3664                 for(i=0; i<2; i++){
3665                     for(j=0; j<2; j++)
3666                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3667                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3668                 }
3669             }
3670         }
3671
3672         if(s->pict_type==AV_PICTURE_TYPE_B){
3673             int a, b;
3674
3675             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3676             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3677             s->f_code = FFMAX(a, b);
3678
3679             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3680             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3681             s->b_code = FFMAX(a, b);
3682
3683             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3684             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3685             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3686             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3687             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3688                 int dir, j;
3689                 for(dir=0; dir<2; dir++){
3690                     for(i=0; i<2; i++){
3691                         for(j=0; j<2; j++){
3692                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3693                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3694                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3695                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3696                         }
3697                     }
3698                 }
3699             }
3700         }
3701     }
3702
3703     if (estimate_qp(s, 0) < 0)
3704         return -1;
3705
3706     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3707         s->pict_type == AV_PICTURE_TYPE_I &&
3708         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3709         s->qscale= 3; //reduce clipping problems
3710
3711     if (s->out_format == FMT_MJPEG) {
3712         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3713         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3714
3715         if (s->avctx->intra_matrix) {
3716             chroma_matrix =
3717             luma_matrix = s->avctx->intra_matrix;
3718         }
3719         if (s->avctx->chroma_intra_matrix)
3720             chroma_matrix = s->avctx->chroma_intra_matrix;
3721
3722         /* for mjpeg, we do include qscale in the matrix */
3723         for(i=1;i<64;i++){
3724             int j = s->idsp.idct_permutation[i];
3725
3726             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3727             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3728         }
3729         s->y_dc_scale_table=
3730         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3731         s->chroma_intra_matrix[0] =
3732         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3733         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3734                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3735         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3736                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3737         s->qscale= 8;
3738     }
3739     if(s->codec_id == AV_CODEC_ID_AMV){
3740         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3741         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3742         for(i=1;i<64;i++){
3743             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3744
3745             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3746             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3747         }
3748         s->y_dc_scale_table= y;
3749         s->c_dc_scale_table= c;
3750         s->intra_matrix[0] = 13;
3751         s->chroma_intra_matrix[0] = 14;
3752         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3753                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3754         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3755                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3756         s->qscale= 8;
3757     }
3758
3759     //FIXME var duplication
3760     s->current_picture_ptr->f->key_frame =
3761     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3762     s->current_picture_ptr->f->pict_type =
3763     s->current_picture.f->pict_type = s->pict_type;
3764
3765     if (s->current_picture.f->key_frame)
3766         s->picture_in_gop_number=0;
3767
3768     s->mb_x = s->mb_y = 0;
3769     s->last_bits= put_bits_count(&s->pb);
3770     switch(s->out_format) {
3771     case FMT_MJPEG:
3772         if (CONFIG_MJPEG_ENCODER)
3773             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3774                                            s->intra_matrix, s->chroma_intra_matrix);
3775         break;
3776     case FMT_H261:
3777         if (CONFIG_H261_ENCODER)
3778             ff_h261_encode_picture_header(s, picture_number);
3779         break;
3780     case FMT_H263:
3781         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3782             ff_wmv2_encode_picture_header(s, picture_number);
3783         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3784             ff_msmpeg4_encode_picture_header(s, picture_number);
3785         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3786             ff_mpeg4_encode_picture_header(s, picture_number);
3787         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3788             ret = ff_rv10_encode_picture_header(s, picture_number);
3789             if (ret < 0)
3790                 return ret;
3791         }
3792         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3793             ff_rv20_encode_picture_header(s, picture_number);
3794         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3795             ff_flv_encode_picture_header(s, picture_number);
3796         else if (CONFIG_H263_ENCODER)
3797             ff_h263_encode_picture_header(s, picture_number);
3798         break;
3799     case FMT_MPEG1:
3800         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3801             ff_mpeg1_encode_picture_header(s, picture_number);
3802         break;
3803     default:
3804         av_assert0(0);
3805     }
3806     bits= put_bits_count(&s->pb);
3807     s->header_bits= bits - s->last_bits;
3808
3809     for(i=1; i<context_count; i++){
3810         update_duplicate_context_after_me(s->thread_context[i], s);
3811     }
3812     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3813     for(i=1; i<context_count; i++){
3814         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3815             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3816         merge_context_after_encode(s, s->thread_context[i]);
3817     }
3818     emms_c();
3819     return 0;
3820 }
3821
3822 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3823     const int intra= s->mb_intra;
3824     int i;
3825
3826     s->dct_count[intra]++;
3827
3828     for(i=0; i<64; i++){
3829         int level= block[i];
3830
3831         if(level){
3832             if(level>0){
3833                 s->dct_error_sum[intra][i] += level;
3834                 level -= s->dct_offset[intra][i];
3835                 if(level<0) level=0;
3836             }else{
3837                 s->dct_error_sum[intra][i] -= level;
3838                 level += s->dct_offset[intra][i];
3839                 if(level>0) level=0;
3840             }
3841             block[i]= level;
3842         }
3843     }
3844 }
3845
3846 static int dct_quantize_trellis_c(MpegEncContext *s,
3847                                   int16_t *block, int n,
3848                                   int qscale, int *overflow){
3849     const int *qmat;
3850     const uint16_t *matrix;
3851     const uint8_t *scantable= s->intra_scantable.scantable;
3852     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3853     int max=0;
3854     unsigned int threshold1, threshold2;
3855     int bias=0;
3856     int run_tab[65];
3857     int level_tab[65];
3858     int score_tab[65];
3859     int survivor[65];
3860     int survivor_count;
3861     int last_run=0;
3862     int last_level=0;
3863     int last_score= 0;
3864     int last_i;
3865     int coeff[2][64];
3866     int coeff_count[64];
3867     int qmul, qadd, start_i, last_non_zero, i, dc;
3868     const int esc_length= s->ac_esc_length;
3869     uint8_t * length;
3870     uint8_t * last_length;
3871     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3872     int mpeg2_qscale;
3873
3874     s->fdsp.fdct(block);
3875
3876     if(s->dct_error_sum)
3877         s->denoise_dct(s, block);
3878     qmul= qscale*16;
3879     qadd= ((qscale-1)|1)*8;
3880
3881     if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3882     else                 mpeg2_qscale = qscale << 1;
3883
3884     if (s->mb_intra) {
3885         int q;
3886         if (!s->h263_aic) {
3887             if (n < 4)
3888                 q = s->y_dc_scale;
3889             else
3890                 q = s->c_dc_scale;
3891             q = q << 3;
3892         } else{
3893             /* For AIC we skip quant/dequant of INTRADC */
3894             q = 1 << 3;
3895             qadd=0;
3896         }
3897
3898         /* note: block[0] is assumed to be positive */
3899         block[0] = (block[0] + (q >> 1)) / q;
3900         start_i = 1;
3901         last_non_zero = 0;
3902         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3903         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3904         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3905             bias= 1<<(QMAT_SHIFT-1);
3906
3907         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3908             length     = s->intra_chroma_ac_vlc_length;
3909             last_length= s->intra_chroma_ac_vlc_last_length;
3910         } else {
3911             length     = s->intra_ac_vlc_length;
3912             last_length= s->intra_ac_vlc_last_length;
3913         }
3914     } else {
3915         start_i = 0;
3916         last_non_zero = -1;
3917         qmat = s->q_inter_matrix[qscale];
3918         matrix = s->inter_matrix;
3919         length     = s->inter_ac_vlc_length;
3920         last_length= s->inter_ac_vlc_last_length;
3921     }
3922     last_i= start_i;
3923
3924     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3925     threshold2= (threshold1<<1);
3926
3927     for(i=63; i>=start_i; i--) {
3928         const int j = scantable[i];
3929         int level = block[j] * qmat[j];
3930
3931         if(((unsigned)(level+threshold1))>threshold2){
3932             last_non_zero = i;
3933             break;
3934         }
3935     }
3936
3937     for(i=start_i; i<=last_non_zero; i++) {
3938         const int j = scantable[i];
3939         int level = block[j] * qmat[j];
3940
3941 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3942 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3943         if(((unsigned)(level+threshold1))>threshold2){
3944             if(level>0){
3945                 level= (bias + level)>>QMAT_SHIFT;
3946                 coeff[0][i]= level;
3947                 coeff[1][i]= level-1;
3948 //                coeff[2][k]= level-2;
3949             }else{
3950                 level= (bias - level)>>QMAT_SHIFT;
3951                 coeff[0][i]= -level;
3952                 coeff[1][i]= -level+1;
3953 //                coeff[2][k]= -level+2;
3954             }
3955             coeff_count[i]= FFMIN(level, 2);
3956             av_assert2(coeff_count[i]);
3957             max |=level;
3958         }else{
3959             coeff[0][i]= (level>>31)|1;
3960             coeff_count[i]= 1;
3961         }
3962     }
3963
3964     *overflow= s->max_qcoeff < max; //overflow might have happened
3965
3966     if(last_non_zero < start_i){
3967         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3968         return last_non_zero;
3969     }
3970
3971     score_tab[start_i]= 0;
3972     survivor[0]= start_i;
3973     survivor_count= 1;
3974
3975     for(i=start_i; i<=last_non_zero; i++){
3976         int level_index, j, zero_distortion;
3977         int dct_coeff= FFABS(block[ scantable[i] ]);
3978         int best_score=256*256*256*120;
3979
3980         if (s->fdsp.fdct == ff_fdct_ifast)
3981             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3982         zero_distortion= dct_coeff*dct_coeff;
3983
3984         for(level_index=0; level_index < coeff_count[i]; level_index++){
3985             int distortion;
3986             int level= coeff[level_index][i];
3987             const int alevel= FFABS(level);
3988             int unquant_coeff;
3989
3990             av_assert2(level);
3991
3992             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3993                 unquant_coeff= alevel*qmul + qadd;
3994             } else if(s->out_format == FMT_MJPEG) {
3995                 j = s->idsp.idct_permutation[scantable[i]];
3996                 unquant_coeff = alevel * matrix[j] * 8;
3997             }else{ //MPEG1
3998                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3999                 if(s->mb_intra){
4000                         unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
4001                         unquant_coeff =   (unquant_coeff - 1) | 1;
4002                 }else{
4003                         unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
4004                         unquant_coeff =   (unquant_coeff - 1) | 1;
4005                 }
4006                 unquant_coeff<<= 3;
4007             }
4008
4009             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
4010             level+=64;
4011             if((level&(~127)) == 0){
4012                 for(j=survivor_count-1; j>=0; j--){
4013                     int run= i - survivor[j];
4014                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4015                     score += score_tab[i-run];
4016
4017                     if(score < best_score){
4018                         best_score= score;
4019                         run_tab[i+1]= run;
4020                         level_tab[i+1]= level-64;
4021                     }
4022                 }
4023
4024                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4025                     for(j=survivor_count-1; j>=0; j--){
4026                         int run= i - survivor[j];
4027                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4028                         score += score_tab[i-run];
4029                         if(score < last_score){
4030                             last_score= score;
4031                             last_run= run;
4032                             last_level= level-64;
4033                             last_i= i+1;
4034                         }
4035                     }
4036                 }
4037             }else{
4038                 distortion += esc_length*lambda;
4039                 for(j=survivor_count-1; j>=0; j--){
4040                     int run= i - survivor[j];
4041                     int score= distortion + score_tab[i-run];
4042
4043                     if(score < best_score){
4044                         best_score= score;
4045                         run_tab[i+1]= run;
4046                         level_tab[i+1]= level-64;
4047                     }
4048                 }
4049
4050                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4051                   for(j=survivor_count-1; j>=0; j--){
4052                         int run= i - survivor[j];
4053                         int score= distortion + score_tab[i-run];
4054                         if(score < last_score){
4055                             last_score= score;
4056                             last_run= run;
4057                             last_level= level-64;
4058                             last_i= i+1;
4059                         }
4060                     }
4061                 }
4062             }
4063         }
4064
4065         score_tab[i+1]= best_score;
4066
4067         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
4068         if(last_non_zero <= 27){
4069             for(; survivor_count; survivor_count--){
4070                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4071                     break;
4072             }
4073         }else{
4074             for(; survivor_count; survivor_count--){
4075                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4076                     break;
4077             }
4078         }
4079
4080         survivor[ survivor_count++ ]= i+1;
4081     }
4082
4083     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4084         last_score= 256*256*256*120;
4085         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4086             int score= score_tab[i];
4087             if(i) score += lambda*2; //FIXME exacter?
4088
4089             if(score < last_score){
4090                 last_score= score;
4091                 last_i= i;
4092                 last_level= level_tab[i];
4093                 last_run= run_tab[i];
4094             }
4095         }
4096     }
4097
4098     s->coded_score[n] = last_score;
4099
4100     dc= FFABS(block[0]);
4101     last_non_zero= last_i - 1;
4102     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4103
4104     if(last_non_zero < start_i)
4105         return last_non_zero;
4106
4107     if(last_non_zero == 0 && start_i == 0){
4108         int best_level= 0;
4109         int best_score= dc * dc;
4110
4111         for(i=0; i<coeff_count[0]; i++){
4112             int level= coeff[i][0];
4113             int alevel= FFABS(level);
4114             int unquant_coeff, score, distortion;
4115
4116             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4117                     unquant_coeff= (alevel*qmul + qadd)>>3;
4118             }else{ //MPEG1
4119                     unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4120                     unquant_coeff =   (unquant_coeff - 1) | 1;
4121             }
4122             unquant_coeff = (unquant_coeff + 4) >> 3;
4123             unquant_coeff<<= 3 + 3;
4124
4125             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4126             level+=64;
4127             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4128             else                    score= distortion + esc_length*lambda;
4129
4130             if(score < best_score){
4131                 best_score= score;
4132                 best_level= level - 64;
4133             }
4134         }
4135         block[0]= best_level;
4136         s->coded_score[n] = best_score - dc*dc;
4137         if(best_level == 0) return -1;
4138         else                return last_non_zero;
4139     }
4140
4141     i= last_i;
4142     av_assert2(last_level);
4143
4144     block[ perm_scantable[last_non_zero] ]= last_level;
4145     i -= last_run + 1;
4146
4147     for(; i>start_i; i -= run_tab[i] + 1){
4148         block[ perm_scantable[i-1] ]= level_tab[i];
4149     }
4150
4151     return last_non_zero;
4152 }
4153
4154 //#define REFINE_STATS 1
4155 static int16_t basis[64][64];
4156
4157 static void build_basis(uint8_t *perm){
4158     int i, j, x, y;
4159     emms_c();
4160     for(i=0; i<8; i++){
4161         for(j=0; j<8; j++){
4162             for(y=0; y<8; y++){
4163                 for(x=0; x<8; x++){
4164                     double s= 0.25*(1<<BASIS_SHIFT);
4165                     int index= 8*i + j;
4166                     int perm_index= perm[index];
4167                     if(i==0) s*= sqrt(0.5);
4168                     if(j==0) s*= sqrt(0.5);
4169                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4170                 }
4171             }
4172         }
4173     }
4174 }
4175
4176 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4177                         int16_t *block, int16_t *weight, int16_t *orig,
4178                         int n, int qscale){
4179     int16_t rem[64];
4180     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4181     const uint8_t *scantable= s->intra_scantable.scantable;
4182     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4183 //    unsigned int threshold1, threshold2;
4184 //    int bias=0;
4185     int run_tab[65];
4186     int prev_run=0;
4187     int prev_level=0;
4188     int qmul, qadd, start_i, last_non_zero, i, dc;
4189     uint8_t * length;
4190     uint8_t * last_length;
4191     int lambda;
4192     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4193 #ifdef REFINE_STATS
4194 static int count=0;
4195 static int after_last=0;
4196 static int to_zero=0;
4197 static int from_zero=0;
4198 static int raise=0;
4199 static int lower=0;
4200 static int messed_sign=0;
4201 #endif
4202
4203     if(basis[0][0] == 0)
4204         build_basis(s->idsp.idct_permutation);
4205
4206     qmul= qscale*2;
4207     qadd= (qscale-1)|1;
4208     if (s->mb_intra) {
4209         if (!s->h263_aic) {
4210             if (n < 4)
4211                 q = s->y_dc_scale;
4212             else
4213                 q = s->c_dc_scale;
4214         } else{
4215             /* For AIC we skip quant/dequant of INTRADC */
4216             q = 1;
4217             qadd=0;
4218         }
4219         q <<= RECON_SHIFT-3;
4220         /* note: block[0] is assumed to be positive */
4221         dc= block[0]*q;
4222 //        block[0] = (block[0] + (q >> 1)) / q;
4223         start_i = 1;
4224 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4225 //            bias= 1<<(QMAT_SHIFT-1);
4226         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4227             length     = s->intra_chroma_ac_vlc_length;
4228             last_length= s->intra_chroma_ac_vlc_last_length;
4229         } else {
4230             length     = s->intra_ac_vlc_length;
4231             last_length= s->intra_ac_vlc_last_length;
4232         }
4233     } else {
4234         dc= 0;
4235         start_i = 0;
4236         length     = s->inter_ac_vlc_length;
4237         last_length= s->inter_ac_vlc_last_length;
4238     }
4239     last_non_zero = s->block_last_index[n];
4240
4241 #ifdef REFINE_STATS
4242 {START_TIMER
4243 #endif
4244     dc += (1<<(RECON_SHIFT-1));
4245     for(i=0; i<64; i++){
4246         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4247     }
4248 #ifdef REFINE_STATS
4249 STOP_TIMER("memset rem[]")}
4250 #endif
4251     sum=0;
4252     for(i=0; i<64; i++){
4253         int one= 36;
4254         int qns=4;
4255         int w;
4256
4257         w= FFABS(weight[i]) + qns*one;
4258         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4259
4260         weight[i] = w;
4261 //        w=weight[i] = (63*qns + (w/2)) / w;
4262
4263         av_assert2(w>0);
4264         av_assert2(w<(1<<6));
4265         sum += w*w;
4266     }
4267     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4268 #ifdef REFINE_STATS
4269 {START_TIMER
4270 #endif
4271     run=0;
4272     rle_index=0;
4273     for(i=start_i; i<=last_non_zero; i++){
4274         int j= perm_scantable[i];
4275         const int level= block[j];
4276         int coeff;
4277
4278         if(level){
4279             if(level<0) coeff= qmul*level - qadd;
4280             else        coeff= qmul*level + qadd;
4281             run_tab[rle_index++]=run;
4282             run=0;
4283
4284             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4285         }else{
4286             run++;
4287         }
4288     }
4289 #ifdef REFINE_STATS
4290 if(last_non_zero>0){
4291 STOP_TIMER("init rem[]")
4292 }
4293 }
4294
4295 {START_TIMER
4296 #endif
4297     for(;;){
4298         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4299         int best_coeff=0;
4300         int best_change=0;
4301         int run2, best_unquant_change=0, analyze_gradient;
4302 #ifdef REFINE_STATS
4303 {START_TIMER
4304 #endif
4305         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4306
4307         if(analyze_gradient){
4308 #ifdef REFINE_STATS
4309 {START_TIMER
4310 #endif
4311             for(i=0; i<64; i++){
4312                 int w= weight[i];
4313
4314                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4315             }
4316 #ifdef REFINE_STATS
4317 STOP_TIMER("rem*w*w")}
4318 {START_TIMER
4319 #endif
4320             s->fdsp.fdct(d1);
4321 #ifdef REFINE_STATS
4322 STOP_TIMER("dct")}
4323 #endif
4324         }
4325
4326         if(start_i){
4327             const int level= block[0];
4328             int change, old_coeff;
4329
4330             av_assert2(s->mb_intra);
4331
4332             old_coeff= q*level;
4333
4334             for(change=-1; change<=1; change+=2){
4335                 int new_level= level + change;
4336                 int score, new_coeff;
4337
4338                 new_coeff= q*new_level;
4339                 if(new_coeff >= 2048 || new_coeff < 0)
4340                     continue;
4341
4342                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4343                                                   new_coeff - old_coeff);
4344                 if(score<best_score){
4345                     best_score= score;
4346                     best_coeff= 0;
4347                     best_change= change;
4348                     best_unquant_change= new_coeff - old_coeff;
4349                 }
4350             }
4351         }
4352
4353         run=0;
4354         rle_index=0;
4355         run2= run_tab[rle_index++];
4356         prev_level=0;
4357         prev_run=0;
4358
4359         for(i=start_i; i<64; i++){
4360             int j= perm_scantable[i];
4361             const int level= block[j];
4362             int change, old_coeff;
4363
4364             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4365                 break;
4366
4367             if(level){
4368                 if(level<0) old_coeff= qmul*level - qadd;
4369                 else        old_coeff= qmul*level + qadd;
4370                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4371             }else{
4372                 old_coeff=0;
4373                 run2--;
4374                 av_assert2(run2>=0 || i >= last_non_zero );
4375             }
4376
4377             for(change=-1; change<=1; change+=2){
4378                 int new_level= level + change;
4379                 int score, new_coeff, unquant_change;
4380
4381                 score=0;
4382                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4383                    continue;
4384
4385                 if(new_level){
4386                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4387                     else            new_coeff= qmul*new_level + qadd;
4388                     if(new_coeff >= 2048 || new_coeff <= -2048)
4389                         continue;
4390                     //FIXME check for overflow
4391
4392                     if(level){
4393                         if(level < 63 && level > -63){
4394                             if(i < last_non_zero)
4395                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4396                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4397                             else
4398                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4399                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4400                         }
4401                     }else{
4402                         av_assert2(FFABS(new_level)==1);
4403
4404                         if(analyze_gradient){
4405                             int g= d1[ scantable[i] ];
4406                             if(g && (g^new_level) >= 0)
4407                                 continue;
4408                         }
4409
4410                         if(i < last_non_zero){
4411                             int next_i= i + run2 + 1;
4412                             int next_level= block[ perm_scantable[next_i] ] + 64;
4413
4414                             if(next_level&(~127))
4415                                 next_level= 0;
4416
4417                             if(next_i < last_non_zero)
4418                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4419                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4420                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4421                             else
4422                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4423                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4424                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4425                         }else{
4426                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4427                             if(prev_level){
4428                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4429                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4430                             }
4431                         }
4432                     }
4433                 }else{
4434                     new_coeff=0;
4435                     av_assert2(FFABS(level)==1);
4436
4437                     if(i < last_non_zero){
4438                         int next_i= i + run2 + 1;
4439                         int next_level= block[ perm_scantable[next_i] ] + 64;
4440
4441                         if(next_level&(~127))
4442                             next_level= 0;
4443
4444                         if(next_i < last_non_zero)
4445                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4446                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4447                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4448                         else
4449                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4450                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4451                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4452                     }else{
4453                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4454                         if(prev_level){
4455                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4456                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4457                         }
4458                     }
4459                 }
4460
4461                 score *= lambda;
4462
4463                 unquant_change= new_coeff - old_coeff;
4464                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4465
4466                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4467                                                    unquant_change);
4468                 if(score<best_score){
4469                     best_score= score;
4470                     best_coeff= i;
4471                     best_change= change;
4472                     best_unquant_change= unquant_change;
4473                 }
4474             }
4475             if(level){
4476                 prev_level= level + 64;
4477                 if(prev_level&(~127))
4478                     prev_level= 0;
4479                 prev_run= run;
4480                 run=0;
4481             }else{
4482                 run++;
4483             }
4484         }
4485 #ifdef REFINE_STATS
4486 STOP_TIMER("iterative step")}
4487 #endif
4488
4489         if(best_change){
4490             int j= perm_scantable[ best_coeff ];
4491
4492             block[j] += best_change;
4493
4494             if(best_coeff > last_non_zero){
4495                 last_non_zero= best_coeff;
4496                 av_assert2(block[j]);
4497 #ifdef REFINE_STATS
4498 after_last++;
4499 #endif
4500             }else{
4501 #ifdef REFINE_STATS
4502 if(block[j]){
4503     if(block[j] - best_change){
4504         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4505             raise++;
4506         }else{
4507             lower++;
4508         }
4509     }else{
4510         from_zero++;
4511     }
4512 }else{
4513     to_zero++;
4514 }
4515 #endif
4516                 for(; last_non_zero>=start_i; last_non_zero--){
4517                     if(block[perm_scantable[last_non_zero]])
4518                         break;
4519                 }
4520             }
4521 #ifdef REFINE_STATS
4522 count++;
4523 if(256*256*256*64 % count == 0){
4524     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4525 }
4526 #endif
4527             run=0;
4528             rle_index=0;
4529             for(i=start_i; i<=last_non_zero; i++){
4530                 int j= perm_scantable[i];
4531                 const int level= block[j];
4532
4533                  if(level){
4534                      run_tab[rle_index++]=run;
4535                      run=0;
4536                  }else{
4537                      run++;
4538                  }
4539             }
4540
4541             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4542         }else{
4543             break;
4544         }
4545     }
4546 #ifdef REFINE_STATS
4547 if(last_non_zero>0){
4548 STOP_TIMER("iterative search")
4549 }
4550 }
4551 #endif
4552
4553     return last_non_zero;
4554 }
4555
/**
 * Permute the coefficients of an 8x8 block according to a permutation vector.
 *
 * Only the entries reachable through scan positions 0..last are touched:
 * each visited coefficient block[j] ends up at block[permutation[j]].
 * Entries that are neither visited nor a destination keep their value.
 *
 * @param block       the block which will be permuted in place
 * @param permutation the permutation vector; maps an index j to its
 *                    destination index permutation[j]
 * @param scantable   the scantable in use; it is only consulted to find
 *                    which entries to visit (to speed the permutation up),
 *                    the block is not (inverse) permuted to scantable order!
 * @param last        the last non zero coefficient in scantable order;
 *                    nothing is done when it is <= 0
 */
void ff_block_permute(int16_t *block, uint8_t *permutation,
                      const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    if (last <= 0)
        return;

    /* FIXME: an early return for "permutation[1] == 1" was considered as a
     * shortcut; it is ok but not clean and might fail for some permutations,
     * so it is intentionally not taken. */

    /* Pass 1: stash every visited coefficient and clear its old slot, so
     * that pass 2 can never read a value that was already overwritten. */
    for (pos = 0; pos <= last; pos++) {
        const int src = scantable[pos];

        saved[src] = block[src];
        block[src] = 0;
    }

    /* Pass 2: drop each stashed coefficient into its permuted slot. */
    for (pos = 0; pos <= last; pos++) {
        const int src = scantable[pos];
        const int dst = permutation[src];

        block[dst] = saved[src];
    }
}
4591
4592 int ff_dct_quantize_c(MpegEncContext *s,
4593                         int16_t *block, int n,
4594                         int qscale, int *overflow)
4595 {
4596     int i, j, level, last_non_zero, q, start_i;
4597     const int *qmat;
4598     const uint8_t *scantable= s->intra_scantable.scantable;
4599     int bias;
4600     int max=0;
4601     unsigned int threshold1, threshold2;
4602
4603     s->fdsp.fdct(block);
4604
4605     if(s->dct_error_sum)
4606         s->denoise_dct(s, block);
4607
4608     if (s->mb_intra) {
4609         if (!s->h263_aic) {
4610             if (n < 4)
4611                 q = s->y_dc_scale;
4612             else
4613                 q = s->c_dc_scale;
4614             q = q << 3;
4615         } else
4616             /* For AIC we skip quant/dequant of INTRADC */
4617             q = 1 << 3;
4618
4619         /* note: block[0] is assumed to be positive */
4620         block[0] = (block[0] + (q >> 1)) / q;
4621         start_i = 1;
4622         last_non_zero = 0;
4623         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4624         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4625     } else {
4626         start_i = 0;
4627         last_non_zero = -1;
4628         qmat = s->q_inter_matrix[qscale];
4629         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4630     }
4631     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4632     threshold2= (threshold1<<1);
4633     for(i=63;i>=start_i;i--) {
4634         j = scantable[i];
4635         level = block[j] * qmat[j];
4636
4637         if(((unsigned)(level+threshold1))>threshold2){
4638             last_non_zero = i;
4639             break;
4640         }else{
4641             block[j]=0;
4642         }
4643     }
4644     for(i=start_i; i<=last_non_zero; i++) {
4645         j = scantable[i];
4646         level = block[j] * qmat[j];
4647
4648 //        if(   bias+level >= (1<<QMAT_SHIFT)
4649 //           || bias-level >= (1<<QMAT_SHIFT)){
4650         if(((unsigned)(level+threshold1))>threshold2){
4651             if(level>0){
4652                 level= (bias + level)>>QMAT_SHIFT;
4653                 block[j]= level;
4654             }else{
4655                 level= (bias - level)>>QMAT_SHIFT;
4656                 block[j]= -level;
4657             }
4658             max |=level;
4659         }else{
4660             block[j]=0;
4661         }
4662     }
4663     *overflow= s->max_qcoeff < max; //overflow might have happened
4664
4665     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4666     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4667         ff_block_permute(block, s->idsp.idct_permutation,
4668                       scantable, last_non_zero);
4669
4670     return last_non_zero;
4671 }
4672
/* Byte offset of field x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags for the option tables below: the OR of AV_OPT_FLAG_VIDEO_PARAM and
 * AV_OPT_FLAG_ENCODING_PARAM. The replacement list is parenthesized so the
 * macro always behaves as a single operand (e.g. in "VE & flag"). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4675 static const AVOption h263_options[] = {
4676     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4677     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4678     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4679     FF_MPV_COMMON_OPTS
4680     { NULL },
4681 };
4682
4683 static const AVClass h263_class = {
4684     .class_name = "H.263 encoder",
4685     .item_name  = av_default_item_name,
4686     .option     = h263_options,
4687     .version    = LIBAVUTIL_VERSION_INT,
4688 };
4689
4690 AVCodec ff_h263_encoder = {
4691     .name           = "h263",
4692     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4693     .type           = AVMEDIA_TYPE_VIDEO,
4694     .id             = AV_CODEC_ID_H263,
4695     .priv_data_size = sizeof(MpegEncContext),
4696     .init           = ff_mpv_encode_init,
4697     .encode2        = ff_mpv_encode_picture,
4698     .close          = ff_mpv_encode_end,
4699     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4700     .priv_class     = &h263_class,
4701 };
4702
4703 static const AVOption h263p_options[] = {
4704     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4705     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4706     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4707     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4708     FF_MPV_COMMON_OPTS
4709     { NULL },
4710 };
4711 static const AVClass h263p_class = {
4712     .class_name = "H.263p encoder",
4713     .item_name  = av_default_item_name,
4714     .option     = h263p_options,
4715     .version    = LIBAVUTIL_VERSION_INT,
4716 };
4717
4718 AVCodec ff_h263p_encoder = {
4719     .name           = "h263p",
4720     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4721     .type           = AVMEDIA_TYPE_VIDEO,
4722     .id             = AV_CODEC_ID_H263P,
4723     .priv_data_size = sizeof(MpegEncContext),
4724     .init           = ff_mpv_encode_init,
4725     .encode2        = ff_mpv_encode_picture,
4726     .close          = ff_mpv_encode_end,
4727     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4728     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4729     .priv_class     = &h263p_class,
4730 };
4731
4732 static const AVClass msmpeg4v2_class = {
4733     .class_name = "msmpeg4v2 encoder",
4734     .item_name  = av_default_item_name,
4735     .option     = ff_mpv_generic_options,
4736     .version    = LIBAVUTIL_VERSION_INT,
4737 };
4738
4739 AVCodec ff_msmpeg4v2_encoder = {
4740     .name           = "msmpeg4v2",
4741     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4742     .type           = AVMEDIA_TYPE_VIDEO,
4743     .id             = AV_CODEC_ID_MSMPEG4V2,
4744     .priv_data_size = sizeof(MpegEncContext),
4745     .init           = ff_mpv_encode_init,
4746     .encode2        = ff_mpv_encode_picture,
4747     .close          = ff_mpv_encode_end,
4748     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4749     .priv_class     = &msmpeg4v2_class,
4750 };
4751
4752 static const AVClass msmpeg4v3_class = {
4753     .class_name = "msmpeg4v3 encoder",
4754     .item_name  = av_default_item_name,
4755     .option     = ff_mpv_generic_options,
4756     .version    = LIBAVUTIL_VERSION_INT,
4757 };
4758
4759 AVCodec ff_msmpeg4v3_encoder = {
4760     .name           = "msmpeg4",
4761     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4762     .type           = AVMEDIA_TYPE_VIDEO,
4763     .id             = AV_CODEC_ID_MSMPEG4V3,
4764     .priv_data_size = sizeof(MpegEncContext),
4765     .init           = ff_mpv_encode_init,
4766     .encode2        = ff_mpv_encode_picture,
4767     .close          = ff_mpv_encode_end,
4768     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4769     .priv_class     = &msmpeg4v3_class,
4770 };
4771
4772 static const AVClass wmv1_class = {
4773     .class_name = "wmv1 encoder",
4774     .item_name  = av_default_item_name,
4775     .option     = ff_mpv_generic_options,
4776     .version    = LIBAVUTIL_VERSION_INT,
4777 };
4778
4779 AVCodec ff_wmv1_encoder = {
4780     .name           = "wmv1",
4781     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4782     .type           = AVMEDIA_TYPE_VIDEO,
4783     .id             = AV_CODEC_ID_WMV1,
4784     .priv_data_size = sizeof(MpegEncContext),
4785     .init           = ff_mpv_encode_init,
4786     .encode2        = ff_mpv_encode_picture,
4787     .close          = ff_mpv_encode_end,
4788     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4789     .priv_class     = &wmv1_class,
4790 };