1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /*
26  * Non-linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
27  */
28
29 /**
30  * @file
31  * The simplest mpeg encoder (well, it was the simplest!).
32  */
33
34 #include <stdint.h>
35
36 #include "libavutil/internal.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/mem_internal.h"
40 #include "libavutil/pixdesc.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/thread.h"
43 #include "avcodec.h"
44 #include "dct.h"
45 #include "idctdsp.h"
46 #include "mpeg12.h"
47 #include "mpegvideo.h"
48 #include "mpegvideodata.h"
49 #include "h261.h"
50 #include "h263.h"
51 #include "h263data.h"
52 #include "mjpegenc_common.h"
53 #include "mathops.h"
54 #include "mpegutils.h"
55 #include "mjpegenc.h"
56 #include "speedhqenc.h"
57 #include "msmpeg4.h"
58 #include "pixblockdsp.h"
59 #include "qpeldsp.h"
60 #include "faandct.h"
61 #include "thread.h"
62 #include "aandcttab.h"
63 #include "flv.h"
64 #include "mpeg4video.h"
65 #include "internal.h"
66 #include "bytestream.h"
67 #include "wmv2.h"
68 #include "rv10.h"
69 #include "packet_internal.h"
70 #include <limits.h>
71 #include "sp5x.h"
72
73 #define QUANT_BIAS_SHIFT 8
74
75 #define QMAT_SHIFT_MMX 16
76 #define QMAT_SHIFT 21
77
78 static int encode_picture(MpegEncContext *s, int picture_number);
79 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
80 static int sse_mb(MpegEncContext *s);
81 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
82 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
83
84 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_DMV * 2 + 1];
85 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
86
87 const AVOption ff_mpv_generic_options[] = {
88     FF_MPV_COMMON_OPTS
89 #if FF_API_MPEGVIDEO_OPTS
90     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
91     FF_MPV_DEPRECATED_A53_CC_OPT
92     FF_MPV_DEPRECATED_MATRIX_OPT
93     FF_MPV_DEPRECATED_BFRAME_OPTS
94 #endif
95     { NULL },
96 };
97
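/* Precompute reciprocal quantization tables for every qscale in [qmin, qmax]:
 * qmat[] holds the high-precision 32-bit factors used by the trellis and
 * refinement quantizers, while qmat16[][0]/qmat16[][1] hold the 16-bit factor
 * and rounding bias used by the SIMD dct_quantize path.  For the AAN "ifast"
 * FDCT the factors are additionally divided by ff_aanscales, compensating for
 * the scaling that transform leaves in its output coefficients. */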
98 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
99                        uint16_t (*qmat16)[2][64],
100                        const uint16_t *quant_matrix,
101                        int bias, int qmin, int qmax, int intra)
102 {
103     FDCTDSPContext *fdsp = &s->fdsp;
104     int qscale;
105     int shift = 0;
106
107     for (qscale = qmin; qscale <= qmax; qscale++) {
108         int i;
109         int qscale2;
110
111         if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
112         else                 qscale2 = qscale << 1;
113
114         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
115 #if CONFIG_FAANDCT
116             fdsp->fdct == ff_faandct            ||
117 #endif /* CONFIG_FAANDCT */
118             fdsp->fdct == ff_jpeg_fdct_islow_10) {
119             for (i = 0; i < 64; i++) {
120                 const int j = s->idsp.idct_permutation[i];
121                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
122                 /* 16 <= qscale * quant_matrix[i] <= 7905
123                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
124                  *             19952 <=              x  <= 249205026
125                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
126                  *           3444240 >= (1 << 36) / (x) >= 275 */
127
128                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
129             }
130         } else if (fdsp->fdct == ff_fdct_ifast) {
131             for (i = 0; i < 64; i++) {
132                 const int j = s->idsp.idct_permutation[i];
133                 int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
134                 /* 16 <= qscale * quant_matrix[i] <= 7905
135                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
136                  *             19952 <=              x  <= 249205026
137                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
138                  *           3444240 >= (1 << 36) / (x) >= 275 */
139
140                 qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
141             }
142         } else {
143             for (i = 0; i < 64; i++) {
144                 const int j = s->idsp.idct_permutation[i];
145                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
146                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
147                  * Assume x = qscale * quant_matrix[i]
148                  * So             16 <=              x  <= 7905
149                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
150                  * so          32768 >= (1 << 19) / (x) >= 67 */
151                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
152                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
153                 //                    (qscale * quant_matrix[i]);
154                 qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
155
156                 if (qmat16[qscale][0][i] == 0 ||
157                     qmat16[qscale][0][i] == 128 * 256)
158                     qmat16[qscale][0][i] = 128 * 256 - 1;
159                 qmat16[qscale][1][i] =
160                     ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
161                                 qmat16[qscale][0][i]);
162             }
163         }
164
165         for (i = intra; i < 64; i++) {
166             int64_t max = 8191;
167             if (fdsp->fdct == ff_fdct_ifast) {
168                 max = (8191LL * ff_aanscales[i]) >> 14;
169             }
170             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
171                 shift++;
172             }
173         }
174     }
175     if (shift) {
176         av_log(s->avctx, AV_LOG_INFO,
177                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
178                QMAT_SHIFT - shift);
179     }
180 }
181
182 static inline void update_qscale(MpegEncContext *s)
183 {
184     if (s->q_scale_type == 1 && 0) {
185         int i;
186         int bestdiff=INT_MAX;
187         int best = 1;
188
189         for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
190             int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
191             if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
192                 (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
193                 continue;
194             if (diff < bestdiff) {
195                 bestdiff = diff;
196                 best = i;
197             }
198         }
199         s->qscale = best;
200     } else {
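        /* Map lambda to qscale: 139 / 2^(FF_LAMBDA_SHIFT + 7) is roughly
         * 1 / FF_QP2LAMBDA (118), and FF_LAMBDA_SCALE * 64 is the rounding
         * offset (half of the divisor). */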
201         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
202                     (FF_LAMBDA_SHIFT + 7);
203         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
204     }
205
206     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
207                  FF_LAMBDA_SHIFT;
208 }
209
210 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
211 {
212     int i;
213
214     if (matrix) {
215         put_bits(pb, 1, 1);
216         for (i = 0; i < 64; i++) {
217             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
218         }
219     } else
220         put_bits(pb, 1, 0);
221 }
222
223 /**
224  * init s->current_picture.qscale_table from s->lambda_table
225  */
226 void ff_init_qscale_tab(MpegEncContext *s)
227 {
228     int8_t * const qscale_table = s->current_picture.qscale_table;
229     int i;
230
231     for (i = 0; i < s->mb_num; i++) {
232         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
233         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
234         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
235                                                   s->avctx->qmax);
236     }
237 }
238
239 static void update_duplicate_context_after_me(MpegEncContext *dst,
240                                               MpegEncContext *src)
241 {
242 #define COPY(a) dst->a= src->a
243     COPY(pict_type);
244     COPY(current_picture);
245     COPY(f_code);
246     COPY(b_code);
247     COPY(qscale);
248     COPY(lambda);
249     COPY(lambda2);
250     COPY(picture_in_gop_number);
251     COPY(gop_picture_number);
252     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
253     COPY(progressive_frame);    // FIXME don't set in encode_header
254     COPY(partitioned_frame);    // FIXME don't set in encode_header
255 #undef COPY
256 }
257
258 static void mpv_encode_init_static(void)
259 {
260     for (int i = -16; i < 16; i++)
261         default_fcode_tab[i + MAX_MV] = 1;
262 }
263
264 /**
265  * Set the given MpegEncContext to defaults for encoding.
266  * The changed fields will not depend upon the prior state of the MpegEncContext.
267  */
268 static void mpv_encode_defaults(MpegEncContext *s)
269 {
270     static AVOnce init_static_once = AV_ONCE_INIT;
271
272     ff_mpv_common_defaults(s);
273
274     ff_thread_once(&init_static_once, mpv_encode_init_static);
275
276     s->me.mv_penalty = default_mv_penalty;
277     s->fcode_tab     = default_fcode_tab;
278
279     s->input_picture_number  = 0;
280     s->picture_in_gop_number = 0;
281 }
282
283 av_cold int ff_dct_encode_init(MpegEncContext *s)
284 {
285     if (ARCH_X86)
286         ff_dct_encode_init_x86(s);
287
288     if (CONFIG_H263_ENCODER)
289         ff_h263dsp_init(&s->h263dsp);
290     if (!s->dct_quantize)
291         s->dct_quantize = ff_dct_quantize_c;
292     if (!s->denoise_dct)
293         s->denoise_dct  = denoise_dct_c;
294     s->fast_dct_quantize = s->dct_quantize;
295     if (s->avctx->trellis)
296         s->dct_quantize  = dct_quantize_trellis_c;
297
298     return 0;
299 }
300
301 /* init video encoder */
302 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
303 {
304     MpegEncContext *s = avctx->priv_data;
305     AVCPBProperties *cpb_props;
306     int i, ret;
307
308     mpv_encode_defaults(s);
309
310     switch (avctx->pix_fmt) {
311     case AV_PIX_FMT_YUVJ444P:
312     case AV_PIX_FMT_YUV444P:
313         s->chroma_format = CHROMA_444;
314         break;
315     case AV_PIX_FMT_YUVJ422P:
316     case AV_PIX_FMT_YUV422P:
317         s->chroma_format = CHROMA_422;
318         break;
319     case AV_PIX_FMT_YUVJ420P:
320     case AV_PIX_FMT_YUV420P:
321     default:
322         s->chroma_format = CHROMA_420;
323         break;
324     }
325
326     avctx->bits_per_raw_sample = av_clip(avctx->bits_per_raw_sample, 0, 8);
327
328     s->bit_rate = avctx->bit_rate;
329     s->width    = avctx->width;
330     s->height   = avctx->height;
331     if (avctx->gop_size > 600 &&
332         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
333         av_log(avctx, AV_LOG_WARNING,
334                "keyframe interval too large, reducing it from %d to %d\n",
335                avctx->gop_size, 600);
336         avctx->gop_size = 600;
337     }
338     s->gop_size     = avctx->gop_size;
339     s->avctx        = avctx;
340     if (avctx->max_b_frames > MAX_B_FRAMES) {
341         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
342                "is %d.\n", MAX_B_FRAMES);
343         avctx->max_b_frames = MAX_B_FRAMES;
344     }
345     s->max_b_frames = avctx->max_b_frames;
346     s->codec_id     = avctx->codec->id;
347     s->strict_std_compliance = avctx->strict_std_compliance;
348     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
349     s->rtp_mode           = !!s->rtp_payload_size;
350     s->intra_dc_precision = avctx->intra_dc_precision;
351
352     // work around differences in how applications specify dc precision
353     if (s->intra_dc_precision < 0) {
354         s->intra_dc_precision += 8;
355     } else if (s->intra_dc_precision >= 8)
356         s->intra_dc_precision -= 8;
357
358     if (s->intra_dc_precision < 0) {
359         av_log(avctx, AV_LOG_ERROR,
360                 "intra dc precision must be non-negative; note that some applications use"
361                 " 0 and others 8 as the base meaning 8 bit, the value must not be smaller than that\n");
362         return AVERROR(EINVAL);
363     }
364
365     if (avctx->codec_id == AV_CODEC_ID_AMV || (avctx->active_thread_type & FF_THREAD_SLICE))
366         s->huffman = 0;
367
368     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
369         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
370         return AVERROR(EINVAL);
371     }
372     s->user_specified_pts = AV_NOPTS_VALUE;
373
374     if (s->gop_size <= 1) {
375         s->intra_only = 1;
376         s->gop_size   = 12;
377     } else {
378         s->intra_only = 0;
379     }
380
381     /* Fixed QSCALE */
382     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
383
384     s->adaptive_quant = (avctx->lumi_masking ||
385                          avctx->dark_masking ||
386                          avctx->temporal_cplx_masking ||
387                          avctx->spatial_cplx_masking  ||
388                          avctx->p_masking      ||
389                          s->border_masking ||
390                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
391                         !s->fixed_qscale;
392
393     s->loop_filter = !!(avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
394
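    /* If only a maximum rate was specified, derive a VBV buffer size from it:
     * MPEG-1/2 scale a 112 * 16 kbit reference buffer with the max rate
     * (never below the 15 Mbit/s point), while the MPEG-4/MSMPEG4 branch
     * interpolates between buffer sizes (in 16 kbit units) that roughly track
     * the usual profile/level limits. */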
395     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
396         switch(avctx->codec_id) {
397         case AV_CODEC_ID_MPEG1VIDEO:
398         case AV_CODEC_ID_MPEG2VIDEO:
399             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
400             break;
401         case AV_CODEC_ID_MPEG4:
402         case AV_CODEC_ID_MSMPEG4V1:
403         case AV_CODEC_ID_MSMPEG4V2:
404         case AV_CODEC_ID_MSMPEG4V3:
405             if       (avctx->rc_max_rate >= 15000000) {
406                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
407             } else if(avctx->rc_max_rate >=  2000000) {
408                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
409             } else if(avctx->rc_max_rate >=   384000) {
410                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
411             } else
412                 avctx->rc_buffer_size = 40;
413             avctx->rc_buffer_size *= 16384;
414             break;
415         }
416         if (avctx->rc_buffer_size) {
417             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
418         }
419     }
420
421     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
422         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
423         return AVERROR(EINVAL);
424     }
425
426     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
427         av_log(avctx, AV_LOG_INFO,
428                "Warning: min_rate > 0 with min_rate != max_rate is not recommended!\n");
429     }
430
431     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
432         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
433         return AVERROR(EINVAL);
434     }
435
436     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
437         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
438         return AVERROR(EINVAL);
439     }
440
441     if (avctx->rc_max_rate &&
442         avctx->rc_max_rate == avctx->bit_rate &&
443         avctx->rc_max_rate != avctx->rc_min_rate) {
444         av_log(avctx, AV_LOG_INFO,
445                "impossible bitrate constraints, this will fail\n");
446     }
447
448     if (avctx->rc_buffer_size &&
449         avctx->bit_rate * (int64_t)avctx->time_base.num >
450             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
451         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
452         return AVERROR(EINVAL);
453     }
454
455     if (!s->fixed_qscale &&
456         avctx->bit_rate * av_q2d(avctx->time_base) >
457             avctx->bit_rate_tolerance) {
458         av_log(avctx, AV_LOG_WARNING,
459                "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
460         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
461     }
462
463     if (avctx->rc_max_rate &&
464         avctx->rc_min_rate == avctx->rc_max_rate &&
465         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
466          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
467         90000LL * (avctx->rc_buffer_size - 1) >
468             avctx->rc_max_rate * 0xFFFFLL) {
469         av_log(avctx, AV_LOG_INFO,
470                "Warning: vbv_delay will be set to 0xFFFF (=VBR) as the "
471                "specified vbv buffer is too large for the given bitrate!\n");
472     }
473
474     if ((avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
475         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
476         s->codec_id != AV_CODEC_ID_FLV1) {
477         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
478         return AVERROR(EINVAL);
479     }
480
481     if (s->obmc && avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
482         av_log(avctx, AV_LOG_ERROR,
483                "OBMC is only supported with simple mb decision\n");
484         return AVERROR(EINVAL);
485     }
486
487     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
488         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
489         return AVERROR(EINVAL);
490     }
491
492     if (s->max_b_frames                    &&
493         s->codec_id != AV_CODEC_ID_MPEG4      &&
494         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
495         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
496         av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
497         return AVERROR(EINVAL);
498     }
499     if (s->max_b_frames < 0) {
500         av_log(avctx, AV_LOG_ERROR,
501                "max_b_frames must be non-negative for mpegvideo-based encoders\n");
502         return AVERROR(EINVAL);
503     }
504
505     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
506          s->codec_id == AV_CODEC_ID_H263  ||
507          s->codec_id == AV_CODEC_ID_H263P) &&
508         (avctx->sample_aspect_ratio.num > 255 ||
509          avctx->sample_aspect_ratio.den > 255)) {
510         av_log(avctx, AV_LOG_WARNING,
511                "Invalid pixel aspect ratio %i/%i, limit is 255/255, reducing\n",
512                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
513         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
514                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
515     }
516
517     if ((s->codec_id == AV_CODEC_ID_H263  ||
518          s->codec_id == AV_CODEC_ID_H263P) &&
519         (avctx->width  > 2048 ||
520          avctx->height > 1152 )) {
521         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
522         return AVERROR(EINVAL);
523     }
524     if ((s->codec_id == AV_CODEC_ID_H263  ||
525          s->codec_id == AV_CODEC_ID_H263P ||
526          s->codec_id == AV_CODEC_ID_RV20) &&
527         ((avctx->width &3) ||
528          (avctx->height&3) )) {
529         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
530         return AVERROR(EINVAL);
531     }
532
533     if (s->codec_id == AV_CODEC_ID_RV10 &&
534         (avctx->width &15 ||
535          avctx->height&15 )) {
536         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
537         return AVERROR(EINVAL);
538     }
539
540     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
541          s->codec_id == AV_CODEC_ID_WMV2) &&
542          avctx->width & 1) {
543         av_log(avctx, AV_LOG_ERROR, "width must be a multiple of 2\n");
544         return AVERROR(EINVAL);
545     }
546
547     if ((avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
548         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
549         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
550         return AVERROR(EINVAL);
551     }
552
553     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
554         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
555         return AVERROR(EINVAL);
556     }
557
558     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
559         avctx->mb_decision != FF_MB_DECISION_RD) {
560         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
561         return AVERROR(EINVAL);
562     }
563
564     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
565             (s->codec_id == AV_CODEC_ID_AMV ||
566              s->codec_id == AV_CODEC_ID_MJPEG)) {
567         // Used to produce garbage with MJPEG.
568         av_log(avctx, AV_LOG_ERROR,
569                "QP RD is no longer compatible with MJPEG or AMV\n");
570         return AVERROR(EINVAL);
571     }
572
573     if (s->scenechange_threshold < 1000000000 &&
574         (avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
575         av_log(avctx, AV_LOG_ERROR,
576                "closed GOP with scene change detection is not supported yet, "
577                "set the threshold to 1000000000\n");
578         return AVERROR_PATCHWELCOME;
579     }
580
581     if (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
582         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
583             s->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "low delay forcing is only available for mpeg2, "
586                    "set strict_std_compliance to 'unofficial' or lower in order to allow it\n");
587             return AVERROR(EINVAL);
588         }
589         if (s->max_b_frames != 0) {
590             av_log(avctx, AV_LOG_ERROR,
591                    "B-frames cannot be used with low delay\n");
592             return AVERROR(EINVAL);
593         }
594     }
595
596     if (s->q_scale_type == 1) {
597         if (avctx->qmax > 28) {
598             av_log(avctx, AV_LOG_ERROR,
599                "non-linear quant currently only supports qmax <= 28\n");
600             return AVERROR_PATCHWELCOME;
601         }
602     }
603
604     if (avctx->slices > 1 &&
605         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
606         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
607         return AVERROR(EINVAL);
608     }
609
610     if (avctx->thread_count > 1         &&
611         s->codec_id != AV_CODEC_ID_MPEG4      &&
612         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
613         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
614         s->codec_id != AV_CODEC_ID_MJPEG      &&
615         (s->codec_id != AV_CODEC_ID_H263P)) {
616         av_log(avctx, AV_LOG_ERROR,
617                "multi threaded encoding not supported by codec\n");
618         return AVERROR_PATCHWELCOME;
619     }
620
621     if (avctx->thread_count < 1) {
622         av_log(avctx, AV_LOG_ERROR,
623                "automatic thread number detection not supported by codec, "
624                "patch welcome\n");
625         return AVERROR_PATCHWELCOME;
626     }
627
628     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
629         av_log(avctx, AV_LOG_INFO,
630                "notice: b_frame_strategy only affects the first pass\n");
631         s->b_frame_strategy = 0;
632     }
633
634     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
635     if (i > 1) {
636         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
637         avctx->time_base.den /= i;
638         avctx->time_base.num /= i;
639         //return -1;
640     }
641
642     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id == AV_CODEC_ID_AMV || s->codec_id == AV_CODEC_ID_SPEEDHQ) {
643         // (a + x * 3 / 8) / x
644         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
645         s->inter_quant_bias = 0;
646     } else {
647         s->intra_quant_bias = 0;
648         // (a - x / 4) / x
649         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
650     }
651
652     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
653         av_log(avctx, AV_LOG_ERROR, "qmin and/or qmax are invalid, they must satisfy 0 < min <= max\n");
654         return AVERROR(EINVAL);
655     }
656
657     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n", s->intra_quant_bias, s->inter_quant_bias);
658
659     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
660         avctx->time_base.den > (1 << 16) - 1) {
661         av_log(avctx, AV_LOG_ERROR,
662                "timebase %d/%d not supported by MPEG 4 standard, "
663                "the maximum admitted value for the timebase denominator "
664                "is %d\n", avctx->time_base.num, avctx->time_base.den,
665                (1 << 16) - 1);
666         return AVERROR(EINVAL);
667     }
668     s->time_increment_bits = av_log2(avctx->time_base.den - 1) + 1;
669
670     switch (avctx->codec->id) {
671     case AV_CODEC_ID_MPEG1VIDEO:
672         s->out_format = FMT_MPEG1;
673         s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
674         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
675         break;
676     case AV_CODEC_ID_MPEG2VIDEO:
677         s->out_format = FMT_MPEG1;
678         s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
679         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
680         s->rtp_mode   = 1;
681         break;
682 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
683     case AV_CODEC_ID_MJPEG:
684     case AV_CODEC_ID_AMV:
685         s->out_format = FMT_MJPEG;
686         s->intra_only = 1; /* force intra only for jpeg */
687         if ((ret = ff_mjpeg_encode_init(s)) < 0)
688             return ret;
689         avctx->delay = 0;
690         s->low_delay = 1;
691         break;
692 #endif
693     case AV_CODEC_ID_SPEEDHQ:
694         s->out_format = FMT_SPEEDHQ;
695         s->intra_only = 1; /* force intra only for SHQ */
696         if (!CONFIG_SPEEDHQ_ENCODER)
697             return AVERROR_ENCODER_NOT_FOUND;
698         if ((ret = ff_speedhq_encode_init(s)) < 0)
699             return ret;
700         avctx->delay = 0;
701         s->low_delay = 1;
702         break;
703     case AV_CODEC_ID_H261:
704         if (!CONFIG_H261_ENCODER)
705             return AVERROR_ENCODER_NOT_FOUND;
706         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
707             av_log(avctx, AV_LOG_ERROR,
708                    "The specified picture size of %dx%d is not valid for the "
709                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
710                     s->width, s->height);
711             return AVERROR(EINVAL);
712         }
713         s->out_format = FMT_H261;
714         avctx->delay  = 0;
715         s->low_delay  = 1;
716         s->rtp_mode   = 0; /* Sliced encoding not supported */
717         break;
718     case AV_CODEC_ID_H263:
719         if (!CONFIG_H263_ENCODER)
720             return AVERROR_ENCODER_NOT_FOUND;
721         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
722                              s->width, s->height) == 8) {
723             av_log(avctx, AV_LOG_ERROR,
724                    "The specified picture size of %dx%d is not valid for "
725                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
726                    "352x288, 704x576, and 1408x1152. "
727                    "Try H.263+.\n", s->width, s->height);
728             return AVERROR(EINVAL);
729         }
730         s->out_format = FMT_H263;
731         avctx->delay  = 0;
732         s->low_delay  = 1;
733         break;
734     case AV_CODEC_ID_H263P:
735         s->out_format = FMT_H263;
736         s->h263_plus  = 1;
737         /* Fx */
738         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
739         s->modified_quant  = s->h263_aic;
740         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
741         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
742
743         /* /Fx */
744         /* These are just to be sure */
745         avctx->delay = 0;
746         s->low_delay = 1;
747         break;
748     case AV_CODEC_ID_FLV1:
749         s->out_format      = FMT_H263;
750         s->h263_flv        = 2; /* format = 1; 11-bit codes */
751         s->unrestricted_mv = 1;
752         s->rtp_mode  = 0; /* don't allow GOB */
753         avctx->delay = 0;
754         s->low_delay = 1;
755         break;
756     case AV_CODEC_ID_RV10:
757         s->out_format = FMT_H263;
758         avctx->delay  = 0;
759         s->low_delay  = 1;
760         break;
761     case AV_CODEC_ID_RV20:
762         s->out_format      = FMT_H263;
763         avctx->delay       = 0;
764         s->low_delay       = 1;
765         s->modified_quant  = 1;
766         s->h263_aic        = 1;
767         s->h263_plus       = 1;
768         s->loop_filter     = 1;
769         s->unrestricted_mv = 0;
770         break;
771     case AV_CODEC_ID_MPEG4:
772         s->out_format      = FMT_H263;
773         s->h263_pred       = 1;
774         s->unrestricted_mv = 1;
775         s->low_delay       = s->max_b_frames ? 0 : 1;
776         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
777         break;
778     case AV_CODEC_ID_MSMPEG4V2:
779         s->out_format      = FMT_H263;
780         s->h263_pred       = 1;
781         s->unrestricted_mv = 1;
782         s->msmpeg4_version = 2;
783         avctx->delay       = 0;
784         s->low_delay       = 1;
785         break;
786     case AV_CODEC_ID_MSMPEG4V3:
787         s->out_format        = FMT_H263;
788         s->h263_pred         = 1;
789         s->unrestricted_mv   = 1;
790         s->msmpeg4_version   = 3;
791         s->flipflop_rounding = 1;
792         avctx->delay         = 0;
793         s->low_delay         = 1;
794         break;
795     case AV_CODEC_ID_WMV1:
796         s->out_format        = FMT_H263;
797         s->h263_pred         = 1;
798         s->unrestricted_mv   = 1;
799         s->msmpeg4_version   = 4;
800         s->flipflop_rounding = 1;
801         avctx->delay         = 0;
802         s->low_delay         = 1;
803         break;
804     case AV_CODEC_ID_WMV2:
805         s->out_format        = FMT_H263;
806         s->h263_pred         = 1;
807         s->unrestricted_mv   = 1;
808         s->msmpeg4_version   = 5;
809         s->flipflop_rounding = 1;
810         avctx->delay         = 0;
811         s->low_delay         = 1;
812         break;
813     default:
814         return AVERROR(EINVAL);
815     }
816
817     avctx->has_b_frames = !s->low_delay;
818
819     s->encoding = 1;
820
821     s->progressive_frame    =
822     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
823                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
824                                 s->alternate_scan);
825
826     /* init */
827     ff_mpv_idct_init(s);
828     if ((ret = ff_mpv_common_init(s)) < 0)
829         return ret;
830
831     ff_fdctdsp_init(&s->fdsp, avctx);
832     ff_me_cmp_init(&s->mecc, avctx);
833     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
834     ff_pixblockdsp_init(&s->pdsp, avctx);
835     ff_qpeldsp_init(&s->qdsp);
836
837     if (s->msmpeg4_version) {
838         int ac_stats_size = 2 * 2 * (MAX_LEVEL + 1) *  (MAX_RUN + 1) * 2 * sizeof(int);
839         if (!(s->ac_stats = av_mallocz(ac_stats_size)))
840             return AVERROR(ENOMEM);
841     }
842
843     if (!(avctx->stats_out = av_mallocz(256))               ||
844         !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix,          32) ||
845         !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix,   32) ||
846         !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix,          32) ||
847         !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix16,        32) ||
848         !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix16, 32) ||
849         !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix16,        32) ||
850         !FF_ALLOCZ_TYPED_ARRAY(s->input_picture,           MAX_PICTURE_COUNT) ||
851         !FF_ALLOCZ_TYPED_ARRAY(s->reordered_input_picture, MAX_PICTURE_COUNT))
852         return AVERROR(ENOMEM);
853
854     if (s->noise_reduction) {
855         if (!FF_ALLOCZ_TYPED_ARRAY(s->dct_offset, 2))
856             return AVERROR(ENOMEM);
857     }
858
859     ff_dct_encode_init(s);
860
861     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
862         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
863
864     if (s->slice_context_count > 1) {
865         s->rtp_mode = 1;
866
867         if (avctx->codec_id == AV_CODEC_ID_H263P)
868             s->h263_slice_structured = 1;
869     }
870
871     s->quant_precision = 5;
872
873     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      avctx->ildct_cmp);
874     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
875
876     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
877         ff_h261_encode_init(s);
878     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
879         ff_h263_encode_init(s);
880     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
881         ff_msmpeg4_encode_init(s);
882     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
883         && s->out_format == FMT_MPEG1)
884         ff_mpeg1_encode_init(s);
885
886     /* init q matrix */
887     for (i = 0; i < 64; i++) {
888         int j = s->idsp.idct_permutation[i];
889         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
890             s->mpeg_quant) {
891             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
892             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
893         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
894             s->intra_matrix[j] =
895             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
896         } else if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
897             s->intra_matrix[j] =
898             s->inter_matrix[j] = ff_mpeg1_default_intra_matrix[i];
899         } else {
900             /* MPEG-1/2 */
901             s->chroma_intra_matrix[j] =
902             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
903             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
904         }
905         if (avctx->intra_matrix)
906             s->intra_matrix[j] = avctx->intra_matrix[i];
907         if (avctx->inter_matrix)
908             s->inter_matrix[j] = avctx->inter_matrix[i];
909     }
910
911     /* precompute matrix */
912     /* for mjpeg, we do include qscale in the matrix */
913     if (s->out_format != FMT_MJPEG) {
914         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
915                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
916                           31, 1);
917         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
918                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
919                           31, 0);
920     }
921
922     if ((ret = ff_rate_control_init(s)) < 0)
923         return ret;
924
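    /* b_frame_strategy 2 pre-encodes downscaled copies (by brd_scale) of the
     * queued input frames to pick the B-frame count, so allocate the scratch
     * frames it will use here. */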
925     if (s->b_frame_strategy == 2) {
926         for (i = 0; i < s->max_b_frames + 2; i++) {
927             s->tmp_frames[i] = av_frame_alloc();
928             if (!s->tmp_frames[i])
929                 return AVERROR(ENOMEM);
930
931             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
932             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
933             s->tmp_frames[i]->height = s->height >> s->brd_scale;
934
935             ret = av_frame_get_buffer(s->tmp_frames[i], 0);
936             if (ret < 0)
937                 return ret;
938         }
939     }
940
941     cpb_props = ff_add_cpb_side_data(avctx);
942     if (!cpb_props)
943         return AVERROR(ENOMEM);
944     cpb_props->max_bitrate = avctx->rc_max_rate;
945     cpb_props->min_bitrate = avctx->rc_min_rate;
946     cpb_props->avg_bitrate = avctx->bit_rate;
947     cpb_props->buffer_size = avctx->rc_buffer_size;
948
949     return 0;
950 }
951
952 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
953 {
954     MpegEncContext *s = avctx->priv_data;
955     int i;
956
957     ff_rate_control_uninit(s);
958
959     ff_mpv_common_end(s);
960     if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
961         s->out_format == FMT_MJPEG)
962         ff_mjpeg_encode_close(s);
963
964     av_freep(&avctx->extradata);
965
966     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
967         av_frame_free(&s->tmp_frames[i]);
968
969     ff_free_picture_tables(&s->new_picture);
970     ff_mpeg_unref_picture(avctx, &s->new_picture);
971
972     av_freep(&avctx->stats_out);
973     av_freep(&s->ac_stats);
974
975     if (s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
976     if (s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
977     s->q_chroma_intra_matrix   = NULL;
978     s->q_chroma_intra_matrix16 = NULL;
979     av_freep(&s->q_intra_matrix);
980     av_freep(&s->q_inter_matrix);
981     av_freep(&s->q_intra_matrix16);
982     av_freep(&s->q_inter_matrix16);
983     av_freep(&s->input_picture);
984     av_freep(&s->reordered_input_picture);
985     av_freep(&s->dct_offset);
986
987     return 0;
988 }
989
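/* Sum of absolute differences of a 16x16 block against a constant reference
 * value (typically the block mean); a small result means the block is flat. */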
990 static int get_sae(uint8_t *src, int ref, int stride)
991 {
992     int x,y;
993     int acc = 0;
994
995     for (y = 0; y < 16; y++) {
996         for (x = 0; x < 16; x++) {
997             acc += FFABS(src[x + y * stride] - ref);
998         }
999     }
1000
1001     return acc;
1002 }
1003
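/* Count the macroblocks for which coding against their own mean (plus a small
 * margin) looks cheaper than inter prediction from 'ref'; b_frame_strategy 1
 * uses this as a crude scene-change / intra-cost estimate. */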
1004 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1005                            uint8_t *ref, int stride)
1006 {
1007     int x, y, w, h;
1008     int acc = 0;
1009
1010     w = s->width  & ~15;
1011     h = s->height & ~15;
1012
1013     for (y = 0; y < h; y += 16) {
1014         for (x = 0; x < w; x += 16) {
1015             int offset = x + y * stride;
1016             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1017                                       stride, 16);
1018             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1019             int sae  = get_sae(src + offset, mean, stride);
1020
1021             acc += sae + 500 < sad;
1022         }
1023     }
1024     return acc;
1025 }
1026
1027 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1028 {
1029     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1030                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1031                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1032                             &s->linesize, &s->uvlinesize);
1033 }
1034
1035 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1036 {
1037     Picture *pic = NULL;
1038     int64_t pts;
1039     int i, display_picture_number = 0, ret;
1040     int encoding_delay = s->max_b_frames ? s->max_b_frames
1041                                          : (s->low_delay ? 0 : 1);
1042     int flush_offset = 1;
1043     int direct = 1;
1044
1045     if (pic_arg) {
1046         pts = pic_arg->pts;
1047         display_picture_number = s->input_picture_number++;
1048
1049         if (pts != AV_NOPTS_VALUE) {
1050             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1051                 int64_t last = s->user_specified_pts;
1052
1053                 if (pts <= last) {
1054                     av_log(s->avctx, AV_LOG_ERROR,
1055                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1056                            pts, last);
1057                     return AVERROR(EINVAL);
1058                 }
1059
1060                 if (!s->low_delay && display_picture_number == 1)
1061                     s->dts_delta = pts - last;
1062             }
1063             s->user_specified_pts = pts;
1064         } else {
1065             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1066                 s->user_specified_pts =
1067                 pts = s->user_specified_pts + 1;
1068                 av_log(s->avctx, AV_LOG_INFO,
1069                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1070                        pts);
1071             } else {
1072                 pts = display_picture_number;
1073             }
1074         }
1075
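        /* Use the caller's frame in place ("direct") only if it is
         * reference-counted, its strides match ours, the dimensions are a
         * multiple of 16 and the data is sufficiently aligned; otherwise the
         * picture is copied into an internally allocated buffer below. */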
1076         if (!pic_arg->buf[0] ||
1077             pic_arg->linesize[0] != s->linesize ||
1078             pic_arg->linesize[1] != s->uvlinesize ||
1079             pic_arg->linesize[2] != s->uvlinesize)
1080             direct = 0;
1081         if ((s->width & 15) || (s->height & 15))
1082             direct = 0;
1083         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1084             direct = 0;
1085         if (s->linesize & (STRIDE_ALIGN-1))
1086             direct = 0;
1087
1088         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1089                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1090
1091         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1092         if (i < 0)
1093             return i;
1094
1095         pic = &s->picture[i];
1096         pic->reference = 3;
1097
1098         if (direct) {
1099             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1100                 return ret;
1101         }
1102         ret = alloc_picture(s, pic, direct);
1103         if (ret < 0)
1104             return ret;
1105
1106         if (!direct) {
1107             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1108                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1109                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1110                 // empty
1111             } else {
1112                 int h_chroma_shift, v_chroma_shift;
1113                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1114                                                  &h_chroma_shift,
1115                                                  &v_chroma_shift);
1116
1117                 for (i = 0; i < 3; i++) {
1118                     int src_stride = pic_arg->linesize[i];
1119                     int dst_stride = i ? s->uvlinesize : s->linesize;
1120                     int h_shift = i ? h_chroma_shift : 0;
1121                     int v_shift = i ? v_chroma_shift : 0;
1122                     int w = s->width  >> h_shift;
1123                     int h = s->height >> v_shift;
1124                     uint8_t *src = pic_arg->data[i];
1125                     uint8_t *dst = pic->f->data[i];
1126                     int vpad = 16;
1127
1128                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1129                         && !s->progressive_sequence
1130                         && FFALIGN(s->height, 32) - s->height > 16)
1131                         vpad = 32;
1132
1133                     if (!s->avctx->rc_buffer_size)
1134                         dst += INPLACE_OFFSET;
1135
1136                     if (src_stride == dst_stride)
1137                         memcpy(dst, src, src_stride * h);
1138                     else {
1139                         int h2 = h;
1140                         uint8_t *dst2 = dst;
1141                         while (h2--) {
1142                             memcpy(dst2, src, w);
1143                             dst2 += dst_stride;
1144                             src += src_stride;
1145                         }
1146                     }
1147                     if ((s->width & 15) || (s->height & (vpad-1))) {
1148                         s->mpvencdsp.draw_edges(dst, dst_stride,
1149                                                 w, h,
1150                                                 16 >> h_shift,
1151                                                 vpad >> v_shift,
1152                                                 EDGE_BOTTOM);
1153                     }
1154                 }
1155                 emms_c();
1156             }
1157         }
1158         ret = av_frame_copy_props(pic->f, pic_arg);
1159         if (ret < 0)
1160             return ret;
1161
1162         pic->f->display_picture_number = display_picture_number;
1163         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1164     } else {
1165         /* Flushing: When we have not received enough input frames,
1166          * ensure s->input_picture[0] contains the first picture */
1167         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1168             if (s->input_picture[flush_offset])
1169                 break;
1170
1171         if (flush_offset <= 1)
1172             flush_offset = 1;
1173         else
1174             encoding_delay = encoding_delay - flush_offset + 1;
1175     }
1176
1177     /* shift buffer entries */
1178     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1179         s->input_picture[i - flush_offset] = s->input_picture[i];
1180
1181     s->input_picture[encoding_delay] = (Picture*) pic;
1182
1183     return 0;
1184 }
1185
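/* Decide whether the current input picture may be skipped: compare it against
 * the reference with frame_skip_cmp, accumulate per-block scores according to
 * frame_skip_exp, and return 1 if the result stays below frame_skip_threshold
 * or below frame_skip_factor scaled by the current lambda. */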
1186 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1187 {
1188     int x, y, plane;
1189     int score = 0;
1190     int64_t score64 = 0;
1191
1192     for (plane = 0; plane < 3; plane++) {
1193         const int stride = p->f->linesize[plane];
1194         const int bw = plane ? 1 : 2;
1195         for (y = 0; y < s->mb_height * bw; y++) {
1196             for (x = 0; x < s->mb_width * bw; x++) {
1197                 int off = p->shared ? 0 : 16;
1198                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1199                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1200                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1201
1202                 switch (FFABS(s->frame_skip_exp)) {
1203                 case 0: score    =  FFMAX(score, v);          break;
1204                 case 1: score   += FFABS(v);                  break;
1205                 case 2: score64 += v * (int64_t)v;                       break;
1206                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1207                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1208                 }
1209             }
1210         }
1211     }
1212     emms_c();
1213
1214     if (score)
1215         score64 = score;
1216     if (s->frame_skip_exp < 0)
1217         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1218                       -1.0/s->frame_skip_exp);
1219
1220     if (score64 < s->frame_skip_threshold)
1221         return 1;
1222     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1223         return 1;
1224     return 0;
1225 }
1226
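/* Send one frame (or NULL to flush) to the helper encoder 'c' and drain all
 * resulting packets, returning the total compressed size in bytes. */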
1227 static int encode_frame(AVCodecContext *c, AVFrame *frame, AVPacket *pkt)
1228 {
1229     int ret;
1230     int size = 0;
1231
1232     ret = avcodec_send_frame(c, frame);
1233     if (ret < 0)
1234         return ret;
1235
1236     do {
1237         ret = avcodec_receive_packet(c, pkt);
1238         if (ret >= 0) {
1239             size += pkt->size;
1240             av_packet_unref(pkt);
1241         } else if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
1242             return ret;
1243     } while (ret >= 0);
1244
1245     return size;
1246 }
1247
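/* b_frame_strategy 2: encode the downscaled copies of the queued frames with a
 * throwaway encoder, once for each candidate number of consecutive B-frames,
 * and return the count that gives the lowest rate/distortion cost. */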
1248 static int estimate_best_b_count(MpegEncContext *s)
1249 {
1250     const AVCodec *codec = avcodec_find_encoder(s->avctx->codec_id);
1251     AVPacket *pkt;
1252     const int scale = s->brd_scale;
1253     int width  = s->width  >> scale;
1254     int height = s->height >> scale;
1255     int i, j, out_size, p_lambda, b_lambda, lambda2;
1256     int64_t best_rd  = INT64_MAX;
1257     int best_b_count = -1;
1258     int ret = 0;
1259
1260     av_assert0(scale >= 0 && scale <= 3);
1261
1262     pkt = av_packet_alloc();
1263     if (!pkt)
1264         return AVERROR(ENOMEM);
1265
1266     //emms_c();
1267     //s->next_picture_ptr->quality;
1268     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1269     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1270     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1271     if (!b_lambda) // FIXME we should do this somewhere else
1272         b_lambda = p_lambda;
1273     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1274                FF_LAMBDA_SHIFT;
1275
1276     for (i = 0; i < s->max_b_frames + 2; i++) {
1277         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1278                                                 s->next_picture_ptr;
1279         uint8_t *data[4];
1280
1281         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1282             pre_input = *pre_input_ptr;
1283             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1284
1285             if (!pre_input.shared && i) {
1286                 data[0] += INPLACE_OFFSET;
1287                 data[1] += INPLACE_OFFSET;
1288                 data[2] += INPLACE_OFFSET;
1289             }
1290
1291             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1292                                        s->tmp_frames[i]->linesize[0],
1293                                        data[0],
1294                                        pre_input.f->linesize[0],
1295                                        width, height);
1296             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1297                                        s->tmp_frames[i]->linesize[1],
1298                                        data[1],
1299                                        pre_input.f->linesize[1],
1300                                        width >> 1, height >> 1);
1301             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1302                                        s->tmp_frames[i]->linesize[2],
1303                                        data[2],
1304                                        pre_input.f->linesize[2],
1305                                        width >> 1, height >> 1);
1306         }
1307     }
1308
1309     for (j = 0; j < s->max_b_frames + 1; j++) {
1310         AVCodecContext *c;
1311         int64_t rd = 0;
1312
1313         if (!s->input_picture[j])
1314             break;
1315
1316         c = avcodec_alloc_context3(NULL);
1317         if (!c) {
1318             ret = AVERROR(ENOMEM);
1319             goto fail;
1320         }
1321
1322         c->width        = width;
1323         c->height       = height;
1324         c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1325         c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1326         c->mb_decision  = s->avctx->mb_decision;
1327         c->me_cmp       = s->avctx->me_cmp;
1328         c->mb_cmp       = s->avctx->mb_cmp;
1329         c->me_sub_cmp   = s->avctx->me_sub_cmp;
1330         c->pix_fmt      = AV_PIX_FMT_YUV420P;
1331         c->time_base    = s->avctx->time_base;
1332         c->max_b_frames = s->max_b_frames;
1333
1334         ret = avcodec_open2(c, codec, NULL);
1335         if (ret < 0)
1336             goto fail;
1337
1338
1339         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1340         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1341
1342         out_size = encode_frame(c, s->tmp_frames[0], pkt);
1343         if (out_size < 0) {
1344             ret = out_size;
1345             goto fail;
1346         }
1347
1348         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1349
1350         for (i = 0; i < s->max_b_frames + 1; i++) {
1351             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1352
1353             s->tmp_frames[i + 1]->pict_type = is_p ?
1354                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1355             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1356
1357             out_size = encode_frame(c, s->tmp_frames[i + 1], pkt);
1358             if (out_size < 0) {
1359                 ret = out_size;
1360                 goto fail;
1361             }
1362
1363             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1364         }
1365
1366         /* get the delayed frames */
1367         out_size = encode_frame(c, NULL, pkt);
1368         if (out_size < 0) {
1369             ret = out_size;
1370             goto fail;
1371         }
1372         rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1373
1374         rd += c->error[0] + c->error[1] + c->error[2];
1375
1376         if (rd < best_rd) {
1377             best_rd = rd;
1378             best_b_count = j;
1379         }
1380
1381 fail:
1382         avcodec_free_context(&c);
1383         av_packet_unref(pkt);
1384         if (ret < 0) {
1385             best_b_count = ret;
1386             break;
1387         }
1388     }
1389
1390     av_packet_free(&pkt);
1391
1392     return best_b_count;
1393 }
1394
1395 static int select_input_picture(MpegEncContext *s)
1396 {
1397     int i, ret;
1398
1399     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1400         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1401     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1402
1403     /* set next picture type & ordering */
1404     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1405         if (s->frame_skip_threshold || s->frame_skip_factor) {
1406             if (s->picture_in_gop_number < s->gop_size &&
1407                 s->next_picture_ptr &&
1408                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1409                 // FIXME check that the gop check above is +-1 correct
1410                 av_frame_unref(s->input_picture[0]->f);
1411
1412                 ff_vbv_update(s, 0);
1413
1414                 goto no_output_pic;
1415             }
1416         }
1417
1418         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1419             !s->next_picture_ptr || s->intra_only) {
1420             s->reordered_input_picture[0] = s->input_picture[0];
1421             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1422             s->reordered_input_picture[0]->f->coded_picture_number =
1423                 s->coded_picture_number++;
1424         } else {
1425             int b_frames = 0;
1426
1427             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1428                 for (i = 0; i < s->max_b_frames + 1; i++) {
1429                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1430
1431                     if (pict_num >= s->rc_context.num_entries)
1432                         break;
1433                     if (!s->input_picture[i]) {
1434                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1435                         break;
1436                     }
1437
1438                     s->input_picture[i]->f->pict_type =
1439                         s->rc_context.entry[pict_num].new_pict_type;
1440                 }
1441             }
1442
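                 /* b_frame_strategy 0: always use max_b_frames (limited to the
                  * frames actually buffered); 1: score candidates by their
                  * intra-MB count; 2: brute-force search via
                  * estimate_best_b_count(). */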
1443             if (s->b_frame_strategy == 0) {
1444                 b_frames = s->max_b_frames;
1445                 while (b_frames && !s->input_picture[b_frames])
1446                     b_frames--;
1447             } else if (s->b_frame_strategy == 1) {
1448                 for (i = 1; i < s->max_b_frames + 1; i++) {
1449                     if (s->input_picture[i] &&
1450                         s->input_picture[i]->b_frame_score == 0) {
1451                         s->input_picture[i]->b_frame_score =
1452                             get_intra_count(s,
1453                                             s->input_picture[i    ]->f->data[0],
1454                                             s->input_picture[i - 1]->f->data[0],
1455                                             s->linesize) + 1;
1456                     }
1457                 }
1458                 for (i = 0; i < s->max_b_frames + 1; i++) {
1459                     if (!s->input_picture[i] ||
1460                         s->input_picture[i]->b_frame_score - 1 >
1461                             s->mb_num / s->b_sensitivity)
1462                         break;
1463                 }
1464
1465                 b_frames = FFMAX(0, i - 1);
1466
1467                 /* reset scores */
1468                 for (i = 0; i < b_frames + 1; i++) {
1469                     s->input_picture[i]->b_frame_score = 0;
1470                 }
1471             } else if (s->b_frame_strategy == 2) {
1472                 b_frames = estimate_best_b_count(s);
1473                 if (b_frames < 0)
1474                     return b_frames;
1475             }
1476
1477             emms_c();
1478
1479             for (i = b_frames - 1; i >= 0; i--) {
1480                 int type = s->input_picture[i]->f->pict_type;
1481                 if (type && type != AV_PICTURE_TYPE_B)
1482                     b_frames = i;
1483             }
1484             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1485                 b_frames == s->max_b_frames) {
1486                 av_log(s->avctx, AV_LOG_ERROR,
1487                        "warning, too many B-frames in a row\n");
1488             }
1489
1490             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1491                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1492                     s->gop_size > s->picture_in_gop_number) {
1493                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1494                 } else {
1495                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1496                         b_frames = 0;
1497                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1498                 }
1499             }
1500
1501             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1502                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1503                 b_frames--;
1504
1505             s->reordered_input_picture[0] = s->input_picture[b_frames];
1506             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1507                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1508             s->reordered_input_picture[0]->f->coded_picture_number =
1509                 s->coded_picture_number++;
1510             for (i = 0; i < b_frames; i++) {
1511                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1512                 s->reordered_input_picture[i + 1]->f->pict_type =
1513                     AV_PICTURE_TYPE_B;
1514                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1515                     s->coded_picture_number++;
1516             }
1517         }
1518     }
1519 no_output_pic:
1520     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1521
1522     if (s->reordered_input_picture[0]) {
1523         s->reordered_input_picture[0]->reference =
1524            s->reordered_input_picture[0]->f->pict_type !=
1525                AV_PICTURE_TYPE_B ? 3 : 0;
1526
1527         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1528             return ret;
1529
1530         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1531             // input is a shared picture, so we cannot modify it -> allocate a new
1532             // one and ensure that the shared one stays reusable
1533
1534             Picture *pic;
1535             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1536             if (i < 0)
1537                 return i;
1538             pic = &s->picture[i];
1539
1540             pic->reference = s->reordered_input_picture[0]->reference;
1541             if (alloc_picture(s, pic, 0) < 0) {
1542                 return -1;
1543             }
1544
1545             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1546             if (ret < 0)
1547                 return ret;
1548
1549             /* mark us unused / free shared pic */
1550             av_frame_unref(s->reordered_input_picture[0]->f);
1551             s->reordered_input_picture[0]->shared = 0;
1552
1553             s->current_picture_ptr = pic;
1554         } else {
1555             // input is not a shared picture -> reuse its buffer as the current picture
1556             s->current_picture_ptr = s->reordered_input_picture[0];
1557             for (i = 0; i < 4; i++) {
1558                 if (s->new_picture.f->data[i])
1559                     s->new_picture.f->data[i] += INPLACE_OFFSET;
1560             }
1561         }
1562         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1563         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1564                                        s->current_picture_ptr)) < 0)
1565             return ret;
1566
1567         s->picture_number = s->new_picture.f->display_picture_number;
1568     }
1569     return 0;
1570 }
1571
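     /* Finish the current frame: pad the borders of the reconstructed reference
      * picture for unrestricted motion vectors and record the last picture type
      * and lambda for use by the following frames. */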
1572 static void frame_end(MpegEncContext *s)
1573 {
1574     if (s->unrestricted_mv &&
1575         s->current_picture.reference &&
1576         !s->intra_only) {
1577         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1578         int hshift = desc->log2_chroma_w;
1579         int vshift = desc->log2_chroma_h;
1580         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1581                                 s->current_picture.f->linesize[0],
1582                                 s->h_edge_pos, s->v_edge_pos,
1583                                 EDGE_WIDTH, EDGE_WIDTH,
1584                                 EDGE_TOP | EDGE_BOTTOM);
1585         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1586                                 s->current_picture.f->linesize[1],
1587                                 s->h_edge_pos >> hshift,
1588                                 s->v_edge_pos >> vshift,
1589                                 EDGE_WIDTH >> hshift,
1590                                 EDGE_WIDTH >> vshift,
1591                                 EDGE_TOP | EDGE_BOTTOM);
1592         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1593                                 s->current_picture.f->linesize[2],
1594                                 s->h_edge_pos >> hshift,
1595                                 s->v_edge_pos >> vshift,
1596                                 EDGE_WIDTH >> hshift,
1597                                 EDGE_WIDTH >> vshift,
1598                                 EDGE_TOP | EDGE_BOTTOM);
1599     }
1600
1601     emms_c();
1602
1603     s->last_pict_type                 = s->pict_type;
1604     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1605     if (s->pict_type != AV_PICTURE_TYPE_B)
1606         s->last_non_b_pict_type = s->pict_type;
1607
1608 #if FF_API_ERROR_FRAME
1609 FF_DISABLE_DEPRECATION_WARNINGS
1610     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1611            sizeof(s->current_picture.encoding_error));
1612 FF_ENABLE_DEPRECATION_WARNINGS
1613 #endif
1614 }
1615
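     /* Refresh the per-coefficient offsets used for DCT-domain noise reduction
      * during quantization from the accumulated error statistics; the sums are
      * halved once the count exceeds 2^16 so they act as a decaying average and
      * cannot overflow. */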
1616 static void update_noise_reduction(MpegEncContext *s)
1617 {
1618     int intra, i;
1619
1620     for (intra = 0; intra < 2; intra++) {
1621         if (s->dct_count[intra] > (1 << 16)) {
1622             for (i = 0; i < 64; i++) {
1623                 s->dct_error_sum[intra][i] >>= 1;
1624             }
1625             s->dct_count[intra] >>= 1;
1626         }
1627
1628         for (i = 0; i < 64; i++) {
1629             s->dct_offset[intra][i] = (s->noise_reduction *
1630                                        s->dct_count[intra] +
1631                                        s->dct_error_sum[intra][i] / 2) /
1632                                       (s->dct_error_sum[intra][i] + 1);
1633         }
1634     }
1635 }
1636
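     /* Prepare encoding of one frame: rotate and re-reference the last/next
      * reference pictures, adjust data pointers and linesizes for field
      * pictures, and select the dequantization functions for this codec. */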
1637 static int frame_start(MpegEncContext *s)
1638 {
1639     int ret;
1640
1641     /* mark & release old frames */
1642     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1643         s->last_picture_ptr != s->next_picture_ptr &&
1644         s->last_picture_ptr->f->buf[0]) {
1645         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1646     }
1647
1648     s->current_picture_ptr->f->pict_type = s->pict_type;
1649     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1650
1651     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1652     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1653                                    s->current_picture_ptr)) < 0)
1654         return ret;
1655
1656     if (s->pict_type != AV_PICTURE_TYPE_B) {
1657         s->last_picture_ptr = s->next_picture_ptr;
1658         if (!s->droppable)
1659             s->next_picture_ptr = s->current_picture_ptr;
1660     }
1661
1662     if (s->last_picture_ptr) {
1663         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1664         if (s->last_picture_ptr->f->buf[0] &&
1665             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1666                                        s->last_picture_ptr)) < 0)
1667             return ret;
1668     }
1669     if (s->next_picture_ptr) {
1670         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1671         if (s->next_picture_ptr->f->buf[0] &&
1672             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1673                                        s->next_picture_ptr)) < 0)
1674             return ret;
1675     }
1676
1677     if (s->picture_structure != PICT_FRAME) {
1678         int i;
1679         for (i = 0; i < 4; i++) {
1680             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1681                 s->current_picture.f->data[i] +=
1682                     s->current_picture.f->linesize[i];
1683             }
1684             s->current_picture.f->linesize[i] *= 2;
1685             s->last_picture.f->linesize[i]    *= 2;
1686             s->next_picture.f->linesize[i]    *= 2;
1687         }
1688     }
1689
1690     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1691         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1692         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1693     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1694         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1695         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1696     } else {
1697         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1698         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1699     }
1700
1701     if (s->dct_error_sum) {
1702         av_assert2(s->noise_reduction && s->encoding);
1703         update_noise_reduction(s);
1704     }
1705
1706     return 0;
1707 }
1708
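     /* Main encoding entry point: queue the input frame, select the picture to
      * code, encode it (retrying with a larger lambda if the VBV constraints
      * are violated) and write the resulting bitstream and side data into pkt. */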
1709 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1710                           const AVFrame *pic_arg, int *got_packet)
1711 {
1712     MpegEncContext *s = avctx->priv_data;
1713     int i, stuffing_count, ret;
1714     int context_count = s->slice_context_count;
1715
1716     s->vbv_ignore_qmax = 0;
1717
1718     s->picture_in_gop_number++;
1719
1720     if (load_input_picture(s, pic_arg) < 0)
1721         return -1;
1722
1723     if (select_input_picture(s) < 0) {
1724         return -1;
1725     }
1726
1727     /* do we have a picture to encode and output? */
1728     if (s->new_picture.f->data[0]) {
1729         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1730         int pkt_size = growing_buffer
1731                        ? FFMAX(s->mb_width*s->mb_height*64 + 10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1732                        : s->mb_width*s->mb_height*(MAX_MB_BYTES + 100) + 10000;
1733         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1734             return ret;
1735         if (s->mb_info) {
1736             s->mb_info_ptr = av_packet_new_side_data(pkt,
1737                                  AV_PKT_DATA_H263_MB_INFO,
1738                                  s->mb_width*s->mb_height*12);
1739             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1740         }
1741
1742         for (i = 0; i < context_count; i++) {
1743             int start_y = s->thread_context[i]->start_mb_y;
1744             int   end_y = s->thread_context[i]->  end_mb_y;
1745             int h       = s->mb_height;
1746             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1747             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1748
1749             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1750         }
1751
1752         s->pict_type = s->new_picture.f->pict_type;
1753         //emms_c();
1754         ret = frame_start(s);
1755         if (ret < 0)
1756             return ret;
1757 vbv_retry:
1758         ret = encode_picture(s, s->picture_number);
1759         if (growing_buffer) {
1760             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1761             pkt->data = s->pb.buf;
1762             pkt->size = avctx->internal->byte_buffer_size;
1763         }
1764         if (ret < 0)
1765             return -1;
1766
1767         frame_end(s);
1768
1769         if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) && s->out_format == FMT_MJPEG)
1770             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1771
1772         if (avctx->rc_buffer_size) {
1773             RateControlContext *rcc = &s->rc_context;
1774             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1775             int hq = (avctx->mb_decision == FF_MB_DECISION_RD || avctx->trellis);
1776             int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1777
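                 /* If the coded frame is too large for the VBV buffer and lambda
                  * can still be raised, increase it (and the per-MB lambda
                  * table), undo the per-frame state changes made while encoding
                  * and retry. */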
1778             if (put_bits_count(&s->pb) > max_size &&
1779                 s->lambda < s->lmax) {
1780                 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1781                                        (s->qscale + 1) / s->qscale);
1782                 if (s->adaptive_quant) {
1783                     int i;
1784                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1785                         s->lambda_table[i] =
1786                             FFMAX(s->lambda_table[i] + min_step,
1787                                   s->lambda_table[i] * (s->qscale + 1) /
1788                                   s->qscale);
1789                 }
1790                 s->mb_skipped = 0;        // done in frame_start()
1791                 // the following was done in encode_picture(), so we must undo it here:
1792                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1793                     if (s->flipflop_rounding          ||
1794                         s->codec_id == AV_CODEC_ID_H263P ||
1795                         s->codec_id == AV_CODEC_ID_MPEG4)
1796                         s->no_rounding ^= 1;
1797                 }
1798                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1799                     s->time_base       = s->last_time_base;
1800                     s->last_non_b_time = s->time - s->pp_time;
1801                 }
1802                 for (i = 0; i < context_count; i++) {
1803                     PutBitContext *pb = &s->thread_context[i]->pb;
1804                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1805                 }
1806                 s->vbv_ignore_qmax = 1;
1807                 av_log(avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1808                 goto vbv_retry;
1809             }
1810
1811             av_assert0(avctx->rc_max_rate);
1812         }
1813
1814         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1815             ff_write_pass1_stats(s);
1816
1817         for (i = 0; i < 4; i++) {
1818             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1819             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1820         }
1821         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1822                                        s->current_picture_ptr->encoding_error,
1823                                        (avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1824                                        s->pict_type);
1825
1826         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1827             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1828                                              s->misc_bits + s->i_tex_bits +
1829                                              s->p_tex_bits);
1830         flush_put_bits(&s->pb);
1831         s->frame_bits  = put_bits_count(&s->pb);
1832
1833         stuffing_count = ff_vbv_update(s, s->frame_bits);
1834         s->stuffing_bits = 8*stuffing_count;
1835         if (stuffing_count) {
1836             if (put_bytes_left(&s->pb, 0) < stuffing_count + 50) {
1837                 av_log(avctx, AV_LOG_ERROR, "stuffing too large\n");
1838                 return -1;
1839             }
1840
1841             switch (s->codec_id) {
1842             case AV_CODEC_ID_MPEG1VIDEO:
1843             case AV_CODEC_ID_MPEG2VIDEO:
1844                 while (stuffing_count--) {
1845                     put_bits(&s->pb, 8, 0);
1846                 }
1847             break;
1848             case AV_CODEC_ID_MPEG4:
1849                 put_bits(&s->pb, 16, 0);
1850                 put_bits(&s->pb, 16, 0x1C3);
1851                 stuffing_count -= 4;
1852                 while (stuffing_count--) {
1853                     put_bits(&s->pb, 8, 0xFF);
1854                 }
1855             break;
1856             default:
1857                 av_log(avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1858             }
1859             flush_put_bits(&s->pb);
1860             s->frame_bits  = put_bits_count(&s->pb);
1861         }
1862
1863         /* update MPEG-1/2 vbv_delay for CBR */
1864         if (avctx->rc_max_rate                          &&
1865             avctx->rc_min_rate == avctx->rc_max_rate &&
1866             s->out_format == FMT_MPEG1                     &&
1867             90000LL * (avctx->rc_buffer_size - 1) <=
1868                 avctx->rc_max_rate * 0xFFFFLL) {
1869             AVCPBProperties *props;
1870             size_t props_size;
1871
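                 /* Derive the decoder buffer delay (in 90 kHz ticks) from the
                  * rate-control buffer fullness, patch it into the 16-bit
                  * vbv_delay field of the picture header already written at
                  * vbv_delay_ptr and export it as CPB properties side data
                  * (in 27 MHz units). */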
1872             int vbv_delay, min_delay;
1873             double inbits  = avctx->rc_max_rate *
1874                              av_q2d(avctx->time_base);
1875             int    minbits = s->frame_bits - 8 *
1876                              (s->vbv_delay_ptr - s->pb.buf - 1);
1877             double bits    = s->rc_context.buffer_index + minbits - inbits;
1878
1879             if (bits < 0)
1880                 av_log(avctx, AV_LOG_ERROR,
1881                        "Internal error, negative bits\n");
1882
1883             av_assert1(s->repeat_first_field == 0);
1884
1885             vbv_delay = bits * 90000 / avctx->rc_max_rate;
1886             min_delay = (minbits * 90000LL + avctx->rc_max_rate - 1) /
1887                         avctx->rc_max_rate;
1888
1889             vbv_delay = FFMAX(vbv_delay, min_delay);
1890
1891             av_assert0(vbv_delay < 0xFFFF);
1892
1893             s->vbv_delay_ptr[0] &= 0xF8;
1894             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1895             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1896             s->vbv_delay_ptr[2] &= 0x07;
1897             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1898
1899             props = av_cpb_properties_alloc(&props_size);
1900             if (!props)
1901                 return AVERROR(ENOMEM);
1902             props->vbv_delay = vbv_delay * 300;
1903
1904             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1905                                           (uint8_t*)props, props_size);
1906             if (ret < 0) {
1907                 av_freep(&props);
1908                 return ret;
1909             }
1910         }
1911         s->total_bits     += s->frame_bits;
1912
1913         pkt->pts = s->current_picture.f->pts;
1914         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1915             if (!s->current_picture.f->coded_picture_number)
1916                 pkt->dts = pkt->pts - s->dts_delta;
1917             else
1918                 pkt->dts = s->reordered_pts;
1919             s->reordered_pts = pkt->pts;
1920         } else
1921             pkt->dts = pkt->pts;
1922         if (s->current_picture.f->key_frame)
1923             pkt->flags |= AV_PKT_FLAG_KEY;
1924         if (s->mb_info)
1925             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1926     } else {
1927         s->frame_bits = 0;
1928     }
1929
1930     /* release non-reference frames */
1931     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1932         if (!s->picture[i].reference)
1933             ff_mpeg_unref_picture(avctx, &s->picture[i]);
1934     }
1935
1936     av_assert1((s->frame_bits & 7) == 0);
1937
1938     pkt->size = s->frame_bits / 8;
1939     *got_packet = !!pkt->size;
1940     return 0;
1941 }
1942
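     /* Zero out block n entirely when it contains nothing but a few scattered
      * +-1 coefficients whose weighted score stays below the threshold; a
      * negative threshold additionally allows the DC coefficient to be
      * eliminated. */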
1943 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1944                                                 int n, int threshold)
1945 {
1946     static const char tab[64] = {
1947         3, 2, 2, 1, 1, 1, 1, 1,
1948         1, 1, 1, 1, 1, 1, 1, 1,
1949         1, 1, 1, 1, 1, 1, 1, 1,
1950         0, 0, 0, 0, 0, 0, 0, 0,
1951         0, 0, 0, 0, 0, 0, 0, 0,
1952         0, 0, 0, 0, 0, 0, 0, 0,
1953         0, 0, 0, 0, 0, 0, 0, 0,
1954         0, 0, 0, 0, 0, 0, 0, 0
1955     };
1956     int score = 0;
1957     int run = 0;
1958     int i;
1959     int16_t *block = s->block[n];
1960     const int last_index = s->block_last_index[n];
1961     int skip_dc;
1962
1963     if (threshold < 0) {
1964         skip_dc = 0;
1965         threshold = -threshold;
1966     } else
1967         skip_dc = 1;
1968
1969     /* Is everything we could set to zero already zero? */
1970     if (last_index <= skip_dc - 1)
1971         return;
1972
1973     for (i = 0; i <= last_index; i++) {
1974         const int j = s->intra_scantable.permutated[i];
1975         const int level = FFABS(block[j]);
1976         if (level == 1) {
1977             if (skip_dc && i == 0)
1978                 continue;
1979             score += tab[run];
1980             run = 0;
1981         } else if (level > 1) {
1982             return;
1983         } else {
1984             run++;
1985         }
1986     }
1987     if (score >= threshold)
1988         return;
1989     for (i = skip_dc; i <= last_index; i++) {
1990         const int j = s->intra_scantable.permutated[i];
1991         block[j] = 0;
1992     }
1993     if (block[0])
1994         s->block_last_index[n] = 0;
1995     else
1996         s->block_last_index[n] = -1;
1997 }
1998
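     /* Clamp the quantized coefficients to the level range the codec can encode
      * (min_qcoeff..max_qcoeff), skipping the intra DC coefficient. */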
1999 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2000                                int last_index)
2001 {
2002     int i;
2003     const int maxlevel = s->max_qcoeff;
2004     const int minlevel = s->min_qcoeff;
2005     int overflow = 0;
2006
2007     if (s->mb_intra) {
2008         i = 1; // skip clipping of intra dc
2009     } else
2010         i = 0;
2011
2012     for (; i <= last_index; i++) {
2013         const int j = s->intra_scantable.permutated[i];
2014         int level = block[j];
2015
2016         if (level > maxlevel) {
2017             level = maxlevel;
2018             overflow++;
2019         } else if (level < minlevel) {
2020             level = minlevel;
2021             overflow++;
2022         }
2023
2024         block[j] = level;
2025     }
2026
2027     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2028         av_log(s->avctx, AV_LOG_INFO,
2029                "warning, clipping %d dct coefficients to %d..%d\n",
2030                overflow, minlevel, maxlevel);
2031 }
2032
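     /* Compute a per-coefficient visual weight, roughly 36 times the standard
      * deviation of each pixel's (up to) 3x3 neighbourhood; the weights are fed
      * to dct_quantize_refine() when quantizer noise shaping is enabled. */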
2033 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
2034 {
2035     int x, y;
2036     // FIXME optimize
2037     for (y = 0; y < 8; y++) {
2038         for (x = 0; x < 8; x++) {
2039             int x2, y2;
2040             int sum = 0;
2041             int sqr = 0;
2042             int count = 0;
2043
2044             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2045                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2046                     int v = ptr[x2 + y2 * stride];
2047                     sum += v;
2048                     sqr += v * v;
2049                     count++;
2050                 }
2051             }
2052             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2053         }
2054     }
2055 }
2056
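     /* Encode one macroblock: fetch the source pixels (edge-emulated at the
      * picture border), build the inter prediction and residual if needed,
      * choose progressive vs. interlaced DCT, transform and quantize every
      * block, apply coefficient elimination and hand the result to the
      * codec-specific bitstream writer. */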
2057 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2058                                                 int motion_x, int motion_y,
2059                                                 int mb_block_height,
2060                                                 int mb_block_width,
2061                                                 int mb_block_count)
2062 {
2063     int16_t weight[12][64];
2064     int16_t orig[12][64];
2065     const int mb_x = s->mb_x;
2066     const int mb_y = s->mb_y;
2067     int i;
2068     int skip_dct[12];
2069     int dct_offset = s->linesize * 8; // default for progressive frames
2070     int uv_dct_offset = s->uvlinesize * 8;
2071     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2072     ptrdiff_t wrap_y, wrap_c;
2073
2074     for (i = 0; i < mb_block_count; i++)
2075         skip_dct[i] = s->skipdct;
2076
2077     if (s->adaptive_quant) {
2078         const int last_qp = s->qscale;
2079         const int mb_xy = mb_x + mb_y * s->mb_stride;
2080
2081         s->lambda = s->lambda_table[mb_xy];
2082         update_qscale(s);
2083
2084         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2085             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2086             s->dquant = s->qscale - last_qp;
2087
2088             if (s->out_format == FMT_H263) {
2089                 s->dquant = av_clip(s->dquant, -2, 2);
2090
2091                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2092                     if (!s->mb_intra) {
2093                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2094                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2095                                 s->dquant = 0;
2096                         }
2097                         if (s->mv_type == MV_TYPE_8X8)
2098                             s->dquant = 0;
2099                     }
2100                 }
2101             }
2102         }
2103         ff_set_qscale(s, last_qp + s->dquant);
2104     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2105         ff_set_qscale(s, s->qscale + s->dquant);
2106
2107     wrap_y = s->linesize;
2108     wrap_c = s->uvlinesize;
2109     ptr_y  = s->new_picture.f->data[0] +
2110              (mb_y * 16 * wrap_y)              + mb_x * 16;
2111     ptr_cb = s->new_picture.f->data[1] +
2112              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2113     ptr_cr = s->new_picture.f->data[2] +
2114              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2115
2116     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2117         uint8_t *ebuf = s->sc.edge_emu_buffer + 38 * wrap_y;
2118         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2119         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2120         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2121                                  wrap_y, wrap_y,
2122                                  16, 16, mb_x * 16, mb_y * 16,
2123                                  s->width, s->height);
2124         ptr_y = ebuf;
2125         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2126                                  wrap_c, wrap_c,
2127                                  mb_block_width, mb_block_height,
2128                                  mb_x * mb_block_width, mb_y * mb_block_height,
2129                                  cw, ch);
2130         ptr_cb = ebuf + 16 * wrap_y;
2131         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2132                                  wrap_c, wrap_c,
2133                                  mb_block_width, mb_block_height,
2134                                  mb_x * mb_block_width, mb_y * mb_block_height,
2135                                  cw, ch);
2136         ptr_cr = ebuf + 16 * wrap_y + 16;
2137     }
2138
2139     if (s->mb_intra) {
2140         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2141             int progressive_score, interlaced_score;
2142
2143             s->interlaced_dct = 0;
2144             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2145                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2146                                                      NULL, wrap_y, 8) - 400;
2147
2148             if (progressive_score > 0) {
2149                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2150                                                         NULL, wrap_y * 2, 8) +
2151                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2152                                                         NULL, wrap_y * 2, 8);
2153                 if (progressive_score > interlaced_score) {
2154                     s->interlaced_dct = 1;
2155
2156                     dct_offset = wrap_y;
2157                     uv_dct_offset = wrap_c;
2158                     wrap_y <<= 1;
2159                     if (s->chroma_format == CHROMA_422 ||
2160                         s->chroma_format == CHROMA_444)
2161                         wrap_c <<= 1;
2162                 }
2163             }
2164         }
2165
2166         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2167         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2168         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2169         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2170
2171         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2172             skip_dct[4] = 1;
2173             skip_dct[5] = 1;
2174         } else {
2175             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2176             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2177             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2178                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2179                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2180             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2181                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2182                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2183                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2184                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2185                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2186                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2187             }
2188         }
2189     } else {
2190         op_pixels_func (*op_pix)[4];
2191         qpel_mc_func (*op_qpix)[16];
2192         uint8_t *dest_y, *dest_cb, *dest_cr;
2193
2194         dest_y  = s->dest[0];
2195         dest_cb = s->dest[1];
2196         dest_cr = s->dest[2];
2197
2198         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2199             op_pix  = s->hdsp.put_pixels_tab;
2200             op_qpix = s->qdsp.put_qpel_pixels_tab;
2201         } else {
2202             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2203             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2204         }
2205
2206         if (s->mv_dir & MV_DIR_FORWARD) {
2207             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2208                           s->last_picture.f->data,
2209                           op_pix, op_qpix);
2210             op_pix  = s->hdsp.avg_pixels_tab;
2211             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2212         }
2213         if (s->mv_dir & MV_DIR_BACKWARD) {
2214             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2215                           s->next_picture.f->data,
2216                           op_pix, op_qpix);
2217         }
2218
2219         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2220             int progressive_score, interlaced_score;
2221
2222             s->interlaced_dct = 0;
2223             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2224                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2225                                                      ptr_y + wrap_y * 8,
2226                                                      wrap_y, 8) - 400;
2227
2228             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2229                 progressive_score -= 400;
2230
2231             if (progressive_score > 0) {
2232                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2233                                                         wrap_y * 2, 8) +
2234                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2235                                                         ptr_y + wrap_y,
2236                                                         wrap_y * 2, 8);
2237
2238                 if (progressive_score > interlaced_score) {
2239                     s->interlaced_dct = 1;
2240
2241                     dct_offset = wrap_y;
2242                     uv_dct_offset = wrap_c;
2243                     wrap_y <<= 1;
2244                     if (s->chroma_format == CHROMA_422)
2245                         wrap_c <<= 1;
2246                 }
2247             }
2248         }
2249
2250         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2251         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2252         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2253                             dest_y + dct_offset, wrap_y);
2254         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2255                             dest_y + dct_offset + 8, wrap_y);
2256
2257         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2258             skip_dct[4] = 1;
2259             skip_dct[5] = 1;
2260         } else {
2261             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2262             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2263             if (!s->chroma_y_shift) { /* 422 */
2264                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2265                                     dest_cb + uv_dct_offset, wrap_c);
2266                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2267                                     dest_cr + uv_dct_offset, wrap_c);
2268             }
2269         }
2270         /* pre quantization */
2271         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2272                 2 * s->qscale * s->qscale) {
2273             // FIXME optimize
2274             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2275                 skip_dct[0] = 1;
2276             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2277                 skip_dct[1] = 1;
2278             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2279                                wrap_y, 8) < 20 * s->qscale)
2280                 skip_dct[2] = 1;
2281             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2282                                wrap_y, 8) < 20 * s->qscale)
2283                 skip_dct[3] = 1;
2284             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2285                 skip_dct[4] = 1;
2286             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2287                 skip_dct[5] = 1;
2288             if (!s->chroma_y_shift) { /* 422 */
2289                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2290                                    dest_cb + uv_dct_offset,
2291                                    wrap_c, 8) < 20 * s->qscale)
2292                     skip_dct[6] = 1;
2293                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2294                                    dest_cr + uv_dct_offset,
2295                                    wrap_c, 8) < 20 * s->qscale)
2296                     skip_dct[7] = 1;
2297             }
2298         }
2299     }
2300
2301     if (s->quantizer_noise_shaping) {
2302         if (!skip_dct[0])
2303             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2304         if (!skip_dct[1])
2305             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2306         if (!skip_dct[2])
2307             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2308         if (!skip_dct[3])
2309             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2310         if (!skip_dct[4])
2311             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2312         if (!skip_dct[5])
2313             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2314         if (!s->chroma_y_shift) { /* 422 */
2315             if (!skip_dct[6])
2316                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2317                                   wrap_c);
2318             if (!skip_dct[7])
2319                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2320                                   wrap_c);
2321         }
2322         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2323     }
2324
2325     /* DCT & quantize */
2326     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2327     {
2328         for (i = 0; i < mb_block_count; i++) {
2329             if (!skip_dct[i]) {
2330                 int overflow;
2331                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2332                 // FIXME we could decide to change the quantizer instead of
2333                 // clipping
2334                 // JS: I don't think that would be a good idea, it could lower
2335                 //     quality instead of improving it. Just INTRADC clipping
2336                 //     deserves a change in the quantizer
2337                 if (overflow)
2338                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2339             } else
2340                 s->block_last_index[i] = -1;
2341         }
2342         if (s->quantizer_noise_shaping) {
2343             for (i = 0; i < mb_block_count; i++) {
2344                 if (!skip_dct[i]) {
2345                     s->block_last_index[i] =
2346                         dct_quantize_refine(s, s->block[i], weight[i],
2347                                             orig[i], i, s->qscale);
2348                 }
2349             }
2350         }
2351
2352         if (s->luma_elim_threshold && !s->mb_intra)
2353             for (i = 0; i < 4; i++)
2354                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2355         if (s->chroma_elim_threshold && !s->mb_intra)
2356             for (i = 4; i < mb_block_count; i++)
2357                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2358
2359         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2360             for (i = 0; i < mb_block_count; i++) {
2361                 if (s->block_last_index[i] == -1)
2362                     s->coded_score[i] = INT_MAX / 256;
2363             }
2364         }
2365     }
2366
2367     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2368         s->block_last_index[4] =
2369         s->block_last_index[5] = 0;
2370         s->block[4][0] =
2371         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2372         if (!s->chroma_y_shift) { /* 422 / 444 */
2373             for (i=6; i<12; i++) {
2374                 s->block_last_index[i] = 0;
2375                 s->block[i][0] = s->block[4][0];
2376             }
2377         }
2378     }
2379
2380     // FIXME: the non-C dct_quantize implementations return an incorrect block_last_index
2381     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2382         for (i = 0; i < mb_block_count; i++) {
2383             int j;
2384             if (s->block_last_index[i] > 0) {
2385                 for (j = 63; j > 0; j--) {
2386                     if (s->block[i][s->intra_scantable.permutated[j]])
2387                         break;
2388                 }
2389                 s->block_last_index[i] = j;
2390             }
2391         }
2392     }
2393
2394     /* huffman encode */
2395     switch (s->codec_id) { // FIXME a function pointer could be slightly faster
2396     case AV_CODEC_ID_MPEG1VIDEO:
2397     case AV_CODEC_ID_MPEG2VIDEO:
2398         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2399             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2400         break;
2401     case AV_CODEC_ID_MPEG4:
2402         if (CONFIG_MPEG4_ENCODER)
2403             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2404         break;
2405     case AV_CODEC_ID_MSMPEG4V2:
2406     case AV_CODEC_ID_MSMPEG4V3:
2407     case AV_CODEC_ID_WMV1:
2408         if (CONFIG_MSMPEG4_ENCODER)
2409             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2410         break;
2411     case AV_CODEC_ID_WMV2:
2412         if (CONFIG_WMV2_ENCODER)
2413             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2414         break;
2415     case AV_CODEC_ID_H261:
2416         if (CONFIG_H261_ENCODER)
2417             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2418         break;
2419     case AV_CODEC_ID_H263:
2420     case AV_CODEC_ID_H263P:
2421     case AV_CODEC_ID_FLV1:
2422     case AV_CODEC_ID_RV10:
2423     case AV_CODEC_ID_RV20:
2424         if (CONFIG_H263_ENCODER)
2425             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2426         break;
2427 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
2428     case AV_CODEC_ID_MJPEG:
2429     case AV_CODEC_ID_AMV:
2430         ff_mjpeg_encode_mb(s, s->block);
2431         break;
2432 #endif
2433     case AV_CODEC_ID_SPEEDHQ:
2434         if (CONFIG_SPEEDHQ_ENCODER)
2435             ff_speedhq_encode_mb(s, s->block);
2436         break;
2437     default:
2438         av_assert1(0);
2439     }
2440 }
2441
2442 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2443 {
2444     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2445     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2446     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2447 }
2448
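     /* The two copy_context helpers below save and restore the parts of the
      * encoder state that a trial macroblock encoding modifies, so that
      * encode_mb_hq() can compare several candidate modes for the same MB. */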
2449 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2450     int i;
2451
2452     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2453
2454     /* MPEG-1 */
2455     d->mb_skip_run= s->mb_skip_run;
2456     for(i=0; i<3; i++)
2457         d->last_dc[i] = s->last_dc[i];
2458
2459     /* statistics */
2460     d->mv_bits= s->mv_bits;
2461     d->i_tex_bits= s->i_tex_bits;
2462     d->p_tex_bits= s->p_tex_bits;
2463     d->i_count= s->i_count;
2464     d->f_count= s->f_count;
2465     d->b_count= s->b_count;
2466     d->skip_count= s->skip_count;
2467     d->misc_bits= s->misc_bits;
2468     d->last_bits= 0;
2469
2470     d->mb_skipped= 0;
2471     d->qscale= s->qscale;
2472     d->dquant= s->dquant;
2473
2474     d->esc3_level_length= s->esc3_level_length;
2475 }
2476
2477 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2478     int i;
2479
2480     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2481     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2482
2483     /* MPEG-1 */
2484     d->mb_skip_run= s->mb_skip_run;
2485     for(i=0; i<3; i++)
2486         d->last_dc[i] = s->last_dc[i];
2487
2488     /* statistics */
2489     d->mv_bits= s->mv_bits;
2490     d->i_tex_bits= s->i_tex_bits;
2491     d->p_tex_bits= s->p_tex_bits;
2492     d->i_count= s->i_count;
2493     d->f_count= s->f_count;
2494     d->b_count= s->b_count;
2495     d->skip_count= s->skip_count;
2496     d->misc_bits= s->misc_bits;
2497
2498     d->mb_intra= s->mb_intra;
2499     d->mb_skipped= s->mb_skipped;
2500     d->mv_type= s->mv_type;
2501     d->mv_dir= s->mv_dir;
2502     d->pb= s->pb;
2503     if(s->data_partitioning){
2504         d->pb2= s->pb2;
2505         d->tex_pb= s->tex_pb;
2506     }
2507     d->block= s->block;
2508     for(i=0; i<8; i++)
2509         d->block_last_index[i]= s->block_last_index[i];
2510     d->interlaced_dct= s->interlaced_dct;
2511     d->qscale= s->qscale;
2512
2513     d->esc3_level_length= s->esc3_level_length;
2514 }
2515
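     /* Encode the current MB with the given mode into one of two alternating
      * scratch bit buffers and score it by its bit count, or, with full RD
      * macroblock decision, by bits * lambda2 plus the SSE of the
      * reconstruction; keep it as the best candidate if the score improves on
      * *dmin. */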
2516 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2517                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2518                            int *dmin, int *next_block, int motion_x, int motion_y)
2519 {
2520     int score;
2521     uint8_t *dest_backup[3];
2522
2523     copy_context_before_encode(s, backup, type);
2524
2525     s->block= s->blocks[*next_block];
2526     s->pb= pb[*next_block];
2527     if(s->data_partitioning){
2528         s->pb2   = pb2   [*next_block];
2529         s->tex_pb= tex_pb[*next_block];
2530     }
2531
2532     if(*next_block){
2533         memcpy(dest_backup, s->dest, sizeof(s->dest));
2534         s->dest[0] = s->sc.rd_scratchpad;
2535         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2536         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2537         av_assert0(s->linesize >= 32); //FIXME
2538     }
2539
2540     encode_mb(s, motion_x, motion_y);
2541
2542     score= put_bits_count(&s->pb);
2543     if(s->data_partitioning){
2544         score+= put_bits_count(&s->pb2);
2545         score+= put_bits_count(&s->tex_pb);
2546     }
2547
2548     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2549         ff_mpv_reconstruct_mb(s, s->block);
2550
2551         score *= s->lambda2;
2552         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2553     }
2554
2555     if(*next_block){
2556         memcpy(s->dest, dest_backup, sizeof(s->dest));
2557     }
2558
2559     if(score<*dmin){
2560         *dmin= score;
2561         *next_block^=1;
2562
2563         copy_context_after_encode(best, s, type);
2564     }
2565 }
2566
2567 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2568     const uint32_t *sq = ff_square_tab + 256;
2569     int acc=0;
2570     int x,y;
2571
2572     if(w==16 && h==16)
2573         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2574     else if(w==8 && h==8)
2575         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2576
2577     for(y=0; y<h; y++){
2578         for(x=0; x<w; x++){
2579             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2580         }
2581     }
2582
2583     av_assert2(acc>=0);
2584
2585     return acc;
2586 }
2587
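     /* Sum of squared errors between the source macroblock and its
      * reconstruction in s->dest, using NSSE when requested; partially covered
      * MBs at the right and bottom border fall back to the generic sse() above. */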
2588 static int sse_mb(MpegEncContext *s){
2589     int w= 16;
2590     int h= 16;
2591
2592     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2593     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2594
2595     if(w==16 && h==16)
2596       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2597         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2598                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2599                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2600       }else{
2601         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2602                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2603                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2604       }
2605     else
2606         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2607                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2608                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2609 }
2610
2611 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2612     MpegEncContext *s= *(void**)arg;
2613
2614
2615     s->me.pre_pass=1;
2616     s->me.dia_size= s->avctx->pre_dia_size;
2617     s->first_slice_line=1;
2618     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2619         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2620             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2621         }
2622         s->first_slice_line=0;
2623     }
2624
2625     s->me.pre_pass=0;
2626
2627     return 0;
2628 }
2629
2630 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2631     MpegEncContext *s= *(void**)arg;
2632
2633     s->me.dia_size= s->avctx->dia_size;
2634     s->first_slice_line=1;
2635     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2636         s->mb_x=0; //for block init below
2637         ff_init_block_index(s);
2638         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2639             s->block_index[0]+=2;
2640             s->block_index[1]+=2;
2641             s->block_index[2]+=2;
2642             s->block_index[3]+=2;
2643
2644             /* compute motion vector & mb_type and store in context */
2645             if(s->pict_type==AV_PICTURE_TYPE_B)
2646                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2647             else
2648                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2649         }
2650         s->first_slice_line=0;
2651     }
2652     return 0;
2653 }
2654
2655 static int mb_var_thread(AVCodecContext *c, void *arg){
2656     MpegEncContext *s= *(void**)arg;
2657     int mb_x, mb_y;
2658
2659     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2660         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2661             int xx = mb_x * 16;
2662             int yy = mb_y * 16;
2663             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2664             int varc;
2665             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2666
2667             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2668                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2669
2670             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2671             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2672             s->me.mb_var_sum_temp    += varc;
2673         }
2674     }
2675     return 0;
2676 }
2677
2678 static void write_slice_end(MpegEncContext *s){
2679     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2680         if(s->partitioned_frame){
2681             ff_mpeg4_merge_partitions(s);
2682         }
2683
2684         ff_mpeg4_stuffing(&s->pb);
2685     } else if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
2686                s->out_format == FMT_MJPEG) {
2687         ff_mjpeg_encode_stuffing(s);
2688     } else if (CONFIG_SPEEDHQ_ENCODER && s->out_format == FMT_SPEEDHQ) {
2689         ff_speedhq_end_slice(s);
2690     }
2691
2692     flush_put_bits(&s->pb);
2693
2694     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2695         s->misc_bits+= get_bits_diff(s);
2696 }
2697
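     /* Append one 12-byte AV_PKT_DATA_H263_MB_INFO entry: bit offset of the MB
      * in the packet, qscale, GOB number, MB address within the GOB and the
      * predicted motion vector; the 4MV fields are written as zero. */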
2698 static void write_mb_info(MpegEncContext *s)
2699 {
2700     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2701     int offset = put_bits_count(&s->pb);
2702     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2703     int gobn = s->mb_y / s->gob_index;
2704     int pred_x, pred_y;
2705     if (CONFIG_H263_ENCODER)
2706         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2707     bytestream_put_le32(&ptr, offset);
2708     bytestream_put_byte(&ptr, s->qscale);
2709     bytestream_put_byte(&ptr, gobn);
2710     bytestream_put_le16(&ptr, mba);
2711     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2712     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2713     /* 4MV not implemented */
2714     bytestream_put_byte(&ptr, 0); /* hmv2 */
2715     bytestream_put_byte(&ptr, 0); /* vmv2 */
2716 }
2717
2718 static void update_mb_info(MpegEncContext *s, int startcode)
2719 {
2720     if (!s->mb_info)
2721         return;
2722     if (put_bytes_count(&s->pb, 0) - s->prev_mb_info >= s->mb_info) {
2723         s->mb_info_size += 12;
2724         s->prev_mb_info = s->last_mb_info;
2725     }
2726     if (startcode) {
2727         s->prev_mb_info = put_bytes_count(&s->pb, 0);
2728         /* This might have incremented mb_info_size above, and we return without
2729          * actually writing any info into that slot yet. But in that case,
2730          * this function will be called again right after the start code has been
2731          * written, and the mb info will be written then. */
2732         return;
2733     }
2734
2735     s->last_mb_info = put_bytes_count(&s->pb, 0);
2736     if (!s->mb_info_size)
2737         s->mb_info_size += 12;
2738     write_mb_info(s);
2739 }
2740
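     /* Grow the shared output buffer when fewer than 'threshold' bytes are left
      * (only possible with a single slice context writing into the internal
      * byte buffer), rebase the PutBitContext onto the new allocation and fix
      * up the pointers (ptr_lastgob, vbv_delay_ptr) into the old buffer. */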
2741 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2742 {
2743     if (put_bytes_left(&s->pb, 0) < threshold
2744         && s->slice_context_count == 1
2745         && s->pb.buf == s->avctx->internal->byte_buffer) {
2746         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2747         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2748
2749         uint8_t *new_buffer = NULL;
2750         int new_buffer_size = 0;
2751
2752         if ((s->avctx->internal->byte_buffer_size + size_increase) >= INT_MAX/8) {
2753             av_log(s->avctx, AV_LOG_ERROR, "Cannot reallocate putbit buffer\n");
2754             return AVERROR(ENOMEM);
2755         }
2756
2757         emms_c();
2758
2759         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2760                               s->avctx->internal->byte_buffer_size + size_increase);
2761         if (!new_buffer)
2762             return AVERROR(ENOMEM);
2763
2764         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2765         av_free(s->avctx->internal->byte_buffer);
2766         s->avctx->internal->byte_buffer      = new_buffer;
2767         s->avctx->internal->byte_buffer_size = new_buffer_size;
2768         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2769         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2770         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2771     }
2772     if (put_bytes_left(&s->pb, 0) < threshold)
2773         return AVERROR(EINVAL);
2774     return 0;
2775 }
2776
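/* Per-slice-context worker: encodes the MB rows [start_mb_y, end_mb_y).
 * Depending on the candidate-type mask, each macroblock is either encoded once
 * directly, or encoded with every candidate type via encode_mb_hq() (plus
 * optional QP_RD / SKIP_RD refinement) and the cheapest variant is kept. */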
2777 static int encode_thread(AVCodecContext *c, void *arg){
2778     MpegEncContext *s= *(void**)arg;
2779     int mb_x, mb_y, mb_y_order;
2780     int chr_h= 16>>s->chroma_y_shift;
2781     int i, j;
2782     MpegEncContext best_s = { 0 }, backup_s;
2783     uint8_t bit_buf[2][MAX_MB_BYTES];
2784     uint8_t bit_buf2[2][MAX_MB_BYTES];
2785     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2786     PutBitContext pb[2], pb2[2], tex_pb[2];
2787
2788     for(i=0; i<2; i++){
2789         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2790         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2791         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2792     }
2793
2794     s->last_bits= put_bits_count(&s->pb);
2795     s->mv_bits=0;
2796     s->misc_bits=0;
2797     s->i_tex_bits=0;
2798     s->p_tex_bits=0;
2799     s->i_count=0;
2800     s->f_count=0;
2801     s->b_count=0;
2802     s->skip_count=0;
2803
2804     for(i=0; i<3; i++){
2805         /* init last dc values */
2806         /* note: quant matrix value (8) is implied here */
2807         s->last_dc[i] = 128 << s->intra_dc_precision;
2808
2809         s->current_picture.encoding_error[i] = 0;
2810     }
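    /* AMV uses fixed DC scales of 13 (luma) and 14 (chroma), matching the
     * matrix setup in encode_picture(), so seed the DC predictors accordingly. */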
2811     if(s->codec_id==AV_CODEC_ID_AMV){
2812         s->last_dc[0] = 128*8/13;
2813         s->last_dc[1] = 128*8/14;
2814         s->last_dc[2] = 128*8/14;
2815     }
2816     s->mb_skip_run = 0;
2817     memset(s->last_mv, 0, sizeof(s->last_mv));
2818
2819     s->last_mv_dir = 0;
2820
2821     switch(s->codec_id){
2822     case AV_CODEC_ID_H263:
2823     case AV_CODEC_ID_H263P:
2824     case AV_CODEC_ID_FLV1:
2825         if (CONFIG_H263_ENCODER)
2826             s->gob_index = H263_GOB_HEIGHT(s->height);
2827         break;
2828     case AV_CODEC_ID_MPEG4:
2829         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2830             ff_mpeg4_init_partitions(s);
2831         break;
2832     }
2833
2834     s->resync_mb_x=0;
2835     s->resync_mb_y=0;
2836     s->first_slice_line = 1;
2837     s->ptr_lastgob = s->pb.buf;
2838     for (mb_y_order = s->start_mb_y; mb_y_order < s->end_mb_y; mb_y_order++) {
2839         if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
2840             int first_in_slice;
2841             mb_y = ff_speedhq_mb_y_order_to_mb(mb_y_order, s->mb_height, &first_in_slice);
2842             if (first_in_slice && mb_y_order != s->start_mb_y)
2843                 ff_speedhq_end_slice(s);
2844             s->last_dc[0] = s->last_dc[1] = s->last_dc[2] = 1024 << s->intra_dc_precision;
2845         } else {
2846             mb_y = mb_y_order;
2847         }
2848         s->mb_x=0;
2849         s->mb_y= mb_y;
2850
2851         ff_set_qscale(s, s->qscale);
2852         ff_init_block_index(s);
2853
2854         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2855             int xy= mb_y*s->mb_stride + mb_x; // not const: H.261 may reorder the MB index and change this below
2856             int mb_type= s->mb_type[xy];
2857 //            int d;
2858             int dmin= INT_MAX;
2859             int dir;
2860             int size_increase =  s->avctx->internal->byte_buffer_size/4
2861                                + s->mb_width*MAX_MB_BYTES;
2862
2863             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2864             if (put_bytes_left(&s->pb, 0) < MAX_MB_BYTES){
2865                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2866                 return -1;
2867             }
2868             if(s->data_partitioning){
2869                 if (put_bytes_left(&s->pb2,    0) < MAX_MB_BYTES ||
2870                     put_bytes_left(&s->tex_pb, 0) < MAX_MB_BYTES) {
2871                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2872                     return -1;
2873                 }
2874             }
2875
2876             s->mb_x = mb_x;
2877             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2878             ff_update_block_index(s);
2879
2880             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2881                 ff_h261_reorder_mb_index(s);
2882                 xy= s->mb_y*s->mb_stride + s->mb_x;
2883                 mb_type= s->mb_type[xy];
2884             }
2885
2886             /* write gob / video packet header  */
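            /* A new GOB / slice / video packet is started once roughly
             * rtp_payload_size bytes have accumulated since the last one,
             * subject to the codec-specific rules below (H.263 GOB boundaries,
             * MPEG slices starting at mb_x == 0 and never inside a skip run). */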
2887             if(s->rtp_mode){
2888                 int current_packet_size, is_gob_start;
2889
2890                 current_packet_size = put_bytes_count(&s->pb, 1)
2891                                       - (s->ptr_lastgob - s->pb.buf);
2892
2893                 is_gob_start = s->rtp_payload_size &&
2894                                current_packet_size >= s->rtp_payload_size &&
2895                                mb_y + mb_x > 0;
2896
2897                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2898
2899                 switch(s->codec_id){
2900                 case AV_CODEC_ID_H263:
2901                 case AV_CODEC_ID_H263P:
2902                     if(!s->h263_slice_structured)
2903                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2904                     break;
2905                 case AV_CODEC_ID_MPEG2VIDEO:
2906                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
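                    /* fall through: MPEG-2 slices also must not start inside a skip run */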
2907                 case AV_CODEC_ID_MPEG1VIDEO:
2908                     if(s->mb_skip_run) is_gob_start=0;
2909                     break;
2910                 case AV_CODEC_ID_MJPEG:
2911                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2912                     break;
2913                 }
2914
2915                 if(is_gob_start){
2916                     if(s->start_mb_y != mb_y || mb_x!=0){
2917                         write_slice_end(s);
2918
2919                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2920                             ff_mpeg4_init_partitions(s);
2921                         }
2922                     }
2923
2924                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2925                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2926
2927                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2928                         int r = put_bytes_count(&s->pb, 0) + s->picture_number + 16 + s->mb_x + s->mb_y;
2929                         int d = 100 / s->error_rate;
2930                         if(r % d == 0){
2931                             current_packet_size=0;
2932                             s->pb.buf_ptr= s->ptr_lastgob;
2933                             av_assert1(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2934                         }
2935                     }
2936
2937                     update_mb_info(s, 1);
2938
2939                     switch(s->codec_id){
2940                     case AV_CODEC_ID_MPEG4:
2941                         if (CONFIG_MPEG4_ENCODER) {
2942                             ff_mpeg4_encode_video_packet_header(s);
2943                             ff_mpeg4_clean_buffers(s);
2944                         }
2945                     break;
2946                     case AV_CODEC_ID_MPEG1VIDEO:
2947                     case AV_CODEC_ID_MPEG2VIDEO:
2948                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2949                             ff_mpeg1_encode_slice_header(s);
2950                             ff_mpeg1_clean_buffers(s);
2951                         }
2952                     break;
2953                     case AV_CODEC_ID_H263:
2954                     case AV_CODEC_ID_H263P:
2955                         if (CONFIG_H263_ENCODER)
2956                             ff_h263_encode_gob_header(s, mb_y);
2957                     break;
2958                     }
2959
2960                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2961                         int bits= put_bits_count(&s->pb);
2962                         s->misc_bits+= bits - s->last_bits;
2963                         s->last_bits= bits;
2964                     }
2965
2966                     s->ptr_lastgob += current_packet_size;
2967                     s->first_slice_line=1;
2968                     s->resync_mb_x=mb_x;
2969                     s->resync_mb_y=mb_y;
2970                 }
2971             }
2972
2973             if(  (s->resync_mb_x   == s->mb_x)
2974                && s->resync_mb_y+1 == s->mb_y){
2975                 s->first_slice_line=0;
2976             }
2977
2978             s->mb_skipped=0;
2979             s->dquant=0; //only for QP_RD
2980
2981             update_mb_info(s, 0);
2982
2983             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2984                 int next_block=0;
2985                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2986
2987                 copy_context_before_encode(&backup_s, s, -1);
2988                 backup_s.pb= s->pb;
2989                 best_s.data_partitioning= s->data_partitioning;
2990                 best_s.partitioned_frame= s->partitioned_frame;
2991                 if(s->data_partitioning){
2992                     backup_s.pb2= s->pb2;
2993                     backup_s.tex_pb= s->tex_pb;
2994                 }
2995
2996                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2997                     s->mv_dir = MV_DIR_FORWARD;
2998                     s->mv_type = MV_TYPE_16X16;
2999                     s->mb_intra= 0;
3000                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3001                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3002                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3003                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3004                 }
3005                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3006                     s->mv_dir = MV_DIR_FORWARD;
3007                     s->mv_type = MV_TYPE_FIELD;
3008                     s->mb_intra= 0;
3009                     for(i=0; i<2; i++){
3010                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3011                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3012                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3013                     }
3014                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3015                                  &dmin, &next_block, 0, 0);
3016                 }
3017                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3018                     s->mv_dir = MV_DIR_FORWARD;
3019                     s->mv_type = MV_TYPE_16X16;
3020                     s->mb_intra= 0;
3021                     s->mv[0][0][0] = 0;
3022                     s->mv[0][0][1] = 0;
3023                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3024                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3025                 }
3026                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3027                     s->mv_dir = MV_DIR_FORWARD;
3028                     s->mv_type = MV_TYPE_8X8;
3029                     s->mb_intra= 0;
3030                     for(i=0; i<4; i++){
3031                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3032                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3033                     }
3034                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3035                                  &dmin, &next_block, 0, 0);
3036                 }
3037                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3038                     s->mv_dir = MV_DIR_FORWARD;
3039                     s->mv_type = MV_TYPE_16X16;
3040                     s->mb_intra= 0;
3041                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3042                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3043                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3044                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3045                 }
3046                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3047                     s->mv_dir = MV_DIR_BACKWARD;
3048                     s->mv_type = MV_TYPE_16X16;
3049                     s->mb_intra= 0;
3050                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3051                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3052                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3053                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3054                 }
3055                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3056                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3057                     s->mv_type = MV_TYPE_16X16;
3058                     s->mb_intra= 0;
3059                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3060                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3061                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3062                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3063                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3064                                  &dmin, &next_block, 0, 0);
3065                 }
3066                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3067                     s->mv_dir = MV_DIR_FORWARD;
3068                     s->mv_type = MV_TYPE_FIELD;
3069                     s->mb_intra= 0;
3070                     for(i=0; i<2; i++){
3071                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3072                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3073                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3074                     }
3075                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3076                                  &dmin, &next_block, 0, 0);
3077                 }
3078                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3079                     s->mv_dir = MV_DIR_BACKWARD;
3080                     s->mv_type = MV_TYPE_FIELD;
3081                     s->mb_intra= 0;
3082                     for(i=0; i<2; i++){
3083                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3084                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3085                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3086                     }
3087                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3088                                  &dmin, &next_block, 0, 0);
3089                 }
3090                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3091                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3092                     s->mv_type = MV_TYPE_FIELD;
3093                     s->mb_intra= 0;
3094                     for(dir=0; dir<2; dir++){
3095                         for(i=0; i<2; i++){
3096                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3097                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3098                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3099                         }
3100                     }
3101                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3102                                  &dmin, &next_block, 0, 0);
3103                 }
3104                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3105                     s->mv_dir = 0;
3106                     s->mv_type = MV_TYPE_16X16;
3107                     s->mb_intra= 1;
3108                     s->mv[0][0][0] = 0;
3109                     s->mv[0][0][1] = 0;
3110                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3111                                  &dmin, &next_block, 0, 0);
3112                     if(s->h263_pred || s->h263_aic){
3113                         if(best_s.mb_intra)
3114                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3115                         else
3116                             ff_clean_intra_table_entries(s); //old mode?
3117                     }
3118                 }
3119
3120                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3121                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3122                         const int last_qp= backup_s.qscale;
3123                         int qpi, qp, dc[6];
3124                         int16_t ac[6][16];
3125                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3126                         static const int dquant_tab[4]={-1,1,-2,2};
3127                         int storecoefs = s->mb_intra && s->dc_val[0];
3128
3129                         av_assert2(backup_s.dquant == 0);
3130
3131                         //FIXME intra
3132                         s->mv_dir= best_s.mv_dir;
3133                         s->mv_type = MV_TYPE_16X16;
3134                         s->mb_intra= best_s.mb_intra;
3135                         s->mv[0][0][0] = best_s.mv[0][0][0];
3136                         s->mv[0][0][1] = best_s.mv[0][0][1];
3137                         s->mv[1][0][0] = best_s.mv[1][0][0];
3138                         s->mv[1][0][1] = best_s.mv[1][0][1];
3139
3140                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3141                         for(; qpi<4; qpi++){
3142                             int dquant= dquant_tab[qpi];
3143                             qp= last_qp + dquant;
3144                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3145                                 continue;
3146                             backup_s.dquant= dquant;
3147                             if(storecoefs){
3148                                 for(i=0; i<6; i++){
3149                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3150                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3151                                 }
3152                             }
3153
3154                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3155                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3156                             if(best_s.qscale != qp){
3157                                 if(storecoefs){
3158                                     for(i=0; i<6; i++){
3159                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3160                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3161                                     }
3162                                 }
3163                             }
3164                         }
3165                     }
3166                 }
3167                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3168                     int mx= s->b_direct_mv_table[xy][0];
3169                     int my= s->b_direct_mv_table[xy][1];
3170
3171                     backup_s.dquant = 0;
3172                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3173                     s->mb_intra= 0;
3174                     ff_mpeg4_set_direct_mv(s, mx, my);
3175                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3176                                  &dmin, &next_block, mx, my);
3177                 }
3178                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3179                     backup_s.dquant = 0;
3180                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3181                     s->mb_intra= 0;
3182                     ff_mpeg4_set_direct_mv(s, 0, 0);
3183                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3184                                  &dmin, &next_block, 0, 0);
3185                 }
3186                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3187                     int coded=0;
3188                     for(i=0; i<6; i++)
3189                         coded |= s->block_last_index[i];
3190                     if(coded){
3191                         int mx,my;
3192                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3193                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3194                             mx=my=0; //FIXME find the one we actually used
3195                             ff_mpeg4_set_direct_mv(s, mx, my);
3196                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3197                             mx= s->mv[1][0][0];
3198                             my= s->mv[1][0][1];
3199                         }else{
3200                             mx= s->mv[0][0][0];
3201                             my= s->mv[0][0][1];
3202                         }
3203
3204                         s->mv_dir= best_s.mv_dir;
3205                         s->mv_type = best_s.mv_type;
3206                         s->mb_intra= 0;
3207 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3208                         s->mv[0][0][1] = best_s.mv[0][0][1];
3209                         s->mv[1][0][0] = best_s.mv[1][0][0];
3210                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3211                         backup_s.dquant= 0;
3212                         s->skipdct=1;
3213                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3214                                         &dmin, &next_block, mx, my);
3215                         s->skipdct=0;
3216                     }
3217                 }
3218
3219                 s->current_picture.qscale_table[xy] = best_s.qscale;
3220
3221                 copy_context_after_encode(s, &best_s, -1);
3222
3223                 pb_bits_count= put_bits_count(&s->pb);
3224                 flush_put_bits(&s->pb);
3225                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3226                 s->pb= backup_s.pb;
3227
3228                 if(s->data_partitioning){
3229                     pb2_bits_count= put_bits_count(&s->pb2);
3230                     flush_put_bits(&s->pb2);
3231                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3232                     s->pb2= backup_s.pb2;
3233
3234                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3235                     flush_put_bits(&s->tex_pb);
3236                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3237                     s->tex_pb= backup_s.tex_pb;
3238                 }
3239                 s->last_bits= put_bits_count(&s->pb);
3240
3241                 if (CONFIG_H263_ENCODER &&
3242                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3243                     ff_h263_update_motion_val(s);
3244
3245                 if(next_block==0){ //FIXME 16 vs linesize16
3246                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3247                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3248                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3249                 }
3250
3251                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3252                     ff_mpv_reconstruct_mb(s, s->block);
3253             } else {
3254                 int motion_x = 0, motion_y = 0;
3255                 s->mv_type=MV_TYPE_16X16;
3256                 // only one MB-Type possible
3257
3258                 switch(mb_type){
3259                 case CANDIDATE_MB_TYPE_INTRA:
3260                     s->mv_dir = 0;
3261                     s->mb_intra= 1;
3262                     motion_x= s->mv[0][0][0] = 0;
3263                     motion_y= s->mv[0][0][1] = 0;
3264                     break;
3265                 case CANDIDATE_MB_TYPE_INTER:
3266                     s->mv_dir = MV_DIR_FORWARD;
3267                     s->mb_intra= 0;
3268                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3269                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3270                     break;
3271                 case CANDIDATE_MB_TYPE_INTER_I:
3272                     s->mv_dir = MV_DIR_FORWARD;
3273                     s->mv_type = MV_TYPE_FIELD;
3274                     s->mb_intra= 0;
3275                     for(i=0; i<2; i++){
3276                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3277                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3278                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3279                     }
3280                     break;
3281                 case CANDIDATE_MB_TYPE_INTER4V:
3282                     s->mv_dir = MV_DIR_FORWARD;
3283                     s->mv_type = MV_TYPE_8X8;
3284                     s->mb_intra= 0;
3285                     for(i=0; i<4; i++){
3286                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3287                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3288                     }
3289                     break;
3290                 case CANDIDATE_MB_TYPE_DIRECT:
3291                     if (CONFIG_MPEG4_ENCODER) {
3292                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3293                         s->mb_intra= 0;
3294                         motion_x=s->b_direct_mv_table[xy][0];
3295                         motion_y=s->b_direct_mv_table[xy][1];
3296                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3297                     }
3298                     break;
3299                 case CANDIDATE_MB_TYPE_DIRECT0:
3300                     if (CONFIG_MPEG4_ENCODER) {
3301                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3302                         s->mb_intra= 0;
3303                         ff_mpeg4_set_direct_mv(s, 0, 0);
3304                     }
3305                     break;
3306                 case CANDIDATE_MB_TYPE_BIDIR:
3307                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3308                     s->mb_intra= 0;
3309                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3310                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3311                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3312                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3313                     break;
3314                 case CANDIDATE_MB_TYPE_BACKWARD:
3315                     s->mv_dir = MV_DIR_BACKWARD;
3316                     s->mb_intra= 0;
3317                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3318                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3319                     break;
3320                 case CANDIDATE_MB_TYPE_FORWARD:
3321                     s->mv_dir = MV_DIR_FORWARD;
3322                     s->mb_intra= 0;
3323                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3324                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3325                     break;
3326                 case CANDIDATE_MB_TYPE_FORWARD_I:
3327                     s->mv_dir = MV_DIR_FORWARD;
3328                     s->mv_type = MV_TYPE_FIELD;
3329                     s->mb_intra= 0;
3330                     for(i=0; i<2; i++){
3331                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3332                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3333                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3334                     }
3335                     break;
3336                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3337                     s->mv_dir = MV_DIR_BACKWARD;
3338                     s->mv_type = MV_TYPE_FIELD;
3339                     s->mb_intra= 0;
3340                     for(i=0; i<2; i++){
3341                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3342                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3343                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3344                     }
3345                     break;
3346                 case CANDIDATE_MB_TYPE_BIDIR_I:
3347                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3348                     s->mv_type = MV_TYPE_FIELD;
3349                     s->mb_intra= 0;
3350                     for(dir=0; dir<2; dir++){
3351                         for(i=0; i<2; i++){
3352                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3353                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3354                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3355                         }
3356                     }
3357                     break;
3358                 default:
3359                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3360                 }
3361
3362                 encode_mb(s, motion_x, motion_y);
3363
3364                 // RAL: Update last macroblock type
3365                 s->last_mv_dir = s->mv_dir;
3366
3367                 if (CONFIG_H263_ENCODER &&
3368                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3369                     ff_h263_update_motion_val(s);
3370
3371                 ff_mpv_reconstruct_mb(s, s->block);
3372             }
3373
3374             /* clear the MV table entry of intra MBs in I/P/S frames so that direct mode in B-frames does not reuse stale vectors */
3375             if(s->mb_intra /* && I,P,S_TYPE */){
3376                 s->p_mv_table[xy][0]=0;
3377                 s->p_mv_table[xy][1]=0;
3378             }
3379
3380             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3381                 int w= 16;
3382                 int h= 16;
3383
3384                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3385                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3386
3387                 s->current_picture.encoding_error[0] += sse(
3388                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3389                     s->dest[0], w, h, s->linesize);
3390                 s->current_picture.encoding_error[1] += sse(
3391                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3392                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3393                 s->current_picture.encoding_error[2] += sse(
3394                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3395                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3396             }
3397             if(s->loop_filter){
3398                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3399                     ff_h263_loop_filter(s);
3400             }
3401             ff_dlog(s->avctx, "MB %d %d bits\n",
3402                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3403         }
3404     }
3405
3406     // not pretty, but the extension header must be written before the slice end is flushed, so it has to live here
3407     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3408         ff_msmpeg4_encode_ext_header(s);
3409
3410     write_slice_end(s);
3411
3412     return 0;
3413 }
3414
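/* MERGE() adds a statistics field from a slice-thread context into the main
 * context and zeroes it in the source, so that merging is safe to repeat. */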
3415 #define MERGE(field) dst->field += src->field; src->field=0
3416 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3417     MERGE(me.scene_change_score);
3418     MERGE(me.mc_mb_var_sum_temp);
3419     MERGE(me.mb_var_sum_temp);
3420 }
3421
3422 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3423     int i;
3424
3425     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3426     MERGE(dct_count[1]);
3427     MERGE(mv_bits);
3428     MERGE(i_tex_bits);
3429     MERGE(p_tex_bits);
3430     MERGE(i_count);
3431     MERGE(f_count);
3432     MERGE(b_count);
3433     MERGE(skip_count);
3434     MERGE(misc_bits);
3435     MERGE(er.error_count);
3436     MERGE(padding_bug_score);
3437     MERGE(current_picture.encoding_error[0]);
3438     MERGE(current_picture.encoding_error[1]);
3439     MERGE(current_picture.encoding_error[2]);
3440
3441     if (dst->noise_reduction){
3442         for(i=0; i<64; i++){
3443             MERGE(dct_error_sum[0][i]);
3444             MERGE(dct_error_sum[1][i]);
3445         }
3446     }
3447
3448     av_assert1(put_bits_count(&src->pb) % 8 ==0);
3449     av_assert1(put_bits_count(&dst->pb) % 8 ==0);
3450     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3451     flush_put_bits(&dst->pb);
3452 }
3453
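/* Pick the frame quality: either the explicitly scheduled next_lambda or the
 * rate controller's estimate. With adaptive quantization, the per-MB qscale
 * table is additionally sanitized for the codec in use before lambda is
 * taken from it. */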
3454 static int estimate_qp(MpegEncContext *s, int dry_run){
3455     if (s->next_lambda){
3456         s->current_picture_ptr->f->quality =
3457         s->current_picture.f->quality = s->next_lambda;
3458         if(!dry_run) s->next_lambda= 0;
3459     } else if (!s->fixed_qscale) {
3460         int quality = ff_rate_estimate_qscale(s, dry_run);
3461         s->current_picture_ptr->f->quality =
3462         s->current_picture.f->quality = quality;
3463         if (s->current_picture.f->quality < 0)
3464             return -1;
3465     }
3466
3467     if(s->adaptive_quant){
3468         switch(s->codec_id){
3469         case AV_CODEC_ID_MPEG4:
3470             if (CONFIG_MPEG4_ENCODER)
3471                 ff_clean_mpeg4_qscales(s);
3472             break;
3473         case AV_CODEC_ID_H263:
3474         case AV_CODEC_ID_H263P:
3475         case AV_CODEC_ID_FLV1:
3476             if (CONFIG_H263_ENCODER)
3477                 ff_clean_h263_qscales(s);
3478             break;
3479         default:
3480             ff_init_qscale_tab(s);
3481         }
3482
3483         s->lambda= s->lambda_table[0];
3484         //FIXME broken
3485     }else
3486         s->lambda = s->current_picture.f->quality;
3487     update_qscale(s);
3488     return 0;
3489 }
3490
3491 /* must be called before writing the header */
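/* pp_time is the distance between the two non-B frames surrounding the current
 * picture and pb_time the distance from the previous non-B frame to a B-frame;
 * both are needed for B-frame temporal prediction (e.g. direct mode) and for
 * the time fields written into the headers. */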
3492 static void set_frame_distances(MpegEncContext * s){
3493     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3494     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3495
3496     if(s->pict_type==AV_PICTURE_TYPE_B){
3497         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3498         av_assert1(s->pb_time > 0 && s->pb_time < s->pp_time);
3499     }else{
3500         s->pp_time= s->time - s->last_non_b_time;
3501         s->last_non_b_time= s->time;
3502         av_assert1(s->picture_number==0 || s->pp_time > 0);
3503     }
3504 }
3505
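/* Encode one frame: run motion estimation over all slice contexts, pick
 * f_code/b_code and clip over-long vectors, estimate the qscale, set up any
 * codec-specific quantization matrices, write the picture header and finally
 * run encode_thread() on every slice context, merging bitstreams and stats. */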
3506 static int encode_picture(MpegEncContext *s, int picture_number)
3507 {
3508     int i, ret;
3509     int bits;
3510     int context_count = s->slice_context_count;
3511
3512     s->picture_number = picture_number;
3513
3514     /* Reset the average MB variance */
3515     s->me.mb_var_sum_temp    =
3516     s->me.mc_mb_var_sum_temp = 0;
3517
3518     /* we need to initialize some time vars before we can encode B-frames */
3519     // RAL: Condition added for MPEG1VIDEO
3520     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3521         set_frame_distances(s);
3522     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3523         ff_set_mpeg4_time(s);
3524
3525     s->me.scene_change_score=0;
3526
3527 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3528
3529     if(s->pict_type==AV_PICTURE_TYPE_I){
3530         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3531         else                        s->no_rounding=0;
3532     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3533         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3534             s->no_rounding ^= 1;
3535     }
3536
3537     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3538         if (estimate_qp(s,1) < 0)
3539             return -1;
3540         ff_get_2pass_fcode(s);
3541     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3542         if(s->pict_type==AV_PICTURE_TYPE_B)
3543             s->lambda= s->last_lambda_for[s->pict_type];
3544         else
3545             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3546         update_qscale(s);
3547     }
3548
3549     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3550         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3551         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3552         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3553         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3554     }
3555
3556     s->mb_intra=0; //for the rate distortion & bit compare functions
3557     for(i=1; i<context_count; i++){
3558         ret = ff_update_duplicate_context(s->thread_context[i], s);
3559         if (ret < 0)
3560             return ret;
3561     }
3562
3563     if(ff_init_me(s)<0)
3564         return -1;
3565
3566     /* Estimate motion for every MB */
3567     if(s->pict_type != AV_PICTURE_TYPE_I){
3568         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3569         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3570         if (s->pict_type != AV_PICTURE_TYPE_B) {
3571             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3572                 s->me_pre == 2) {
3573                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3574             }
3575         }
3576
3577         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3578     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3579         /* I-Frame */
3580         for(i=0; i<s->mb_stride*s->mb_height; i++)
3581             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3582
3583         if(!s->fixed_qscale){
3584             /* finding spatial complexity for I-frame rate control */
3585             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3586         }
3587     }
3588     for(i=1; i<context_count; i++){
3589         merge_context_after_me(s, s->thread_context[i]);
3590     }
3591     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3592     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3593     emms_c();
3594
3595     if (s->me.scene_change_score > s->scenechange_threshold &&
3596         s->pict_type == AV_PICTURE_TYPE_P) {
3597         s->pict_type= AV_PICTURE_TYPE_I;
3598         for(i=0; i<s->mb_stride*s->mb_height; i++)
3599             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3600         if(s->msmpeg4_version >= 3)
3601             s->no_rounding=1;
3602         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3603                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3604     }
3605
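    /* Choose the smallest f_code/b_code that covers the estimated motion
     * vectors, then clip or re-type macroblocks whose vectors still fall
     * outside the representable range (ff_fix_long_p_mvs / ff_fix_long_mvs). */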
3606     if(!s->umvplus){
3607         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3608             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3609
3610             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3611                 int a,b;
3612                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3613                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3614                 s->f_code= FFMAX3(s->f_code, a, b);
3615             }
3616
3617             ff_fix_long_p_mvs(s, s->intra_penalty ? CANDIDATE_MB_TYPE_INTER : CANDIDATE_MB_TYPE_INTRA);
3618             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, !!s->intra_penalty);
3619             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3620                 int j;
3621                 for(i=0; i<2; i++){
3622                     for(j=0; j<2; j++)
3623                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3624                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, !!s->intra_penalty);
3625                 }
3626             }
3627         }
3628
3629         if(s->pict_type==AV_PICTURE_TYPE_B){
3630             int a, b;
3631
3632             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3633             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3634             s->f_code = FFMAX(a, b);
3635
3636             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3637             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3638             s->b_code = FFMAX(a, b);
3639
3640             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3641             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3642             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3643             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3644             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3645                 int dir, j;
3646                 for(dir=0; dir<2; dir++){
3647                     for(i=0; i<2; i++){
3648                         for(j=0; j<2; j++){
3649                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3650                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3651                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3652                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3653                         }
3654                     }
3655                 }
3656             }
3657         }
3658     }
3659
3660     if (estimate_qp(s, 0) < 0)
3661         return -1;
3662
3663     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3664         s->pict_type == AV_PICTURE_TYPE_I &&
3665         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3666         s->qscale= 3; //reduce clipping problems
3667
3668     if (s->out_format == FMT_MJPEG) {
3669         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3670         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3671
3672         if (s->avctx->intra_matrix) {
3673             chroma_matrix =
3674             luma_matrix = s->avctx->intra_matrix;
3675         }
3676         if (s->avctx->chroma_intra_matrix)
3677             chroma_matrix = s->avctx->chroma_intra_matrix;
3678
3679         /* for mjpeg, we do include qscale in the matrix */
3680         for(i=1;i<64;i++){
3681             int j = s->idsp.idct_permutation[i];
3682
3683             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3684             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3685         }
3686         s->y_dc_scale_table=
3687         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3688         s->chroma_intra_matrix[0] =
3689         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3690         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3691                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3692         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3693                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3694         s->qscale= 8;
3695     }
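    /* AMV (the MJPEG variant used in .amv files) always uses the fixed sp5x
     * "qscale 5" tables and DC scales of 13/14; qscale is pinned to 8 to match
     * the single matrix entry built by ff_convert_matrix(..., 8, 8, 1) below. */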
3696     if(s->codec_id == AV_CODEC_ID_AMV){
3697         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3698         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3699         for(i=1;i<64;i++){
3700             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3701
3702             s->intra_matrix[j]        = sp5x_qscale_five_quant_table[0][i];
3703             s->chroma_intra_matrix[j] = sp5x_qscale_five_quant_table[1][i];
3704         }
3705         s->y_dc_scale_table= y;
3706         s->c_dc_scale_table= c;
3707         s->intra_matrix[0] = 13;
3708         s->chroma_intra_matrix[0] = 14;
3709         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3710                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3711         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3712                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3713         s->qscale= 8;
3714     }
3715
3716     if (s->out_format == FMT_SPEEDHQ) {
3717         s->y_dc_scale_table=
3718         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[3];
3719     }
3720
3721     //FIXME var duplication
3722     s->current_picture_ptr->f->key_frame =
3723     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3724     s->current_picture_ptr->f->pict_type =
3725     s->current_picture.f->pict_type = s->pict_type;
3726
3727     if (s->current_picture.f->key_frame)
3728         s->picture_in_gop_number=0;
3729
3730     s->mb_x = s->mb_y = 0;
3731     s->last_bits= put_bits_count(&s->pb);
3732     switch(s->out_format) {
3733 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
3734     case FMT_MJPEG:
3735         /* s->huffman == HUFFMAN_TABLE_OPTIMAL can only be true for MJPEG. */
3736         if (!CONFIG_MJPEG_ENCODER || s->huffman != HUFFMAN_TABLE_OPTIMAL)
3737             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3738                                            s->pred, s->intra_matrix, s->chroma_intra_matrix);
3739         break;
3740 #endif
3741     case FMT_SPEEDHQ:
3742         if (CONFIG_SPEEDHQ_ENCODER)
3743             ff_speedhq_encode_picture_header(s);
3744         break;
3745     case FMT_H261:
3746         if (CONFIG_H261_ENCODER)
3747             ff_h261_encode_picture_header(s, picture_number);
3748         break;
3749     case FMT_H263:
3750         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3751             ff_wmv2_encode_picture_header(s, picture_number);
3752         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3753             ff_msmpeg4_encode_picture_header(s, picture_number);
3754         else if (CONFIG_MPEG4_ENCODER && s->h263_pred) {
3755             ret = ff_mpeg4_encode_picture_header(s, picture_number);
3756             if (ret < 0)
3757                 return ret;
3758         } else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3759             ret = ff_rv10_encode_picture_header(s, picture_number);
3760             if (ret < 0)
3761                 return ret;
3762         }
3763         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3764             ff_rv20_encode_picture_header(s, picture_number);
3765         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3766             ff_flv_encode_picture_header(s, picture_number);
3767         else if (CONFIG_H263_ENCODER)
3768             ff_h263_encode_picture_header(s, picture_number);
3769         break;
3770     case FMT_MPEG1:
3771         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3772             ff_mpeg1_encode_picture_header(s, picture_number);
3773         break;
3774     default:
3775         av_assert0(0);
3776     }
3777     bits= put_bits_count(&s->pb);
3778     s->header_bits= bits - s->last_bits;
3779
3780     for(i=1; i<context_count; i++){
3781         update_duplicate_context_after_me(s->thread_context[i], s);
3782     }
3783     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3784     for(i=1; i<context_count; i++){
3785         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3786             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-BUF_BITS));
3787         merge_context_after_encode(s, s->thread_context[i]);
3788     }
3789     emms_c();
3790     return 0;
3791 }
3792
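/* Noise reduction: accumulate per-coefficient magnitudes in dct_error_sum and
 * shrink each DCT coefficient towards zero by the running dct_offset for this
 * block type (intra/inter), never letting it change sign. */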
3793 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3794     const int intra= s->mb_intra;
3795     int i;
3796
3797     s->dct_count[intra]++;
3798
3799     for(i=0; i<64; i++){
3800         int level= block[i];
3801
3802         if(level){
3803             if(level>0){
3804                 s->dct_error_sum[intra][i] += level;
3805                 level -= s->dct_offset[intra][i];
3806                 if(level<0) level=0;
3807             }else{
3808                 s->dct_error_sum[intra][i] -= level;
3809                 level += s->dct_offset[intra][i];
3810                 if(level>0) level=0;
3811             }
3812             block[i]= level;
3813         }
3814     }
3815 }
3816
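/* Trellis quantization: after the forward DCT, consider for each coefficient
 * the two candidate levels around the plain quantizer output and search over
 * (run, level) chains for the combination minimizing distortion + lambda*rate,
 * keeping a small list of surviving start positions per coefficient. */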
3817 static int dct_quantize_trellis_c(MpegEncContext *s,
3818                                   int16_t *block, int n,
3819                                   int qscale, int *overflow){
3820     const int *qmat;
3821     const uint16_t *matrix;
3822     const uint8_t *scantable;
3823     const uint8_t *perm_scantable;
3824     int max=0;
3825     unsigned int threshold1, threshold2;
3826     int bias=0;
3827     int run_tab[65];
3828     int level_tab[65];
3829     int score_tab[65];
3830     int survivor[65];
3831     int survivor_count;
3832     int last_run=0;
3833     int last_level=0;
3834     int last_score= 0;
3835     int last_i;
3836     int coeff[2][64];
3837     int coeff_count[64];
3838     int qmul, qadd, start_i, last_non_zero, i, dc;
3839     const int esc_length= s->ac_esc_length;
3840     uint8_t * length;
3841     uint8_t * last_length;
3842     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3843     int mpeg2_qscale;
3844
3845     s->fdsp.fdct(block);
3846
3847     if(s->dct_error_sum)
3848         s->denoise_dct(s, block);
3849     qmul= qscale*16;
3850     qadd= ((qscale-1)|1)*8;
3851
3852     if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3853     else                 mpeg2_qscale = qscale << 1;
3854
3855     if (s->mb_intra) {
3856         int q;
3857         scantable= s->intra_scantable.scantable;
3858         perm_scantable= s->intra_scantable.permutated;
3859         if (!s->h263_aic) {
3860             if (n < 4)
3861                 q = s->y_dc_scale;
3862             else
3863                 q = s->c_dc_scale;
3864             q = q << 3;
3865         } else{
3866             /* For AIC we skip quant/dequant of INTRADC */
3867             q = 1 << 3;
3868             qadd=0;
3869         }
3870
3871         /* note: block[0] is assumed to be positive */
3872         block[0] = (block[0] + (q >> 1)) / q;
3873         start_i = 1;
3874         last_non_zero = 0;
3875         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3876         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3877         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3878             bias= 1<<(QMAT_SHIFT-1);
3879
3880         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3881             length     = s->intra_chroma_ac_vlc_length;
3882             last_length= s->intra_chroma_ac_vlc_last_length;
3883         } else {
3884             length     = s->intra_ac_vlc_length;
3885             last_length= s->intra_ac_vlc_last_length;
3886         }
3887     } else {
3888         scantable= s->inter_scantable.scantable;
3889         perm_scantable= s->inter_scantable.permutated;
3890         start_i = 0;
3891         last_non_zero = -1;
3892         qmat = s->q_inter_matrix[qscale];
3893         matrix = s->inter_matrix;
3894         length     = s->inter_ac_vlc_length;
3895         last_length= s->inter_ac_vlc_last_length;
3896     }
3897     last_i= start_i;
3898
3899     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3900     threshold2= (threshold1<<1);
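    /* ((unsigned)(level + threshold1)) > threshold2 below is a branch-free test
     * for |level| exceeding the quantization dead zone. */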
3901
3902     for(i=63; i>=start_i; i--) {
3903         const int j = scantable[i];
3904         int level = block[j] * qmat[j];
3905
3906         if(((unsigned)(level+threshold1))>threshold2){
3907             last_non_zero = i;
3908             break;
3909         }
3910     }
3911
3912     for(i=start_i; i<=last_non_zero; i++) {
3913         const int j = scantable[i];
3914         int level = block[j] * qmat[j];
3915
3916 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3917 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3918         if(((unsigned)(level+threshold1))>threshold2){
3919             if(level>0){
3920                 level= (bias + level)>>QMAT_SHIFT;
3921                 coeff[0][i]= level;
3922                 coeff[1][i]= level-1;
3923 //                coeff[2][k]= level-2;
3924             }else{
3925                 level= (bias - level)>>QMAT_SHIFT;
3926                 coeff[0][i]= -level;
3927                 coeff[1][i]= -level+1;
3928 //                coeff[2][k]= -level+2;
3929             }
3930             coeff_count[i]= FFMIN(level, 2);
3931             av_assert2(coeff_count[i]);
3932             max |=level;
3933         }else{
3934             coeff[0][i]= (level>>31)|1;
3935             coeff_count[i]= 1;
3936         }
3937     }
3938
3939     *overflow= s->max_qcoeff < max; //overflow might have happened
3940
3941     if(last_non_zero < start_i){
3942         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3943         return last_non_zero;
3944     }
3945
3946     score_tab[start_i]= 0;
3947     survivor[0]= start_i;
3948     survivor_count= 1;
3949
3950     for(i=start_i; i<=last_non_zero; i++){
3951         int level_index, j, zero_distortion;
3952         int dct_coeff= FFABS(block[ scantable[i] ]);
3953         int best_score=256*256*256*120;
3954
3955         if (s->fdsp.fdct == ff_fdct_ifast)
3956             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3957         zero_distortion= dct_coeff*dct_coeff;
3958
3959         for(level_index=0; level_index < coeff_count[i]; level_index++){
3960             int distortion;
3961             int level= coeff[level_index][i];
3962             const int alevel= FFABS(level);
3963             int unquant_coeff;
3964
3965             av_assert2(level);
3966
3967             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3968                 unquant_coeff= alevel*qmul + qadd;
3969             } else if(s->out_format == FMT_MJPEG) {
3970                 j = s->idsp.idct_permutation[scantable[i]];
3971                 unquant_coeff = alevel * matrix[j] * 8;
3972             }else{ // MPEG-1
3973                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3974                 if(s->mb_intra){
3975                         unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
3976                         unquant_coeff =   (unquant_coeff - 1) | 1;
3977                 }else{
3978                         unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
3979                         unquant_coeff =   (unquant_coeff - 1) | 1;
3980                 }
3981                 unquant_coeff<<= 3;
3982             }
3983
3984             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3985             level+=64;
3986             if((level&(~127)) == 0){
3987                 for(j=survivor_count-1; j>=0; j--){
3988                     int run= i - survivor[j];
3989                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3990                     score += score_tab[i-run];
3991
3992                     if(score < best_score){
3993                         best_score= score;
3994                         run_tab[i+1]= run;
3995                         level_tab[i+1]= level-64;
3996                     }
3997                 }
3998
3999                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4000                     for(j=survivor_count-1; j>=0; j--){
4001                         int run= i - survivor[j];
4002                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4003                         score += score_tab[i-run];
4004                         if(score < last_score){
4005                             last_score= score;
4006                             last_run= run;
4007                             last_level= level-64;
4008                             last_i= i+1;
4009                         }
4010                     }
4011                 }
4012             }else{
4013                 distortion += esc_length*lambda;
4014                 for(j=survivor_count-1; j>=0; j--){
4015                     int run= i - survivor[j];
4016                     int score= distortion + score_tab[i-run];
4017
4018                     if(score < best_score){
4019                         best_score= score;
4020                         run_tab[i+1]= run;
4021                         level_tab[i+1]= level-64;
4022                     }
4023                 }
4024
4025                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4026                     for(j=survivor_count-1; j>=0; j--){
4027                         int run= i - survivor[j];
4028                         int score= distortion + score_tab[i-run];
4029                         if(score < last_score){
4030                             last_score= score;
4031                             last_run= run;
4032                             last_level= level-64;
4033                             last_i= i+1;
4034                         }
4035                     }
4036                 }
4037             }
4038         }
4039
4040         score_tab[i+1]= best_score;
4041
4042         // Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
4043         if(last_non_zero <= 27){
4044             for(; survivor_count; survivor_count--){
4045                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4046                     break;
4047             }
4048         }else{
4049             for(; survivor_count; survivor_count--){
4050                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4051                     break;
4052             }
4053         }
4054
4055         survivor[ survivor_count++ ]= i+1;
4056     }
4057
4058     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4059         last_score= 256*256*256*120;
4060         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4061             int score= score_tab[i];
4062             if (i)
4063                 score += lambda * 2; // FIXME more exact?
4064
4065             if(score < last_score){
4066                 last_score= score;
4067                 last_i= i;
4068                 last_level= level_tab[i];
4069                 last_run= run_tab[i];
4070             }
4071         }
4072     }
4073
4074     s->coded_score[n] = last_score;
4075
4076     dc= FFABS(block[0]);
4077     last_non_zero= last_i - 1;
4078     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4079
4080     if(last_non_zero < start_i)
4081         return last_non_zero;
4082
4083     if(last_non_zero == 0 && start_i == 0){
4084         int best_level= 0;
4085         int best_score= dc * dc;
4086
4087         for(i=0; i<coeff_count[0]; i++){
4088             int level= coeff[i][0];
4089             int alevel= FFABS(level);
4090             int unquant_coeff, score, distortion;
4091
4092             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4093                     unquant_coeff= (alevel*qmul + qadd)>>3;
4094             } else{ // MPEG-1
4095                     unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4096                     unquant_coeff =   (unquant_coeff - 1) | 1;
4097             }
4098             unquant_coeff = (unquant_coeff + 4) >> 3;
4099             unquant_coeff<<= 3 + 3;
4100
4101             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4102             level+=64;
4103             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4104             else                    score= distortion + esc_length*lambda;
4105
4106             if(score < best_score){
4107                 best_score= score;
4108                 best_level= level - 64;
4109             }
4110         }
4111         block[0]= best_level;
4112         s->coded_score[n] = best_score - dc*dc;
4113         if(best_level == 0) return -1;
4114         else                return last_non_zero;
4115     }
4116
4117     i= last_i;
4118     av_assert2(last_level);
4119
4120     block[ perm_scantable[last_non_zero] ]= last_level;
4121     i -= last_run + 1;
4122
4123     for(; i>start_i; i -= run_tab[i] + 1){
4124         block[ perm_scantable[i-1] ]= level_tab[i];
4125     }
4126
4127     return last_non_zero;
4128 }
4129
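/* Integer approximation of the 8x8 DCT-II basis functions in BASIS_SHIFT
 * fixed point, stored in IDCT permutation order; filled lazily by
 * build_basis() and used by dct_quantize_refine() below. */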
4130 static int16_t basis[64][64];
4131
4132 static void build_basis(uint8_t *perm){
4133     int i, j, x, y;
4134     emms_c();
4135     for(i=0; i<8; i++){
4136         for(j=0; j<8; j++){
4137             for(y=0; y<8; y++){
4138                 for(x=0; x<8; x++){
4139                     double s= 0.25*(1<<BASIS_SHIFT);
4140                     int index= 8*i + j;
4141                     int perm_index= perm[index];
4142                     if(i==0) s*= sqrt(0.5);
4143                     if(j==0) s*= sqrt(0.5);
4144                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4145                 }
4146             }
4147         }
4148     }
4149 }
4150
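/* Greedy rate-distortion refinement of an already quantized block: each pass
 * tries every coefficient (and the intra DC) one quantization step up and
 * down, applies the single change that most reduces weighted distortion plus
 * lambda * rate, and stops once no such change improves the score. */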
4151 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4152                         int16_t *block, int16_t *weight, int16_t *orig,
4153                         int n, int qscale){
4154     int16_t rem[64];
4155     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4156     const uint8_t *scantable;
4157     const uint8_t *perm_scantable;
4158 //    unsigned int threshold1, threshold2;
4159 //    int bias=0;
4160     int run_tab[65];
4161     int prev_run=0;
4162     int prev_level=0;
4163     int qmul, qadd, start_i, last_non_zero, i, dc;
4164     uint8_t * length;
4165     uint8_t * last_length;
4166     int lambda;
4167     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4168
4169     if(basis[0][0] == 0)
4170         build_basis(s->idsp.idct_permutation);
4171
4172     qmul= qscale*2;
4173     qadd= (qscale-1)|1;
4174     if (s->mb_intra) {
4175         scantable= s->intra_scantable.scantable;
4176         perm_scantable= s->intra_scantable.permutated;
4177         if (!s->h263_aic) {
4178             if (n < 4)
4179                 q = s->y_dc_scale;
4180             else
4181                 q = s->c_dc_scale;
4182         } else{
4183             /* For AIC we skip quant/dequant of INTRADC */
4184             q = 1;
4185             qadd=0;
4186         }
4187         q <<= RECON_SHIFT-3;
4188         /* note: block[0] is assumed to be positive */
4189         dc= block[0]*q;
4190 //        block[0] = (block[0] + (q >> 1)) / q;
4191         start_i = 1;
4192 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4193 //            bias= 1<<(QMAT_SHIFT-1);
4194         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4195             length     = s->intra_chroma_ac_vlc_length;
4196             last_length= s->intra_chroma_ac_vlc_last_length;
4197         } else {
4198             length     = s->intra_ac_vlc_length;
4199             last_length= s->intra_ac_vlc_last_length;
4200         }
4201     } else {
4202         scantable= s->inter_scantable.scantable;
4203         perm_scantable= s->inter_scantable.permutated;
4204         dc= 0;
4205         start_i = 0;
4206         length     = s->inter_ac_vlc_length;
4207         last_length= s->inter_ac_vlc_last_length;
4208     }
4209     last_non_zero = s->block_last_index[n];
4210
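    /* rem[] tracks the scaled reconstruction error (reconstruction minus
     * original): it starts as the DC contribution minus the source block, and
     * each non-zero AC coefficient's dequantized basis function is added to it
     * in the RLE pass further below. */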
4211     dc += (1<<(RECON_SHIFT-1));
4212     for(i=0; i<64; i++){
4213         rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4214     }
4215
4216     sum=0;
4217     for(i=0; i<64; i++){
4218         int one= 36;
4219         int qns=4;
4220         int w;
4221
4222         w= FFABS(weight[i]) + qns*one;
4223         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4224
4225         weight[i] = w;
4226 //        w=weight[i] = (63*qns + (w/2)) / w;
4227
4228         av_assert2(w>0);
4229         av_assert2(w<(1<<6));
4230         sum += w*w;
4231     }
4232     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4233
4234     run=0;
4235     rle_index=0;
4236     for(i=start_i; i<=last_non_zero; i++){
4237         int j= perm_scantable[i];
4238         const int level= block[j];
4239         int coeff;
4240
4241         if(level){
4242             if(level<0) coeff= qmul*level - qadd;
4243             else        coeff= qmul*level + qadd;
4244             run_tab[rle_index++]=run;
4245             run=0;
4246
4247             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4248         }else{
4249             run++;
4250         }
4251     }
4252
4253     for(;;){
4254         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4255         int best_coeff=0;
4256         int best_change=0;
4257         int run2, best_unquant_change=0, analyze_gradient;
4258         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4259
4260         if(analyze_gradient){
4261             for(i=0; i<64; i++){
4262                 int w= weight[i];
4263
4264                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4265             }
4266             s->fdsp.fdct(d1);
4267         }
4268
4269         if(start_i){
4270             const int level= block[0];
4271             int change, old_coeff;
4272
4273             av_assert2(s->mb_intra);
4274
4275             old_coeff= q*level;
4276
4277             for(change=-1; change<=1; change+=2){
4278                 int new_level= level + change;
4279                 int score, new_coeff;
4280
4281                 new_coeff= q*new_level;
4282                 if(new_coeff >= 2048 || new_coeff < 0)
4283                     continue;
4284
4285                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4286                                                   new_coeff - old_coeff);
4287                 if(score<best_score){
4288                     best_score= score;
4289                     best_coeff= 0;
4290                     best_change= change;
4291                     best_unquant_change= new_coeff - old_coeff;
4292                 }
4293             }
4294         }
4295
4296         run=0;
4297         rle_index=0;
4298         run2= run_tab[rle_index++];
4299         prev_level=0;
4300         prev_run=0;
4301
4302         for(i=start_i; i<64; i++){
4303             int j= perm_scantable[i];
4304             const int level= block[j];
4305             int change, old_coeff;
4306
4307             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4308                 break;
4309
4310             if(level){
4311                 if(level<0) old_coeff= qmul*level - qadd;
4312                 else        old_coeff= qmul*level + qadd;
4313                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4314             }else{
4315                 old_coeff=0;
4316                 run2--;
4317                 av_assert2(run2>=0 || i >= last_non_zero );
4318             }
4319
4320             for(change=-1; change<=1; change+=2){
4321                 int new_level= level + change;
4322                 int score, new_coeff, unquant_change;
4323
4324                 score=0;
4325                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4326                    continue;
4327
4328                 if(new_level){
4329                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4330                     else            new_coeff= qmul*new_level + qadd;
4331                     if(new_coeff >= 2048 || new_coeff <= -2048)
4332                         continue;
4333                     //FIXME check for overflow
4334
4335                     if(level){
4336                         if(level < 63 && level > -63){
4337                             if(i < last_non_zero)
4338                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4339                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4340                             else
4341                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4342                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4343                         }
4344                     }else{
4345                         av_assert2(FFABS(new_level)==1);
4346
4347                         if(analyze_gradient){
4348                             int g= d1[ scantable[i] ];
4349                             if(g && (g^new_level) >= 0)
4350                                 continue;
4351                         }
4352
4353                         if(i < last_non_zero){
4354                             int next_i= i + run2 + 1;
4355                             int next_level= block[ perm_scantable[next_i] ] + 64;
4356
4357                             if(next_level&(~127))
4358                                 next_level= 0;
4359
4360                             if(next_i < last_non_zero)
4361                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4362                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4363                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4364                             else
4365                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4366                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4367                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4368                         }else{
4369                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4370                             if(prev_level){
4371                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4372                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4373                             }
4374                         }
4375                     }
4376                 }else{
4377                     new_coeff=0;
4378                     av_assert2(FFABS(level)==1);
4379
4380                     if(i < last_non_zero){
4381                         int next_i= i + run2 + 1;
4382                         int next_level= block[ perm_scantable[next_i] ] + 64;
4383
4384                         if(next_level&(~127))
4385                             next_level= 0;
4386
4387                         if(next_i < last_non_zero)
4388                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4389                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4390                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4391                         else
4392                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4393                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4394                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4395                     }else{
4396                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4397                         if(prev_level){
4398                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4399                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4400                         }
4401                     }
4402                 }
4403
4404                 score *= lambda;
4405
4406                 unquant_change= new_coeff - old_coeff;
4407                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4408
4409                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4410                                                    unquant_change);
4411                 if(score<best_score){
4412                     best_score= score;
4413                     best_coeff= i;
4414                     best_change= change;
4415                     best_unquant_change= unquant_change;
4416                 }
4417             }
4418             if(level){
4419                 prev_level= level + 64;
4420                 if(prev_level&(~127))
4421                     prev_level= 0;
4422                 prev_run= run;
4423                 run=0;
4424             }else{
4425                 run++;
4426             }
4427         }
4428
4429         if(best_change){
4430             int j= perm_scantable[ best_coeff ];
4431
4432             block[j] += best_change;
4433
4434             if(best_coeff > last_non_zero){
4435                 last_non_zero= best_coeff;
4436                 av_assert2(block[j]);
4437             }else{
4438                 for(; last_non_zero>=start_i; last_non_zero--){
4439                     if(block[perm_scantable[last_non_zero]])
4440                         break;
4441                 }
4442             }
4443
4444             run=0;
4445             rle_index=0;
4446             for(i=start_i; i<=last_non_zero; i++){
4447                 int j= perm_scantable[i];
4448                 const int level= block[j];
4449
4450                  if(level){
4451                      run_tab[rle_index++]=run;
4452                      run=0;
4453                  }else{
4454                      run++;
4455                  }
4456             }
4457
4458             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4459         }else{
4460             break;
4461         }
4462     }
4463
4464     return last_non_zero;
4465 }
4466
4467 /**
4468  * Permute an 8x8 block according to permutation.
4469  * @param block the block which will be permuted according to
4470  *              the given permutation vector
4471  * @param permutation the permutation vector
4472  * @param last the last non-zero coefficient in scantable order, used to
4473  *             speed the permutation up
4474  * @param scantable the scantable in use; it is only used to speed the
4475  *                  permutation up, the block is not (inverse) permuted
4476  *                  to scantable order!
4477  */
4478 void ff_block_permute(int16_t *block, uint8_t *permutation,
4479                       const uint8_t *scantable, int last)
4480 {
4481     int i;
4482     int16_t temp[64];
4483
4484     if (last <= 0)
4485         return;
4486     //FIXME it is ok but not clean and might fail for some permutations
4487     // if (permutation[1] == 1)
4488     // return;
4489
4490     for (i = 0; i <= last; i++) {
4491         const int j = scantable[i];
4492         temp[j] = block[j];
4493         block[j] = 0;
4494     }
4495
4496     for (i = 0; i <= last; i++) {
4497         const int j = scantable[i];
4498         const int perm_j = permutation[j];
4499         block[perm_j] = temp[j];
4500     }
4501 }
4502
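/* Reference C quantizer: forward DCT, optional DCT-domain denoising, then for
 * each coefficient level = (|coef| * qmat + bias) >> QMAT_SHIFT with the sign
 * restored; coefficients below the threshold are zeroed and the surviving
 * ones are finally permuted into the IDCT's coefficient order. */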
4503 int ff_dct_quantize_c(MpegEncContext *s,
4504                         int16_t *block, int n,
4505                         int qscale, int *overflow)
4506 {
4507     int i, j, level, last_non_zero, q, start_i;
4508     const int *qmat;
4509     const uint8_t *scantable;
4510     int bias;
4511     int max=0;
4512     unsigned int threshold1, threshold2;
4513
4514     s->fdsp.fdct(block);
4515
4516     if(s->dct_error_sum)
4517         s->denoise_dct(s, block);
4518
4519     if (s->mb_intra) {
4520         scantable= s->intra_scantable.scantable;
4521         if (!s->h263_aic) {
4522             if (n < 4)
4523                 q = s->y_dc_scale;
4524             else
4525                 q = s->c_dc_scale;
4526             q = q << 3;
4527         } else
4528             /* For AIC we skip quant/dequant of INTRADC */
4529             q = 1 << 3;
4530
4531         /* note: block[0] is assumed to be positive */
4532         block[0] = (block[0] + (q >> 1)) / q;
4533         start_i = 1;
4534         last_non_zero = 0;
4535         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4536         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4537     } else {
4538         scantable= s->inter_scantable.scantable;
4539         start_i = 0;
4540         last_non_zero = -1;
4541         qmat = s->q_inter_matrix[qscale];
4542         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4543     }
4544     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4545     threshold2= (threshold1<<1);
4546     for(i=63;i>=start_i;i--) {
4547         j = scantable[i];
4548         level = block[j] * qmat[j];
4549
4550         if(((unsigned)(level+threshold1))>threshold2){
4551             last_non_zero = i;
4552             break;
4553         }else{
4554             block[j]=0;
4555         }
4556     }
4557     for(i=start_i; i<=last_non_zero; i++) {
4558         j = scantable[i];
4559         level = block[j] * qmat[j];
4560
4561 //        if(   bias+level >= (1<<QMAT_SHIFT)
4562 //           || bias-level >= (1<<QMAT_SHIFT)){
4563         if(((unsigned)(level+threshold1))>threshold2){
4564             if(level>0){
4565                 level= (bias + level)>>QMAT_SHIFT;
4566                 block[j]= level;
4567             }else{
4568                 level= (bias - level)>>QMAT_SHIFT;
4569                 block[j]= -level;
4570             }
4571             max |=level;
4572         }else{
4573             block[j]=0;
4574         }
4575     }
4576     *overflow= s->max_qcoeff < max; //overflow might have happened
4577
4578     /* We need this permutation so that we correct the IDCT; we only permute the non-zero elements. */
4579     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4580         ff_block_permute(block, s->idsp.idct_permutation,
4581                       scantable, last_non_zero);
4582
4583     return last_non_zero;
4584 }
4585
4586 #define OFFSET(x) offsetof(MpegEncContext, x)
4587 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4588 static const AVOption h263_options[] = {
4589     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4590     { "mb_info",      "emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4591     FF_MPV_COMMON_OPTS
4592 #if FF_API_MPEGVIDEO_OPTS
4593     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4594     FF_MPV_DEPRECATED_A53_CC_OPT
4595     FF_MPV_DEPRECATED_MATRIX_OPT
4596     FF_MPV_DEPRECATED_BFRAME_OPTS
4597 #endif
4598     { NULL },
4599 };
4600
4601 static const AVClass h263_class = {
4602     .class_name = "H.263 encoder",
4603     .item_name  = av_default_item_name,
4604     .option     = h263_options,
4605     .version    = LIBAVUTIL_VERSION_INT,
4606 };
4607
4608 AVCodec ff_h263_encoder = {
4609     .name           = "h263",
4610     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4611     .type           = AVMEDIA_TYPE_VIDEO,
4612     .id             = AV_CODEC_ID_H263,
4613     .priv_data_size = sizeof(MpegEncContext),
4614     .init           = ff_mpv_encode_init,
4615     .encode2        = ff_mpv_encode_picture,
4616     .close          = ff_mpv_encode_end,
4617     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4618     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4619     .priv_class     = &h263_class,
4620 };
4621
4622 static const AVOption h263p_options[] = {
4623     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus),       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4624     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4625     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4626     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE},
4627     FF_MPV_COMMON_OPTS
4628 #if FF_API_MPEGVIDEO_OPTS
4629     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4630     FF_MPV_DEPRECATED_A53_CC_OPT
4631     FF_MPV_DEPRECATED_MATRIX_OPT
4632     FF_MPV_DEPRECATED_BFRAME_OPTS
4633 #endif
4634     { NULL },
4635 };
4636 static const AVClass h263p_class = {
4637     .class_name = "H.263p encoder",
4638     .item_name  = av_default_item_name,
4639     .option     = h263p_options,
4640     .version    = LIBAVUTIL_VERSION_INT,
4641 };
4642
4643 AVCodec ff_h263p_encoder = {
4644     .name           = "h263p",
4645     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4646     .type           = AVMEDIA_TYPE_VIDEO,
4647     .id             = AV_CODEC_ID_H263P,
4648     .priv_data_size = sizeof(MpegEncContext),
4649     .init           = ff_mpv_encode_init,
4650     .encode2        = ff_mpv_encode_picture,
4651     .close          = ff_mpv_encode_end,
4652     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4653     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4654     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4655     .priv_class     = &h263p_class,
4656 };
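
/* Usage note (illustrative, with placeholder file names): with the ffmpeg
 * command-line tool these private encoder options are passed like any other
 * codec option once the encoder is selected, e.g.
 *     ffmpeg -i input.avi -c:v h263p -umv 1 -obmc 1 output.3gp
 */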
4657
4658 static const AVClass msmpeg4v2_class = {
4659     .class_name = "msmpeg4v2 encoder",
4660     .item_name  = av_default_item_name,
4661     .option     = ff_mpv_generic_options,
4662     .version    = LIBAVUTIL_VERSION_INT,
4663 };
4664
4665 AVCodec ff_msmpeg4v2_encoder = {
4666     .name           = "msmpeg4v2",
4667     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4668     .type           = AVMEDIA_TYPE_VIDEO,
4669     .id             = AV_CODEC_ID_MSMPEG4V2,
4670     .priv_data_size = sizeof(MpegEncContext),
4671     .init           = ff_mpv_encode_init,
4672     .encode2        = ff_mpv_encode_picture,
4673     .close          = ff_mpv_encode_end,
4674     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4675     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4676     .priv_class     = &msmpeg4v2_class,
4677 };
4678
4679 static const AVClass msmpeg4v3_class = {
4680     .class_name = "msmpeg4v3 encoder",
4681     .item_name  = av_default_item_name,
4682     .option     = ff_mpv_generic_options,
4683     .version    = LIBAVUTIL_VERSION_INT,
4684 };
4685
4686 AVCodec ff_msmpeg4v3_encoder = {
4687     .name           = "msmpeg4",
4688     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4689     .type           = AVMEDIA_TYPE_VIDEO,
4690     .id             = AV_CODEC_ID_MSMPEG4V3,
4691     .priv_data_size = sizeof(MpegEncContext),
4692     .init           = ff_mpv_encode_init,
4693     .encode2        = ff_mpv_encode_picture,
4694     .close          = ff_mpv_encode_end,
4695     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4696     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4697     .priv_class     = &msmpeg4v3_class,
4698 };
4699
4700 static const AVClass wmv1_class = {
4701     .class_name = "wmv1 encoder",
4702     .item_name  = av_default_item_name,
4703     .option     = ff_mpv_generic_options,
4704     .version    = LIBAVUTIL_VERSION_INT,
4705 };
4706
4707 AVCodec ff_wmv1_encoder = {
4708     .name           = "wmv1",
4709     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4710     .type           = AVMEDIA_TYPE_VIDEO,
4711     .id             = AV_CODEC_ID_WMV1,
4712     .priv_data_size = sizeof(MpegEncContext),
4713     .init           = ff_mpv_encode_init,
4714     .encode2        = ff_mpv_encode_picture,
4715     .close          = ff_mpv_encode_end,
4716     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4717     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4718     .priv_class     = &wmv1_class,
4719 };