1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /*
26  * non-linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
27  */
28
29 /**
30  * @file
31  * The simplest mpeg encoder (well, it was the simplest!).
32  */
33
34 #include <stdint.h>
35
36 #include "libavutil/internal.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/mem_internal.h"
40 #include "libavutil/pixdesc.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/thread.h"
43 #include "avcodec.h"
44 #include "dct.h"
45 #include "idctdsp.h"
46 #include "mpeg12.h"
47 #include "mpegvideo.h"
48 #include "mpegvideodata.h"
49 #include "h261.h"
50 #include "h263.h"
51 #include "h263data.h"
52 #include "mjpegenc_common.h"
53 #include "mathops.h"
54 #include "mpegutils.h"
55 #include "mjpegenc.h"
56 #include "speedhqenc.h"
57 #include "msmpeg4.h"
58 #include "pixblockdsp.h"
59 #include "qpeldsp.h"
60 #include "faandct.h"
61 #include "thread.h"
62 #include "aandcttab.h"
63 #include "flv.h"
64 #include "mpeg4video.h"
65 #include "internal.h"
66 #include "bytestream.h"
67 #include "wmv2.h"
68 #include "rv10.h"
69 #include "packet_internal.h"
70 #include <limits.h>
71 #include "sp5x.h"
72
73 #define QUANT_BIAS_SHIFT 8
74
75 #define QMAT_SHIFT_MMX 16
76 #define QMAT_SHIFT 21
77
78 static int encode_picture(MpegEncContext *s, int picture_number);
79 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
80 static int sse_mb(MpegEncContext *s);
81 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
82 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
83
84 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_DMV * 2 + 1];
85 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
86
87 const AVOption ff_mpv_generic_options[] = {
88     FF_MPV_COMMON_OPTS
89 #if FF_API_MPEGVIDEO_OPTS
90     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
91     FF_MPV_DEPRECATED_A53_CC_OPT
92     FF_MPV_DEPRECATED_MATRIX_OPT
93     FF_MPV_DEPRECATED_BFRAME_OPTS
94 #endif
95     { NULL },
96 };
97
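/* Build the per-qscale quantization multiplier tables (qmat, plus qmat16 with its
 * rounding bias) from the given quant matrix; the fixed-point scaling depends on
 * which forward DCT implementation is in use. */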
98 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
99                        uint16_t (*qmat16)[2][64],
100                        const uint16_t *quant_matrix,
101                        int bias, int qmin, int qmax, int intra)
102 {
103     FDCTDSPContext *fdsp = &s->fdsp;
104     int qscale;
105     int shift = 0;
106
107     for (qscale = qmin; qscale <= qmax; qscale++) {
108         int i;
109         int qscale2;
110
111         if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
112         else                 qscale2 = qscale << 1;
113
114         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
115 #if CONFIG_FAANDCT
116             fdsp->fdct == ff_faandct            ||
117 #endif /* CONFIG_FAANDCT */
118             fdsp->fdct == ff_jpeg_fdct_islow_10) {
119             for (i = 0; i < 64; i++) {
120                 const int j = s->idsp.idct_permutation[i];
121                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
122                 /* 16 <= qscale * quant_matrix[i] <= 7905
123                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
124                  *             19952 <=              x  <= 249205026
125                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
126                  *           3444240 >= (1 << 36) / (x) >= 275 */
127
128                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
129             }
130         } else if (fdsp->fdct == ff_fdct_ifast) {
131             for (i = 0; i < 64; i++) {
132                 const int j = s->idsp.idct_permutation[i];
133                 int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
134                 /* 16 <= qscale * quant_matrix[i] <= 7905
135                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
136                  *             19952 <=              x  <= 249205026
137                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
138                  *           3444240 >= (1 << 36) / (x) >= 275 */
139
140                 qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
141             }
142         } else {
143             for (i = 0; i < 64; i++) {
144                 const int j = s->idsp.idct_permutation[i];
145                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
146                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
147                  * Assume x = qscale * quant_matrix[i]
148                  * So             16 <=              x  <= 7905
149                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
150                  * so          32768 >= (1 << 19) / (x) >= 67 */
151                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
152                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
153                 //                    (qscale * quant_matrix[i]);
154                 qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
155
156                 if (qmat16[qscale][0][i] == 0 ||
157                     qmat16[qscale][0][i] == 128 * 256)
158                     qmat16[qscale][0][i] = 128 * 256 - 1;
159                 qmat16[qscale][1][i] =
160                     ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
161                                 qmat16[qscale][0][i]);
162             }
163         }
164
165         for (i = intra; i < 64; i++) {
166             int64_t max = 8191;
167             if (fdsp->fdct == ff_fdct_ifast) {
168                 max = (8191LL * ff_aanscales[i]) >> 14;
169             }
170             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
171                 shift++;
172             }
173         }
174     }
175     if (shift) {
176         av_log(s->avctx, AV_LOG_INFO,
177                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
178                QMAT_SHIFT - shift);
179     }
180 }
181
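/* Derive s->qscale from the current lambda, clip it to the configured
 * qmin/qmax range, and refresh lambda2 accordingly. */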
182 static inline void update_qscale(MpegEncContext *s)
183 {
184     if (s->q_scale_type == 1 && 0) {
185         int i;
186         int bestdiff=INT_MAX;
187         int best = 1;
188
189         for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
190             int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
191             if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
192                 (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
193                 continue;
194             if (diff < bestdiff) {
195                 bestdiff = diff;
196                 best = i;
197             }
198         }
199         s->qscale = best;
200     } else {
201         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
202                     (FF_LAMBDA_SHIFT + 7);
203         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
204     }
205
206     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
207                  FF_LAMBDA_SHIFT;
208 }
209
210 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
211 {
212     int i;
213
214     if (matrix) {
215         put_bits(pb, 1, 1);
216         for (i = 0; i < 64; i++) {
217             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
218         }
219     } else
220         put_bits(pb, 1, 0);
221 }
222
223 /**
224  * init s->current_picture.qscale_table from s->lambda_table
225  */
226 void ff_init_qscale_tab(MpegEncContext *s)
227 {
228     int8_t * const qscale_table = s->current_picture.qscale_table;
229     int i;
230
231     for (i = 0; i < s->mb_num; i++) {
232         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
233         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
234         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
235                                                   s->avctx->qmax);
236     }
237 }
238
239 static void update_duplicate_context_after_me(MpegEncContext *dst,
240                                               MpegEncContext *src)
241 {
242 #define COPY(a) dst->a= src->a
243     COPY(pict_type);
244     COPY(current_picture);
245     COPY(f_code);
246     COPY(b_code);
247     COPY(qscale);
248     COPY(lambda);
249     COPY(lambda2);
250     COPY(picture_in_gop_number);
251     COPY(gop_picture_number);
252     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
253     COPY(progressive_frame);    // FIXME don't set in encode_header
254     COPY(partitioned_frame);    // FIXME don't set in encode_header
255 #undef COPY
256 }
257
258 static void mpv_encode_init_static(void)
259 {
260     for (int i = -16; i < 16; i++)
261         default_fcode_tab[i + MAX_MV] = 1;
262 }
263
264 /**
265  * Set the given MpegEncContext to defaults for encoding.
266  * The changed fields will not depend upon the prior state of the MpegEncContext.
267  */
268 static void mpv_encode_defaults(MpegEncContext *s)
269 {
270     static AVOnce init_static_once = AV_ONCE_INIT;
271
272     ff_mpv_common_defaults(s);
273
274     ff_thread_once(&init_static_once, mpv_encode_init_static);
275
276     s->me.mv_penalty = default_mv_penalty;
277     s->fcode_tab     = default_fcode_tab;
278
279     s->input_picture_number  = 0;
280     s->picture_in_gop_number = 0;
281 }
282
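/* Select the DCT quantization routines (C, arch-specific, or the trellis
 * quantizer when avctx->trellis is set) and the DCT denoising function. */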
283 av_cold int ff_dct_encode_init(MpegEncContext *s)
284 {
285     if (ARCH_X86)
286         ff_dct_encode_init_x86(s);
287
288     if (CONFIG_H263_ENCODER)
289         ff_h263dsp_init(&s->h263dsp);
290     if (!s->dct_quantize)
291         s->dct_quantize = ff_dct_quantize_c;
292     if (!s->denoise_dct)
293         s->denoise_dct  = denoise_dct_c;
294     s->fast_dct_quantize = s->dct_quantize;
295     if (s->avctx->trellis)
296         s->dct_quantize  = dct_quantize_trellis_c;
297
298     return 0;
299 }
300
301 /* init video encoder */
302 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
303 {
304     MpegEncContext *s = avctx->priv_data;
305     AVCPBProperties *cpb_props;
306     int i, ret;
307
308     mpv_encode_defaults(s);
309
310     switch (avctx->pix_fmt) {
311     case AV_PIX_FMT_YUVJ444P:
312     case AV_PIX_FMT_YUV444P:
313         s->chroma_format = CHROMA_444;
314         break;
315     case AV_PIX_FMT_YUVJ422P:
316     case AV_PIX_FMT_YUV422P:
317         s->chroma_format = CHROMA_422;
318         break;
319     case AV_PIX_FMT_YUVJ420P:
320     case AV_PIX_FMT_YUV420P:
321     default:
322         s->chroma_format = CHROMA_420;
323         break;
324     }
325
326     avctx->bits_per_raw_sample = av_clip(avctx->bits_per_raw_sample, 0, 8);
327
328     s->bit_rate = avctx->bit_rate;
329     s->width    = avctx->width;
330     s->height   = avctx->height;
331     if (avctx->gop_size > 600 &&
332         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
333         av_log(avctx, AV_LOG_WARNING,
334                "keyframe interval too large, reducing it from %d to %d\n",
335                avctx->gop_size, 600);
336         avctx->gop_size = 600;
337     }
338     s->gop_size     = avctx->gop_size;
339     s->avctx        = avctx;
340     if (avctx->max_b_frames > MAX_B_FRAMES) {
341         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
342                "is %d.\n", MAX_B_FRAMES);
343         avctx->max_b_frames = MAX_B_FRAMES;
344     }
345     s->max_b_frames = avctx->max_b_frames;
346     s->codec_id     = avctx->codec->id;
347     s->strict_std_compliance = avctx->strict_std_compliance;
348     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
349     s->rtp_mode           = !!s->rtp_payload_size;
350     s->intra_dc_precision = avctx->intra_dc_precision;
351
352     // workaround some differences between how applications specify dc precision
353     if (s->intra_dc_precision < 0) {
354         s->intra_dc_precision += 8;
355     } else if (s->intra_dc_precision >= 8)
356         s->intra_dc_precision -= 8;
357
358     if (s->intra_dc_precision < 0) {
359         av_log(avctx, AV_LOG_ERROR,
360                 "intra dc precision must not be negative; note that some applications use"
361                 " 0 and some 8 as the base meaning 8 bit, the value must not be smaller than that\n");
362         return AVERROR(EINVAL);
363     }
364
365     if (avctx->codec_id == AV_CODEC_ID_AMV || (avctx->active_thread_type & FF_THREAD_SLICE))
366         s->huffman = 0;
367
368     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
369         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
370         return AVERROR(EINVAL);
371     }
372     s->user_specified_pts = AV_NOPTS_VALUE;
373
374     if (s->gop_size <= 1) {
375         s->intra_only = 1;
376         s->gop_size   = 12;
377     } else {
378         s->intra_only = 0;
379     }
380
381     /* Fixed QSCALE */
382     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
383
384     s->adaptive_quant = (avctx->lumi_masking ||
385                          avctx->dark_masking ||
386                          avctx->temporal_cplx_masking ||
387                          avctx->spatial_cplx_masking  ||
388                          avctx->p_masking      ||
389                          s->border_masking ||
390                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
391                         !s->fixed_qscale;
392
393     s->loop_filter = !!(avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
394
395     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
396         switch(avctx->codec_id) {
397         case AV_CODEC_ID_MPEG1VIDEO:
398         case AV_CODEC_ID_MPEG2VIDEO:
399             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
400             break;
401         case AV_CODEC_ID_MPEG4:
402         case AV_CODEC_ID_MSMPEG4V1:
403         case AV_CODEC_ID_MSMPEG4V2:
404         case AV_CODEC_ID_MSMPEG4V3:
405             if       (avctx->rc_max_rate >= 15000000) {
406                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
407             } else if(avctx->rc_max_rate >=  2000000) {
408                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
409             } else if(avctx->rc_max_rate >=   384000) {
410                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
411             } else
412                 avctx->rc_buffer_size = 40;
413             avctx->rc_buffer_size *= 16384;
414             break;
415         }
416         if (avctx->rc_buffer_size) {
417             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
418         }
419     }
420
421     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
422         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
423         return AVERROR(EINVAL);
424     }
425
426     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
427         av_log(avctx, AV_LOG_INFO,
428                "Warning: min_rate > 0 but min_rate != max_rate isn't recommended!\n");
429     }
430
431     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
432         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
433         return AVERROR(EINVAL);
434     }
435
436     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
437         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
438         return AVERROR(EINVAL);
439     }
440
441     if (avctx->rc_max_rate &&
442         avctx->rc_max_rate == avctx->bit_rate &&
443         avctx->rc_max_rate != avctx->rc_min_rate) {
444         av_log(avctx, AV_LOG_INFO,
445                "impossible bitrate constraints, this will fail\n");
446     }
447
448     if (avctx->rc_buffer_size &&
449         avctx->bit_rate * (int64_t)avctx->time_base.num >
450             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
451         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
452         return AVERROR(EINVAL);
453     }
454
455     if (!s->fixed_qscale &&
456         avctx->bit_rate * av_q2d(avctx->time_base) >
457             avctx->bit_rate_tolerance) {
458         av_log(avctx, AV_LOG_WARNING,
459                "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
460         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
461     }
462
463     if (avctx->rc_max_rate &&
464         avctx->rc_min_rate == avctx->rc_max_rate &&
465         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
466          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
467         90000LL * (avctx->rc_buffer_size - 1) >
468             avctx->rc_max_rate * 0xFFFFLL) {
469         av_log(avctx, AV_LOG_INFO,
470                "Warning: vbv_delay will be set to 0xFFFF (=VBR) as the "
471                "specified vbv buffer is too large for the given bitrate!\n");
472     }
473
474     if ((avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
475         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
476         s->codec_id != AV_CODEC_ID_FLV1) {
477         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
478         return AVERROR(EINVAL);
479     }
480
481     if (s->obmc && avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
482         av_log(avctx, AV_LOG_ERROR,
483                "OBMC is only supported with simple mb decision\n");
484         return AVERROR(EINVAL);
485     }
486
487     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
488         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
489         return AVERROR(EINVAL);
490     }
491
492     if (s->max_b_frames                    &&
493         s->codec_id != AV_CODEC_ID_MPEG4      &&
494         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
495         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
496         av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
497         return AVERROR(EINVAL);
498     }
499     if (s->max_b_frames < 0) {
500         av_log(avctx, AV_LOG_ERROR,
501                "max b frames must be 0 or positive for mpegvideo based encoders\n");
502         return AVERROR(EINVAL);
503     }
504
505     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
506          s->codec_id == AV_CODEC_ID_H263  ||
507          s->codec_id == AV_CODEC_ID_H263P) &&
508         (avctx->sample_aspect_ratio.num > 255 ||
509          avctx->sample_aspect_ratio.den > 255)) {
510         av_log(avctx, AV_LOG_WARNING,
511                "Invalid pixel aspect ratio %i/%i, limit is 255/255, reducing\n",
512                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
513         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
514                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
515     }
516
517     if ((s->codec_id == AV_CODEC_ID_H263  ||
518          s->codec_id == AV_CODEC_ID_H263P) &&
519         (avctx->width  > 2048 ||
520          avctx->height > 1152 )) {
521         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
522         return AVERROR(EINVAL);
523     }
524     if ((s->codec_id == AV_CODEC_ID_H263  ||
525          s->codec_id == AV_CODEC_ID_H263P ||
526          s->codec_id == AV_CODEC_ID_RV20) &&
527         ((avctx->width &3) ||
528          (avctx->height&3) )) {
529         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
530         return AVERROR(EINVAL);
531     }
532
533     if (s->codec_id == AV_CODEC_ID_RV10 &&
534         (avctx->width &15 ||
535          avctx->height&15 )) {
536         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
537         return AVERROR(EINVAL);
538     }
539
540     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
541          s->codec_id == AV_CODEC_ID_WMV2) &&
542          avctx->width & 1) {
543         av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
544         return AVERROR(EINVAL);
545     }
546
547     if ((avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
548         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
549         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
550         return AVERROR(EINVAL);
551     }
552
553     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
554         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
555         return AVERROR(EINVAL);
556     }
557
558     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
559         avctx->mb_decision != FF_MB_DECISION_RD) {
560         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
561         return AVERROR(EINVAL);
562     }
563
564     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
565             (s->codec_id == AV_CODEC_ID_AMV ||
566              s->codec_id == AV_CODEC_ID_MJPEG)) {
567         // Used to produce garbage with MJPEG.
568         av_log(avctx, AV_LOG_ERROR,
569                "QP RD is no longer compatible with MJPEG or AMV\n");
570         return AVERROR(EINVAL);
571     }
572
573     if (s->scenechange_threshold < 1000000000 &&
574         (avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
575         av_log(avctx, AV_LOG_ERROR,
576                "closed gop with scene change detection is not supported yet, "
577                "set threshold to 1000000000\n");
578         return AVERROR_PATCHWELCOME;
579     }
580
581     if (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
582         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
583             s->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "low delay forcing is only available for mpeg2, "
586                    "set strict_std_compliance to 'unofficial' or lower in order to allow it\n");
587             return AVERROR(EINVAL);
588         }
589         if (s->max_b_frames != 0) {
590             av_log(avctx, AV_LOG_ERROR,
591                    "B-frames cannot be used with low delay\n");
592             return AVERROR(EINVAL);
593         }
594     }
595
596     if (s->q_scale_type == 1) {
597         if (avctx->qmax > 28) {
598             av_log(avctx, AV_LOG_ERROR,
599                "non-linear quant only supports qmax <= 28 currently\n");
600             return AVERROR_PATCHWELCOME;
601         }
602     }
603
604     if (avctx->slices > 1 &&
605         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
606         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
607         return AVERROR(EINVAL);
608     }
609
610     if (avctx->thread_count > 1         &&
611         s->codec_id != AV_CODEC_ID_MPEG4      &&
612         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
613         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
614         s->codec_id != AV_CODEC_ID_MJPEG      &&
615         (s->codec_id != AV_CODEC_ID_H263P)) {
616         av_log(avctx, AV_LOG_ERROR,
617                "multi-threaded encoding not supported by codec\n");
618         return AVERROR_PATCHWELCOME;
619     }
620
621     if (avctx->thread_count < 1) {
622         av_log(avctx, AV_LOG_ERROR,
623                "automatic thread number detection not supported by codec, "
624                "patch welcome\n");
625         return AVERROR_PATCHWELCOME;
626     }
627
628     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
629         av_log(avctx, AV_LOG_INFO,
630                "notice: b_frame_strategy only affects the first pass\n");
631         s->b_frame_strategy = 0;
632     }
633
634     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
635     if (i > 1) {
636         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
637         avctx->time_base.den /= i;
638         avctx->time_base.num /= i;
639         //return -1;
640     }
641
642     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id == AV_CODEC_ID_AMV || s->codec_id == AV_CODEC_ID_SPEEDHQ) {
643         // (a + x * 3 / 8) / x
644         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
645         s->inter_quant_bias = 0;
646     } else {
647         s->intra_quant_bias = 0;
648         // (a - x / 4) / x
649         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
650     }
651
652     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
653         av_log(avctx, AV_LOG_ERROR, "qmin and/or qmax are invalid, they must satisfy 0 < min <= max\n");
654         return AVERROR(EINVAL);
655     }
656
657     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
658
659     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
660         avctx->time_base.den > (1 << 16) - 1) {
661         av_log(avctx, AV_LOG_ERROR,
662                "timebase %d/%d not supported by MPEG 4 standard, "
663                "the maximum admitted value for the timebase denominator "
664                "is %d\n", avctx->time_base.num, avctx->time_base.den,
665                (1 << 16) - 1);
666         return AVERROR(EINVAL);
667     }
668     s->time_increment_bits = av_log2(avctx->time_base.den - 1) + 1;
669
670     switch (avctx->codec->id) {
671     case AV_CODEC_ID_MPEG1VIDEO:
672         s->out_format = FMT_MPEG1;
673         s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
674         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
675         break;
676     case AV_CODEC_ID_MPEG2VIDEO:
677         s->out_format = FMT_MPEG1;
678         s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
679         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
680         s->rtp_mode   = 1;
681         break;
682 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
683     case AV_CODEC_ID_MJPEG:
684     case AV_CODEC_ID_AMV:
685         s->out_format = FMT_MJPEG;
686         s->intra_only = 1; /* force intra only for jpeg */
687         if ((ret = ff_mjpeg_encode_init(s)) < 0)
688             return ret;
689         avctx->delay = 0;
690         s->low_delay = 1;
691         break;
692 #endif
693     case AV_CODEC_ID_SPEEDHQ:
694         s->out_format = FMT_SPEEDHQ;
695         s->intra_only = 1; /* force intra only for SHQ */
696         if (!CONFIG_SPEEDHQ_ENCODER)
697             return AVERROR_ENCODER_NOT_FOUND;
698         if ((ret = ff_speedhq_encode_init(s)) < 0)
699             return ret;
700         avctx->delay = 0;
701         s->low_delay = 1;
702         break;
703     case AV_CODEC_ID_H261:
704         if (!CONFIG_H261_ENCODER)
705             return AVERROR_ENCODER_NOT_FOUND;
706         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
707             av_log(avctx, AV_LOG_ERROR,
708                    "The specified picture size of %dx%d is not valid for the "
709                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
710                     s->width, s->height);
711             return AVERROR(EINVAL);
712         }
713         s->out_format = FMT_H261;
714         avctx->delay  = 0;
715         s->low_delay  = 1;
716         s->rtp_mode   = 0; /* Sliced encoding not supported */
717         break;
718     case AV_CODEC_ID_H263:
719         if (!CONFIG_H263_ENCODER)
720             return AVERROR_ENCODER_NOT_FOUND;
721         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
722                              s->width, s->height) == 8) {
723             av_log(avctx, AV_LOG_ERROR,
724                    "The specified picture size of %dx%d is not valid for "
725                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
726                    "352x288, 704x576, and 1408x1152. "
727                    "Try H.263+.\n", s->width, s->height);
728             return AVERROR(EINVAL);
729         }
730         s->out_format = FMT_H263;
731         avctx->delay  = 0;
732         s->low_delay  = 1;
733         break;
734     case AV_CODEC_ID_H263P:
735         s->out_format = FMT_H263;
736         s->h263_plus  = 1;
737         /* Fx */
738         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
739         s->modified_quant  = s->h263_aic;
740         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
741         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
742
743         /* /Fx */
744         /* These are just to be sure */
745         avctx->delay = 0;
746         s->low_delay = 1;
747         break;
748     case AV_CODEC_ID_FLV1:
749         s->out_format      = FMT_H263;
750         s->h263_flv        = 2; /* format = 1; 11-bit codes */
751         s->unrestricted_mv = 1;
752         s->rtp_mode  = 0; /* don't allow GOB */
753         avctx->delay = 0;
754         s->low_delay = 1;
755         break;
756     case AV_CODEC_ID_RV10:
757         s->out_format = FMT_H263;
758         avctx->delay  = 0;
759         s->low_delay  = 1;
760         break;
761     case AV_CODEC_ID_RV20:
762         s->out_format      = FMT_H263;
763         avctx->delay       = 0;
764         s->low_delay       = 1;
765         s->modified_quant  = 1;
766         s->h263_aic        = 1;
767         s->h263_plus       = 1;
768         s->loop_filter     = 1;
769         s->unrestricted_mv = 0;
770         break;
771     case AV_CODEC_ID_MPEG4:
772         s->out_format      = FMT_H263;
773         s->h263_pred       = 1;
774         s->unrestricted_mv = 1;
775         s->low_delay       = s->max_b_frames ? 0 : 1;
776         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
777         break;
778     case AV_CODEC_ID_MSMPEG4V2:
779         s->out_format      = FMT_H263;
780         s->h263_pred       = 1;
781         s->unrestricted_mv = 1;
782         s->msmpeg4_version = 2;
783         avctx->delay       = 0;
784         s->low_delay       = 1;
785         break;
786     case AV_CODEC_ID_MSMPEG4V3:
787         s->out_format        = FMT_H263;
788         s->h263_pred         = 1;
789         s->unrestricted_mv   = 1;
790         s->msmpeg4_version   = 3;
791         s->flipflop_rounding = 1;
792         avctx->delay         = 0;
793         s->low_delay         = 1;
794         break;
795     case AV_CODEC_ID_WMV1:
796         s->out_format        = FMT_H263;
797         s->h263_pred         = 1;
798         s->unrestricted_mv   = 1;
799         s->msmpeg4_version   = 4;
800         s->flipflop_rounding = 1;
801         avctx->delay         = 0;
802         s->low_delay         = 1;
803         break;
804     case AV_CODEC_ID_WMV2:
805         s->out_format        = FMT_H263;
806         s->h263_pred         = 1;
807         s->unrestricted_mv   = 1;
808         s->msmpeg4_version   = 5;
809         s->flipflop_rounding = 1;
810         avctx->delay         = 0;
811         s->low_delay         = 1;
812         break;
813     default:
814         return AVERROR(EINVAL);
815     }
816
817     avctx->has_b_frames = !s->low_delay;
818
819     s->encoding = 1;
820
821     s->progressive_frame    =
822     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
823                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
824                                 s->alternate_scan);
825
826     /* init */
827     ff_mpv_idct_init(s);
828     if ((ret = ff_mpv_common_init(s)) < 0)
829         return ret;
830
831     ff_fdctdsp_init(&s->fdsp, avctx);
832     ff_me_cmp_init(&s->mecc, avctx);
833     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
834     ff_pixblockdsp_init(&s->pdsp, avctx);
835     ff_qpeldsp_init(&s->qdsp);
836
837     if (s->msmpeg4_version) {
838         int ac_stats_size = 2 * 2 * (MAX_LEVEL + 1) *  (MAX_RUN + 1) * 2 * sizeof(int);
839         if (!(s->ac_stats = av_mallocz(ac_stats_size)))
840             return AVERROR(ENOMEM);
841     }
842
843     if (!(avctx->stats_out = av_mallocz(256))               ||
844         !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix,          32) ||
845         !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix,   32) ||
846         !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix,          32) ||
847         !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix16,        32) ||
848         !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix16, 32) ||
849         !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix16,        32) ||
850         !FF_ALLOCZ_TYPED_ARRAY(s->input_picture,           MAX_PICTURE_COUNT) ||
851         !FF_ALLOCZ_TYPED_ARRAY(s->reordered_input_picture, MAX_PICTURE_COUNT))
852         return AVERROR(ENOMEM);
853
854     if (s->noise_reduction) {
855         if (!FF_ALLOCZ_TYPED_ARRAY(s->dct_offset, 2))
856             return AVERROR(ENOMEM);
857     }
858
859     ff_dct_encode_init(s);
860
861     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
862         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
863
864     if (s->slice_context_count > 1) {
865         s->rtp_mode = 1;
866
867         if (avctx->codec_id == AV_CODEC_ID_H263P)
868             s->h263_slice_structured = 1;
869     }
870
871     s->quant_precision = 5;
872
873     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      avctx->ildct_cmp);
874     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
875
876     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
877         ff_h261_encode_init(s);
878     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
879         ff_h263_encode_init(s);
880     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
881         ff_msmpeg4_encode_init(s);
882     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
883         && s->out_format == FMT_MPEG1)
884         ff_mpeg1_encode_init(s);
885
886     /* init q matrix */
887     for (i = 0; i < 64; i++) {
888         int j = s->idsp.idct_permutation[i];
889         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
890             s->mpeg_quant) {
891             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
892             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
893         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
894             s->intra_matrix[j] =
895             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
896         } else if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
897             s->intra_matrix[j] =
898             s->inter_matrix[j] = ff_mpeg1_default_intra_matrix[i];
899         } else {
900             /* MPEG-1/2 */
901             s->chroma_intra_matrix[j] =
902             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
903             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
904         }
905         if (avctx->intra_matrix)
906             s->intra_matrix[j] = avctx->intra_matrix[i];
907         if (avctx->inter_matrix)
908             s->inter_matrix[j] = avctx->inter_matrix[i];
909     }
910
911     /* precompute matrix */
912     /* for mjpeg, we do include qscale in the matrix */
913     if (s->out_format != FMT_MJPEG) {
914         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
915                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
916                           31, 1);
917         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
918                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
919                           31, 0);
920     }
921
922     if ((ret = ff_rate_control_init(s)) < 0)
923         return ret;
924
925     if (s->b_frame_strategy == 2) {
926         for (i = 0; i < s->max_b_frames + 2; i++) {
927             s->tmp_frames[i] = av_frame_alloc();
928             if (!s->tmp_frames[i])
929                 return AVERROR(ENOMEM);
930
931             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
932             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
933             s->tmp_frames[i]->height = s->height >> s->brd_scale;
934
935             ret = av_frame_get_buffer(s->tmp_frames[i], 0);
936             if (ret < 0)
937                 return ret;
938         }
939     }
940
941     cpb_props = ff_add_cpb_side_data(avctx);
942     if (!cpb_props)
943         return AVERROR(ENOMEM);
944     cpb_props->max_bitrate = avctx->rc_max_rate;
945     cpb_props->min_bitrate = avctx->rc_min_rate;
946     cpb_props->avg_bitrate = avctx->bit_rate;
947     cpb_props->buffer_size = avctx->rc_buffer_size;
948
949     return 0;
950 }
951
952 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
953 {
954     MpegEncContext *s = avctx->priv_data;
955     int i;
956
957     ff_rate_control_uninit(s);
958
959     ff_mpv_common_end(s);
960     if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
961         s->out_format == FMT_MJPEG)
962         ff_mjpeg_encode_close(s);
963
964     av_freep(&avctx->extradata);
965
966     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
967         av_frame_free(&s->tmp_frames[i]);
968
969     ff_free_picture_tables(&s->new_picture);
970     ff_mpeg_unref_picture(avctx, &s->new_picture);
971
972     av_freep(&avctx->stats_out);
973     av_freep(&s->ac_stats);
974
975     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
976     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
977     s->q_chroma_intra_matrix=   NULL;
978     s->q_chroma_intra_matrix16= NULL;
979     av_freep(&s->q_intra_matrix);
980     av_freep(&s->q_inter_matrix);
981     av_freep(&s->q_intra_matrix16);
982     av_freep(&s->q_inter_matrix16);
983     av_freep(&s->input_picture);
984     av_freep(&s->reordered_input_picture);
985     av_freep(&s->dct_offset);
986
987     return 0;
988 }
989
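/* Sum of absolute errors of a 16x16 block against a constant reference value
 * (the caller passes the block mean). */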
990 static int get_sae(uint8_t *src, int ref, int stride)
991 {
992     int x,y;
993     int acc = 0;
994
995     for (y = 0; y < 16; y++) {
996         for (x = 0; x < 16; x++) {
997             acc += FFABS(src[x + y * stride] - ref);
998         }
999     }
1000
1001     return acc;
1002 }
1003
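/* Count the 16x16 blocks for which coding against the block mean would clearly be
 * cheaper than inter prediction from ref; used to score candidate B-frames. */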
1004 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1005                            uint8_t *ref, int stride)
1006 {
1007     int x, y, w, h;
1008     int acc = 0;
1009
1010     w = s->width  & ~15;
1011     h = s->height & ~15;
1012
1013     for (y = 0; y < h; y += 16) {
1014         for (x = 0; x < w; x += 16) {
1015             int offset = x + y * stride;
1016             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1017                                       stride, 16);
1018             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1019             int sae  = get_sae(src + offset, mean, stride);
1020
1021             acc += sae + 500 < sad;
1022         }
1023     }
1024     return acc;
1025 }
1026
1027 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1028 {
1029     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1030                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1031                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1032                             &s->linesize, &s->uvlinesize);
1033 }
1034
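/* Queue one input frame (or flush when pic_arg is NULL): validate or guess its pts,
 * reference or copy the frame data into an internal Picture, and append it to
 * s->input_picture[] according to the encoding delay. */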
1035 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1036 {
1037     Picture *pic = NULL;
1038     int64_t pts;
1039     int i, display_picture_number = 0, ret;
1040     int encoding_delay = s->max_b_frames ? s->max_b_frames
1041                                          : (s->low_delay ? 0 : 1);
1042     int flush_offset = 1;
1043     int direct = 1;
1044
1045     if (pic_arg) {
1046         pts = pic_arg->pts;
1047         display_picture_number = s->input_picture_number++;
1048
1049         if (pts != AV_NOPTS_VALUE) {
1050             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1051                 int64_t last = s->user_specified_pts;
1052
1053                 if (pts <= last) {
1054                     av_log(s->avctx, AV_LOG_ERROR,
1055                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1056                            pts, last);
1057                     return AVERROR(EINVAL);
1058                 }
1059
1060                 if (!s->low_delay && display_picture_number == 1)
1061                     s->dts_delta = pts - last;
1062             }
1063             s->user_specified_pts = pts;
1064         } else {
1065             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1066                 s->user_specified_pts =
1067                 pts = s->user_specified_pts + 1;
1068                 av_log(s->avctx, AV_LOG_INFO,
1069                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1070                        pts);
1071             } else {
1072                 pts = display_picture_number;
1073             }
1074         }
1075
1076         if (!pic_arg->buf[0] ||
1077             pic_arg->linesize[0] != s->linesize ||
1078             pic_arg->linesize[1] != s->uvlinesize ||
1079             pic_arg->linesize[2] != s->uvlinesize)
1080             direct = 0;
1081         if ((s->width & 15) || (s->height & 15))
1082             direct = 0;
1083         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1084             direct = 0;
1085         if (s->linesize & (STRIDE_ALIGN-1))
1086             direct = 0;
1087
1088         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1089                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1090
1091         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1092         if (i < 0)
1093             return i;
1094
1095         pic = &s->picture[i];
1096         pic->reference = 3;
1097
1098         if (direct) {
1099             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1100                 return ret;
1101         }
1102         ret = alloc_picture(s, pic, direct);
1103         if (ret < 0)
1104             return ret;
1105
1106         if (!direct) {
1107             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1108                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1109                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1110                 // empty
1111             } else {
1112                 int h_chroma_shift, v_chroma_shift;
1113                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1114                                                  &h_chroma_shift,
1115                                                  &v_chroma_shift);
1116
1117                 for (i = 0; i < 3; i++) {
1118                     int src_stride = pic_arg->linesize[i];
1119                     int dst_stride = i ? s->uvlinesize : s->linesize;
1120                     int h_shift = i ? h_chroma_shift : 0;
1121                     int v_shift = i ? v_chroma_shift : 0;
1122                     int w = s->width  >> h_shift;
1123                     int h = s->height >> v_shift;
1124                     uint8_t *src = pic_arg->data[i];
1125                     uint8_t *dst = pic->f->data[i];
1126                     int vpad = 16;
1127
1128                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1129                         && !s->progressive_sequence
1130                         && FFALIGN(s->height, 32) - s->height > 16)
1131                         vpad = 32;
1132
1133                     if (!s->avctx->rc_buffer_size)
1134                         dst += INPLACE_OFFSET;
1135
1136                     if (src_stride == dst_stride)
1137                         memcpy(dst, src, src_stride * h);
1138                     else {
1139                         int h2 = h;
1140                         uint8_t *dst2 = dst;
1141                         while (h2--) {
1142                             memcpy(dst2, src, w);
1143                             dst2 += dst_stride;
1144                             src += src_stride;
1145                         }
1146                     }
1147                     if ((s->width & 15) || (s->height & (vpad-1))) {
1148                         s->mpvencdsp.draw_edges(dst, dst_stride,
1149                                                 w, h,
1150                                                 16 >> h_shift,
1151                                                 vpad >> v_shift,
1152                                                 EDGE_BOTTOM);
1153                     }
1154                 }
1155                 emms_c();
1156             }
1157         }
1158         ret = av_frame_copy_props(pic->f, pic_arg);
1159         if (ret < 0)
1160             return ret;
1161
1162         pic->f->display_picture_number = display_picture_number;
1163         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1164     } else {
1165         /* Flushing: When we have not received enough input frames,
1166          * ensure s->input_picture[0] contains the first picture */
1167         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1168             if (s->input_picture[flush_offset])
1169                 break;
1170
1171         if (flush_offset <= 1)
1172             flush_offset = 1;
1173         else
1174             encoding_delay = encoding_delay - flush_offset + 1;
1175     }
1176
1177     /* shift buffer entries */
1178     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1179         s->input_picture[i - flush_offset] = s->input_picture[i];
1180
1181     s->input_picture[encoding_delay] = (Picture*) pic;
1182
1183     return 0;
1184 }
1185
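/* Return 1 if picture p is similar enough to ref to be skipped: compare 8x8 blocks
 * with frame_skip_cmp, aggregate according to frame_skip_exp, and test the result
 * against frame_skip_threshold and frame_skip_factor. */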
1186 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1187 {
1188     int x, y, plane;
1189     int score = 0;
1190     int64_t score64 = 0;
1191
1192     for (plane = 0; plane < 3; plane++) {
1193         const int stride = p->f->linesize[plane];
1194         const int bw = plane ? 1 : 2;
1195         for (y = 0; y < s->mb_height * bw; y++) {
1196             for (x = 0; x < s->mb_width * bw; x++) {
1197                 int off = p->shared ? 0 : 16;
1198                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1199                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1200                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1201
1202                 switch (FFABS(s->frame_skip_exp)) {
1203                 case 0: score    =  FFMAX(score, v);          break;
1204                 case 1: score   += FFABS(v);                  break;
1205                 case 2: score64 += v * (int64_t)v;                       break;
1206                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1207                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1208                 }
1209             }
1210         }
1211     }
1212     emms_c();
1213
1214     if (score)
1215         score64 = score;
1216     if (s->frame_skip_exp < 0)
1217         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1218                       -1.0/s->frame_skip_exp);
1219
1220     if (score64 < s->frame_skip_threshold)
1221         return 1;
1222     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1223         return 1;
1224     return 0;
1225 }
1226
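/* Helper for estimate_best_b_count(): encode one frame (or flush with NULL) on the
 * temporary encoder context and return the total size of the packets produced,
 * or a negative error code. */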
1227 static int encode_frame(AVCodecContext *c, AVFrame *frame, AVPacket *pkt)
1228 {
1229     int ret;
1230     int size = 0;
1231
1232     ret = avcodec_send_frame(c, frame);
1233     if (ret < 0)
1234         return ret;
1235
1236     do {
1237         ret = avcodec_receive_packet(c, pkt);
1238         if (ret >= 0) {
1239             size += pkt->size;
1240             av_packet_unref(pkt);
1241         } else if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
1242             return ret;
1243     } while (ret >= 0);
1244
1245     return size;
1246 }
1247
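/* b_frame_strategy 2: encode downscaled copies of the queued input pictures with a
 * temporary encoder for each candidate B-frame count and return the count that
 * minimizes the rate-distortion cost. */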
1248 static int estimate_best_b_count(MpegEncContext *s)
1249 {
1250     const AVCodec *codec = avcodec_find_encoder(s->avctx->codec_id);
1251     AVPacket *pkt;
1252     const int scale = s->brd_scale;
1253     int width  = s->width  >> scale;
1254     int height = s->height >> scale;
1255     int i, j, out_size, p_lambda, b_lambda, lambda2;
1256     int64_t best_rd  = INT64_MAX;
1257     int best_b_count = -1;
1258     int ret = 0;
1259
1260     av_assert0(scale >= 0 && scale <= 3);
1261
1262     pkt = av_packet_alloc();
1263     if (!pkt)
1264         return AVERROR(ENOMEM);
1265
1266     //emms_c();
1267     //s->next_picture_ptr->quality;
1268     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1269     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1270     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1271     if (!b_lambda) // FIXME we should do this somewhere else
1272         b_lambda = p_lambda;
1273     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1274                FF_LAMBDA_SHIFT;
1275
1276     for (i = 0; i < s->max_b_frames + 2; i++) {
1277         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1278                                                 s->next_picture_ptr;
1279         uint8_t *data[4];
1280
1281         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1282             pre_input = *pre_input_ptr;
1283             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1284
1285             if (!pre_input.shared && i) {
1286                 data[0] += INPLACE_OFFSET;
1287                 data[1] += INPLACE_OFFSET;
1288                 data[2] += INPLACE_OFFSET;
1289             }
1290
1291             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1292                                        s->tmp_frames[i]->linesize[0],
1293                                        data[0],
1294                                        pre_input.f->linesize[0],
1295                                        width, height);
1296             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1297                                        s->tmp_frames[i]->linesize[1],
1298                                        data[1],
1299                                        pre_input.f->linesize[1],
1300                                        width >> 1, height >> 1);
1301             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1302                                        s->tmp_frames[i]->linesize[2],
1303                                        data[2],
1304                                        pre_input.f->linesize[2],
1305                                        width >> 1, height >> 1);
1306         }
1307     }
1308
1309     for (j = 0; j < s->max_b_frames + 1; j++) {
1310         AVCodecContext *c;
1311         int64_t rd = 0;
1312
1313         if (!s->input_picture[j])
1314             break;
1315
1316         c = avcodec_alloc_context3(NULL);
1317         if (!c) {
1318             ret = AVERROR(ENOMEM);
1319             goto fail;
1320         }
1321
1322         c->width        = width;
1323         c->height       = height;
1324         c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1325         c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1326         c->mb_decision  = s->avctx->mb_decision;
1327         c->me_cmp       = s->avctx->me_cmp;
1328         c->mb_cmp       = s->avctx->mb_cmp;
1329         c->me_sub_cmp   = s->avctx->me_sub_cmp;
1330         c->pix_fmt      = AV_PIX_FMT_YUV420P;
1331         c->time_base    = s->avctx->time_base;
1332         c->max_b_frames = s->max_b_frames;
1333
1334         ret = avcodec_open2(c, codec, NULL);
1335         if (ret < 0)
1336             goto fail;
1337
1338
1339         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1340         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1341
1342         out_size = encode_frame(c, s->tmp_frames[0], pkt);
1343         if (out_size < 0) {
1344             ret = out_size;
1345             goto fail;
1346         }
1347
1348         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1349
1350         for (i = 0; i < s->max_b_frames + 1; i++) {
1351             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1352
1353             s->tmp_frames[i + 1]->pict_type = is_p ?
1354                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1355             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1356
1357             out_size = encode_frame(c, s->tmp_frames[i + 1], pkt);
1358             if (out_size < 0) {
1359                 ret = out_size;
1360                 goto fail;
1361             }
1362
1363             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1364         }
1365
1366         /* get the delayed frames */
1367         out_size = encode_frame(c, NULL, pkt);
1368         if (out_size < 0) {
1369             ret = out_size;
1370             goto fail;
1371         }
1372         rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1373
1374         rd += c->error[0] + c->error[1] + c->error[2];
1375
1376         if (rd < best_rd) {
1377             best_rd = rd;
1378             best_b_count = j;
1379         }
1380
1381 fail:
1382         avcodec_free_context(&c);
1383         av_packet_unref(pkt);
1384         if (ret < 0) {
1385             best_b_count = ret;
1386             break;
1387         }
1388     }
1389
1390     av_packet_free(&pkt);
1391
1392     return best_b_count;
1393 }
1394
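/* Pick the next picture to encode: handle frame skipping, choose the picture type
 * and the number of B-frames according to b_frame_strategy and the GOP constraints,
 * and move the result into s->reordered_input_picture[]. */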
1395 static int select_input_picture(MpegEncContext *s)
1396 {
1397     int i, ret;
1398
1399     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1400         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1401     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1402
1403     /* set next picture type & ordering */
1404     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1405         if (s->frame_skip_threshold || s->frame_skip_factor) {
1406             if (s->picture_in_gop_number < s->gop_size &&
1407                 s->next_picture_ptr &&
1408                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1409                 // FIXME check that the gop check above is +-1 correct
1410                 av_frame_unref(s->input_picture[0]->f);
1411
1412                 ff_vbv_update(s, 0);
1413
1414                 goto no_output_pic;
1415             }
1416         }
1417
1418         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1419             !s->next_picture_ptr || s->intra_only) {
1420             s->reordered_input_picture[0] = s->input_picture[0];
1421             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1422             s->reordered_input_picture[0]->f->coded_picture_number =
1423                 s->coded_picture_number++;
1424         } else {
1425             int b_frames = 0;
1426
1427             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1428                 for (i = 0; i < s->max_b_frames + 1; i++) {
1429                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1430
1431                     if (pict_num >= s->rc_context.num_entries)
1432                         break;
1433                     if (!s->input_picture[i]) {
1434                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1435                         break;
1436                     }
1437
1438                     s->input_picture[i]->f->pict_type =
1439                         s->rc_context.entry[pict_num].new_pict_type;
1440                 }
1441             }
1442
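                 // b_frame_strategy selects how many of the buffered input frames
                 // become B-frames: 0 = always use max_b_frames, 1 = a heuristic
                 // based on get_intra_count() between consecutive inputs, and
                 // 2 = an exhaustive trial encode via estimate_best_b_count().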
1443             if (s->b_frame_strategy == 0) {
1444                 b_frames = s->max_b_frames;
1445                 while (b_frames && !s->input_picture[b_frames])
1446                     b_frames--;
1447             } else if (s->b_frame_strategy == 1) {
1448                 for (i = 1; i < s->max_b_frames + 1; i++) {
1449                     if (s->input_picture[i] &&
1450                         s->input_picture[i]->b_frame_score == 0) {
1451                         s->input_picture[i]->b_frame_score =
1452                             get_intra_count(s,
1453                                             s->input_picture[i    ]->f->data[0],
1454                                             s->input_picture[i - 1]->f->data[0],
1455                                             s->linesize) + 1;
1456                     }
1457                 }
1458                 for (i = 0; i < s->max_b_frames + 1; i++) {
1459                     if (!s->input_picture[i] ||
1460                         s->input_picture[i]->b_frame_score - 1 >
1461                             s->mb_num / s->b_sensitivity)
1462                         break;
1463                 }
1464
1465                 b_frames = FFMAX(0, i - 1);
1466
1467                 /* reset scores */
1468                 for (i = 0; i < b_frames + 1; i++) {
1469                     s->input_picture[i]->b_frame_score = 0;
1470                 }
1471             } else if (s->b_frame_strategy == 2) {
1472                 b_frames = estimate_best_b_count(s);
1473                 if (b_frames < 0)
1474                     return b_frames;
1475             }
1476
1477             emms_c();
1478
1479             for (i = b_frames - 1; i >= 0; i--) {
1480                 int type = s->input_picture[i]->f->pict_type;
1481                 if (type && type != AV_PICTURE_TYPE_B)
1482                     b_frames = i;
1483             }
1484             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1485                 b_frames == s->max_b_frames) {
1486                 av_log(s->avctx, AV_LOG_ERROR,
1487                        "warning, too many B-frames in a row\n");
1488             }
1489
1490             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1491                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1492                     s->gop_size > s->picture_in_gop_number) {
1493                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1494                 } else {
1495                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1496                         b_frames = 0;
1497                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1498                 }
1499             }
1500
1501             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1502                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1503                 b_frames--;
1504
1505             s->reordered_input_picture[0] = s->input_picture[b_frames];
1506             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1507                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1508             s->reordered_input_picture[0]->f->coded_picture_number =
1509                 s->coded_picture_number++;
1510             for (i = 0; i < b_frames; i++) {
1511                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1512                 s->reordered_input_picture[i + 1]->f->pict_type =
1513                     AV_PICTURE_TYPE_B;
1514                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1515                     s->coded_picture_number++;
1516             }
1517         }
1518     }
1519 no_output_pic:
1520     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1521
1522     if (s->reordered_input_picture[0]) {
1523         s->reordered_input_picture[0]->reference =
1524            s->reordered_input_picture[0]->f->pict_type !=
1525                AV_PICTURE_TYPE_B ? 3 : 0;
1526
1527         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1528             return ret;
1529
1530         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1531             // input is a shared picture, so we can't modify it -> allocate a new
1532             // one & ensure that the shared one is reusable
1533
1534             Picture *pic;
1535             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1536             if (i < 0)
1537                 return i;
1538             pic = &s->picture[i];
1539
1540             pic->reference = s->reordered_input_picture[0]->reference;
1541             if (alloc_picture(s, pic, 0) < 0) {
1542                 return -1;
1543             }
1544
1545             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1546             if (ret < 0)
1547                 return ret;
1548
1549             /* mark us unused / free shared pic */
1550             av_frame_unref(s->reordered_input_picture[0]->f);
1551             s->reordered_input_picture[0]->shared = 0;
1552
1553             s->current_picture_ptr = pic;
1554         } else {
1555             // input is not a shared picture -> reuse its buffer for the current picture
1556             s->current_picture_ptr = s->reordered_input_picture[0];
1557             for (i = 0; i < 4; i++) {
1558                 if (s->new_picture.f->data[i])
1559                     s->new_picture.f->data[i] += INPLACE_OFFSET;
1560             }
1561         }
1562         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1563         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1564                                        s->current_picture_ptr)) < 0)
1565             return ret;
1566
1567         s->picture_number = s->new_picture.f->display_picture_number;
1568     }
1569     return 0;
1570 }
1571
1572 static void frame_end(MpegEncContext *s)
1573 {
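         /* Replicate the border pixels of the reconstructed reference frame into
          * the surrounding edge margin, so that motion vectors pointing outside
          * the picture (unrestricted MVs) read valid data. */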
1574     if (s->unrestricted_mv &&
1575         s->current_picture.reference &&
1576         !s->intra_only) {
1577         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1578         int hshift = desc->log2_chroma_w;
1579         int vshift = desc->log2_chroma_h;
1580         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1581                                 s->current_picture.f->linesize[0],
1582                                 s->h_edge_pos, s->v_edge_pos,
1583                                 EDGE_WIDTH, EDGE_WIDTH,
1584                                 EDGE_TOP | EDGE_BOTTOM);
1585         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1586                                 s->current_picture.f->linesize[1],
1587                                 s->h_edge_pos >> hshift,
1588                                 s->v_edge_pos >> vshift,
1589                                 EDGE_WIDTH >> hshift,
1590                                 EDGE_WIDTH >> vshift,
1591                                 EDGE_TOP | EDGE_BOTTOM);
1592         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1593                                 s->current_picture.f->linesize[2],
1594                                 s->h_edge_pos >> hshift,
1595                                 s->v_edge_pos >> vshift,
1596                                 EDGE_WIDTH >> hshift,
1597                                 EDGE_WIDTH >> vshift,
1598                                 EDGE_TOP | EDGE_BOTTOM);
1599     }
1600
1601     emms_c();
1602
1603     s->last_pict_type                 = s->pict_type;
1604     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1605     if (s->pict_type != AV_PICTURE_TYPE_B)
1606         s->last_non_b_pict_type = s->pict_type;
1607
1608 #if FF_API_CODED_FRAME
1609 FF_DISABLE_DEPRECATION_WARNINGS
1610     av_frame_unref(s->avctx->coded_frame);
1611     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1612 FF_ENABLE_DEPRECATION_WARNINGS
1613 #endif
1614 #if FF_API_ERROR_FRAME
1615 FF_DISABLE_DEPRECATION_WARNINGS
1616     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1617            sizeof(s->current_picture.encoding_error));
1618 FF_ENABLE_DEPRECATION_WARNINGS
1619 #endif
1620 }
1621
1622 static void update_noise_reduction(MpegEncContext *s)
1623 {
1624     int intra, i;
1625
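         // Keep a sliding average by halving the accumulators once enough blocks
         // have been seen; the resulting per-coefficient offset is roughly
         // noise_reduction * dct_count / dct_error_sum[i], i.e. larger for
         // coefficients whose accumulated error is small.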
1626     for (intra = 0; intra < 2; intra++) {
1627         if (s->dct_count[intra] > (1 << 16)) {
1628             for (i = 0; i < 64; i++) {
1629                 s->dct_error_sum[intra][i] >>= 1;
1630             }
1631             s->dct_count[intra] >>= 1;
1632         }
1633
1634         for (i = 0; i < 64; i++) {
1635             s->dct_offset[intra][i] = (s->noise_reduction *
1636                                        s->dct_count[intra] +
1637                                        s->dct_error_sum[intra][i] / 2) /
1638                                       (s->dct_error_sum[intra][i] + 1);
1639         }
1640     }
1641 }
1642
1643 static int frame_start(MpegEncContext *s)
1644 {
1645     int ret;
1646
1647     /* mark & release old frames */
1648     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1649         s->last_picture_ptr != s->next_picture_ptr &&
1650         s->last_picture_ptr->f->buf[0]) {
1651         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1652     }
1653
1654     s->current_picture_ptr->f->pict_type = s->pict_type;
1655     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1656
1657     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1658     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1659                                    s->current_picture_ptr)) < 0)
1660         return ret;
1661
1662     if (s->pict_type != AV_PICTURE_TYPE_B) {
1663         s->last_picture_ptr = s->next_picture_ptr;
1664         if (!s->droppable)
1665             s->next_picture_ptr = s->current_picture_ptr;
1666     }
1667
1668     if (s->last_picture_ptr) {
1669         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1670         if (s->last_picture_ptr->f->buf[0] &&
1671             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1672                                        s->last_picture_ptr)) < 0)
1673             return ret;
1674     }
1675     if (s->next_picture_ptr) {
1676         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1677         if (s->next_picture_ptr->f->buf[0] &&
1678             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1679                                        s->next_picture_ptr)) < 0)
1680             return ret;
1681     }
1682
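         // For field pictures, address only every second line of the frame
         // buffers: offset the data pointers to the bottom field if needed and
         // double the linesizes.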
1683     if (s->picture_structure != PICT_FRAME) {
1684         int i;
1685         for (i = 0; i < 4; i++) {
1686             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1687                 s->current_picture.f->data[i] +=
1688                     s->current_picture.f->linesize[i];
1689             }
1690             s->current_picture.f->linesize[i] *= 2;
1691             s->last_picture.f->linesize[i]    *= 2;
1692             s->next_picture.f->linesize[i]    *= 2;
1693         }
1694     }
1695
1696     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1697         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1698         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1699     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1700         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1701         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1702     } else {
1703         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1704         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1705     }
1706
1707     if (s->dct_error_sum) {
1708         av_assert2(s->noise_reduction && s->encoding);
1709         update_noise_reduction(s);
1710     }
1711
1712     return 0;
1713 }
1714
1715 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1716                           const AVFrame *pic_arg, int *got_packet)
1717 {
1718     MpegEncContext *s = avctx->priv_data;
1719     int i, stuffing_count, ret;
1720     int context_count = s->slice_context_count;
1721
1722     s->vbv_ignore_qmax = 0;
1723
1724     s->picture_in_gop_number++;
1725
1726     if (load_input_picture(s, pic_arg) < 0)
1727         return -1;
1728
1729     if (select_input_picture(s) < 0) {
1730         return -1;
1731     }
1732
1733     /* output? */
1734     if (s->new_picture.f->data[0]) {
1735         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1736         int pkt_size = growing_buffer
1737                      ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1738                      : s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1739         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1740             return ret;
1741         if (s->mb_info) {
1742             s->mb_info_ptr = av_packet_new_side_data(pkt,
1743                                  AV_PKT_DATA_H263_MB_INFO,
1744                                  s->mb_width*s->mb_height*12);
1745             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1746         }
1747
1748         for (i = 0; i < context_count; i++) {
1749             int start_y = s->thread_context[i]->start_mb_y;
1750             int   end_y = s->thread_context[i]->  end_mb_y;
1751             int h       = s->mb_height;
1752             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1753             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1754
1755             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1756         }
1757
1758         s->pict_type = s->new_picture.f->pict_type;
1759         //emms_c();
1760         ret = frame_start(s);
1761         if (ret < 0)
1762             return ret;
1763 vbv_retry:
1764         ret = encode_picture(s, s->picture_number);
1765         if (growing_buffer) {
1766             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1767             pkt->data = s->pb.buf;
1768             pkt->size = avctx->internal->byte_buffer_size;
1769         }
1770         if (ret < 0)
1771             return -1;
1772
1773         frame_end(s);
1774
1775         if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) && s->out_format == FMT_MJPEG)
1776             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1777
1778         if (avctx->rc_buffer_size) {
1779             RateControlContext *rcc = &s->rc_context;
1780             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1781             int hq = (avctx->mb_decision == FF_MB_DECISION_RD || avctx->trellis);
1782             int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1783
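                 // If the coded frame is too large for the VBV buffer and lambda
                 // can still grow, raise lambda (and the per-MB lambda table),
                 // undo the per-frame state changed by encode_picture(), reset
                 // the bit writers and re-encode the frame.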
1784             if (put_bits_count(&s->pb) > max_size &&
1785                 s->lambda < s->lmax) {
1786                 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1787                                        (s->qscale + 1) / s->qscale);
1788                 if (s->adaptive_quant) {
1789                     int i;
1790                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1791                         s->lambda_table[i] =
1792                             FFMAX(s->lambda_table[i] + min_step,
1793                                   s->lambda_table[i] * (s->qscale + 1) /
1794                                   s->qscale);
1795                 }
1796                 s->mb_skipped = 0;        // normally done in frame_start()
1797                 // the following was done in encode_picture(), so we must undo it here
1798                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1799                     if (s->flipflop_rounding          ||
1800                         s->codec_id == AV_CODEC_ID_H263P ||
1801                         s->codec_id == AV_CODEC_ID_MPEG4)
1802                         s->no_rounding ^= 1;
1803                 }
1804                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1805                     s->time_base       = s->last_time_base;
1806                     s->last_non_b_time = s->time - s->pp_time;
1807                 }
1808                 for (i = 0; i < context_count; i++) {
1809                     PutBitContext *pb = &s->thread_context[i]->pb;
1810                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1811                 }
1812                 s->vbv_ignore_qmax = 1;
1813                 av_log(avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1814                 goto vbv_retry;
1815             }
1816
1817             av_assert0(avctx->rc_max_rate);
1818         }
1819
1820         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1821             ff_write_pass1_stats(s);
1822
1823         for (i = 0; i < 4; i++) {
1824             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1825             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1826         }
1827         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1828                                        s->current_picture_ptr->encoding_error,
1829                                        (avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1830                                        s->pict_type);
1831
1832         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1833             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1834                                              s->misc_bits + s->i_tex_bits +
1835                                              s->p_tex_bits);
1836         flush_put_bits(&s->pb);
1837         s->frame_bits  = put_bits_count(&s->pb);
1838
1839         stuffing_count = ff_vbv_update(s, s->frame_bits);
1840         s->stuffing_bits = 8*stuffing_count;
1841         if (stuffing_count) {
1842             if (put_bytes_left(&s->pb, 0) < stuffing_count + 50) {
1843                 av_log(avctx, AV_LOG_ERROR, "stuffing too large\n");
1844                 return -1;
1845             }
1846
1847             switch (s->codec_id) {
1848             case AV_CODEC_ID_MPEG1VIDEO:
1849             case AV_CODEC_ID_MPEG2VIDEO:
1850                 while (stuffing_count--) {
1851                     put_bits(&s->pb, 8, 0);
1852                 }
1853             break;
1854             case AV_CODEC_ID_MPEG4:
1855                 put_bits(&s->pb, 16, 0);
1856                 put_bits(&s->pb, 16, 0x1C3);
1857                 stuffing_count -= 4;
1858                 while (stuffing_count--) {
1859                     put_bits(&s->pb, 8, 0xFF);
1860                 }
1861             break;
1862             default:
1863                 av_log(avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1864             }
1865             flush_put_bits(&s->pb);
1866             s->frame_bits  = put_bits_count(&s->pb);
1867         }
1868
1869         /* update MPEG-1/2 vbv_delay for CBR */
1870         if (avctx->rc_max_rate                          &&
1871             avctx->rc_min_rate == avctx->rc_max_rate &&
1872             s->out_format == FMT_MPEG1                     &&
1873             90000LL * (avctx->rc_buffer_size - 1) <=
1874                 avctx->rc_max_rate * 0xFFFFLL) {
1875             AVCPBProperties *props;
1876             size_t props_size;
1877
1878             int vbv_delay, min_delay;
1879             double inbits  = avctx->rc_max_rate *
1880                              av_q2d(avctx->time_base);
1881             int    minbits = s->frame_bits - 8 *
1882                              (s->vbv_delay_ptr - s->pb.buf - 1);
1883             double bits    = s->rc_context.buffer_index + minbits - inbits;
1884
1885             if (bits < 0)
1886                 av_log(avctx, AV_LOG_ERROR,
1887                        "Internal error, negative bits\n");
1888
1889             av_assert1(s->repeat_first_field == 0);
1890
1891             vbv_delay = bits * 90000 / avctx->rc_max_rate;
1892             min_delay = (minbits * 90000LL + avctx->rc_max_rate - 1) /
1893                         avctx->rc_max_rate;
1894
1895             vbv_delay = FFMAX(vbv_delay, min_delay);
1896
1897             av_assert0(vbv_delay < 0xFFFF);
1898
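                 // Patch the 16-bit vbv_delay into the already written picture
                 // header: the top 3 bits go into the low bits of the byte at
                 // vbv_delay_ptr, the middle 8 bits into the next byte, and the
                 // low 5 bits into the high bits of the byte after that.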
1899             s->vbv_delay_ptr[0] &= 0xF8;
1900             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1901             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1902             s->vbv_delay_ptr[2] &= 0x07;
1903             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1904
1905             props = av_cpb_properties_alloc(&props_size);
1906             if (!props)
1907                 return AVERROR(ENOMEM);
1908             props->vbv_delay = vbv_delay * 300;
1909
1910             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1911                                           (uint8_t*)props, props_size);
1912             if (ret < 0) {
1913                 av_freep(&props);
1914                 return ret;
1915             }
1916
1917 #if FF_API_VBV_DELAY
1918 FF_DISABLE_DEPRECATION_WARNINGS
1919             avctx->vbv_delay     = vbv_delay * 300;
1920 FF_ENABLE_DEPRECATION_WARNINGS
1921 #endif
1922         }
1923         s->total_bits     += s->frame_bits;
1924
1925         pkt->pts = s->current_picture.f->pts;
1926         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1927             if (!s->current_picture.f->coded_picture_number)
1928                 pkt->dts = pkt->pts - s->dts_delta;
1929             else
1930                 pkt->dts = s->reordered_pts;
1931             s->reordered_pts = pkt->pts;
1932         } else
1933             pkt->dts = pkt->pts;
1934         if (s->current_picture.f->key_frame)
1935             pkt->flags |= AV_PKT_FLAG_KEY;
1936         if (s->mb_info)
1937             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1938     } else {
1939         s->frame_bits = 0;
1940     }
1941
1942     /* release non-reference frames */
1943     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1944         if (!s->picture[i].reference)
1945             ff_mpeg_unref_picture(avctx, &s->picture[i]);
1946     }
1947
1948     av_assert1((s->frame_bits & 7) == 0);
1949
1950     pkt->size = s->frame_bits / 8;
1951     *got_packet = !!pkt->size;
1952     return 0;
1953 }
1954
1955 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1956                                                 int n, int threshold)
1957 {
1958     static const char tab[64] = {
1959         3, 2, 2, 1, 1, 1, 1, 1,
1960         1, 1, 1, 1, 1, 1, 1, 1,
1961         1, 1, 1, 1, 1, 1, 1, 1,
1962         0, 0, 0, 0, 0, 0, 0, 0,
1963         0, 0, 0, 0, 0, 0, 0, 0,
1964         0, 0, 0, 0, 0, 0, 0, 0,
1965         0, 0, 0, 0, 0, 0, 0, 0,
1966         0, 0, 0, 0, 0, 0, 0, 0
1967     };
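         // tab[] weights an isolated +-1 coefficient by the length of the zero run
         // preceding it; if the summed weight stays below the threshold, the whole
         // block (optionally excluding the DC coefficient) is zeroed.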
1968     int score = 0;
1969     int run = 0;
1970     int i;
1971     int16_t *block = s->block[n];
1972     const int last_index = s->block_last_index[n];
1973     int skip_dc;
1974
1975     if (threshold < 0) {
1976         skip_dc = 0;
1977         threshold = -threshold;
1978     } else
1979         skip_dc = 1;
1980
1981     /* Is everything we could set to zero already zero? */
1982     if (last_index <= skip_dc - 1)
1983         return;
1984
1985     for (i = 0; i <= last_index; i++) {
1986         const int j = s->intra_scantable.permutated[i];
1987         const int level = FFABS(block[j]);
1988         if (level == 1) {
1989             if (skip_dc && i == 0)
1990                 continue;
1991             score += tab[run];
1992             run = 0;
1993         } else if (level > 1) {
1994             return;
1995         } else {
1996             run++;
1997         }
1998     }
1999     if (score >= threshold)
2000         return;
2001     for (i = skip_dc; i <= last_index; i++) {
2002         const int j = s->intra_scantable.permutated[i];
2003         block[j] = 0;
2004     }
2005     if (block[0])
2006         s->block_last_index[n] = 0;
2007     else
2008         s->block_last_index[n] = -1;
2009 }
2010
2011 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2012                                int last_index)
2013 {
2014     int i;
2015     const int maxlevel = s->max_qcoeff;
2016     const int minlevel = s->min_qcoeff;
2017     int overflow = 0;
2018
2019     if (s->mb_intra) {
2020         i = 1; // skip clipping of intra dc
2021     } else
2022         i = 0;
2023
2024     for (; i <= last_index; i++) {
2025         const int j = s->intra_scantable.permutated[i];
2026         int level = block[j];
2027
2028         if (level > maxlevel) {
2029             level = maxlevel;
2030             overflow++;
2031         } else if (level < minlevel) {
2032             level = minlevel;
2033             overflow++;
2034         }
2035
2036         block[j] = level;
2037     }
2038
2039     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2040         av_log(s->avctx, AV_LOG_INFO,
2041                "warning, clipping %d dct coefficients to %d..%d\n",
2042                overflow, minlevel, maxlevel);
2043 }
2044
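     /* Per-pixel visual weight, proportional to the local standard deviation over
      * a (clipped) 3x3 neighbourhood; passed to dct_quantize_refine() when
      * quantizer_noise_shaping is enabled. */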
2045 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
2046 {
2047     int x, y;
2048     // FIXME optimize
2049     for (y = 0; y < 8; y++) {
2050         for (x = 0; x < 8; x++) {
2051             int x2, y2;
2052             int sum = 0;
2053             int sqr = 0;
2054             int count = 0;
2055
2056             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2057                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2058                     int v = ptr[x2 + y2 * stride];
2059                     sum += v;
2060                     sqr += v * v;
2061                     count++;
2062                 }
2063             }
2064             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2065         }
2066     }
2067 }
2068
2069 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2070                                                 int motion_x, int motion_y,
2071                                                 int mb_block_height,
2072                                                 int mb_block_width,
2073                                                 int mb_block_count)
2074 {
2075     int16_t weight[12][64];
2076     int16_t orig[12][64];
2077     const int mb_x = s->mb_x;
2078     const int mb_y = s->mb_y;
2079     int i;
2080     int skip_dct[12];
2081     int dct_offset = s->linesize * 8; // default for progressive frames
2082     int uv_dct_offset = s->uvlinesize * 8;
2083     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2084     ptrdiff_t wrap_y, wrap_c;
2085
2086     for (i = 0; i < mb_block_count; i++)
2087         skip_dct[i] = s->skipdct;
2088
2089     if (s->adaptive_quant) {
2090         const int last_qp = s->qscale;
2091         const int mb_xy = mb_x + mb_y * s->mb_stride;
2092
2093         s->lambda = s->lambda_table[mb_xy];
2094         update_qscale(s);
2095
2096         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2097             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2098             s->dquant = s->qscale - last_qp;
2099
2100             if (s->out_format == FMT_H263) {
2101                 s->dquant = av_clip(s->dquant, -2, 2);
2102
2103                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2104                     if (!s->mb_intra) {
2105                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2106                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2107                                 s->dquant = 0;
2108                         }
2109                         if (s->mv_type == MV_TYPE_8X8)
2110                             s->dquant = 0;
2111                     }
2112                 }
2113             }
2114         }
2115         ff_set_qscale(s, last_qp + s->dquant);
2116     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2117         ff_set_qscale(s, s->qscale + s->dquant);
2118
2119     wrap_y = s->linesize;
2120     wrap_c = s->uvlinesize;
2121     ptr_y  = s->new_picture.f->data[0] +
2122              (mb_y * 16 * wrap_y)              + mb_x * 16;
2123     ptr_cb = s->new_picture.f->data[1] +
2124              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2125     ptr_cr = s->new_picture.f->data[2] +
2126              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2127
2128     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2129         uint8_t *ebuf = s->sc.edge_emu_buffer + 38 * wrap_y;
2130         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2131         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2132         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2133                                  wrap_y, wrap_y,
2134                                  16, 16, mb_x * 16, mb_y * 16,
2135                                  s->width, s->height);
2136         ptr_y = ebuf;
2137         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2138                                  wrap_c, wrap_c,
2139                                  mb_block_width, mb_block_height,
2140                                  mb_x * mb_block_width, mb_y * mb_block_height,
2141                                  cw, ch);
2142         ptr_cb = ebuf + 16 * wrap_y;
2143         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2144                                  wrap_c, wrap_c,
2145                                  mb_block_width, mb_block_height,
2146                                  mb_x * mb_block_width, mb_y * mb_block_height,
2147                                  cw, ch);
2148         ptr_cr = ebuf + 16 * wrap_y + 16;
2149     }
2150
2151     if (s->mb_intra) {
2152         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2153             int progressive_score, interlaced_score;
2154
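                 // Decide between frame (progressive) and field (interlaced) DCT:
                 // compare the interlace-cost metric on the two 8-line halves
                 // against the same metric on even/odd field lines, with a bias of
                 // 400 favouring progressive. If fielded coding wins, reindex the
                 // blocks field-wise: dct_offset becomes one line, stride doubles.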
2155             s->interlaced_dct = 0;
2156             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2157                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2158                                                      NULL, wrap_y, 8) - 400;
2159
2160             if (progressive_score > 0) {
2161                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2162                                                         NULL, wrap_y * 2, 8) +
2163                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2164                                                         NULL, wrap_y * 2, 8);
2165                 if (progressive_score > interlaced_score) {
2166                     s->interlaced_dct = 1;
2167
2168                     dct_offset = wrap_y;
2169                     uv_dct_offset = wrap_c;
2170                     wrap_y <<= 1;
2171                     if (s->chroma_format == CHROMA_422 ||
2172                         s->chroma_format == CHROMA_444)
2173                         wrap_c <<= 1;
2174                 }
2175             }
2176         }
2177
2178         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2179         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2180         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2181         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2182
2183         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2184             skip_dct[4] = 1;
2185             skip_dct[5] = 1;
2186         } else {
2187             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2188             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2189             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2190                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2191                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2192             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2193                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2194                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2195                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2196                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2197                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2198                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2199             }
2200         }
2201     } else {
2202         op_pixels_func (*op_pix)[4];
2203         qpel_mc_func (*op_qpix)[16];
2204         uint8_t *dest_y, *dest_cb, *dest_cr;
2205
2206         dest_y  = s->dest[0];
2207         dest_cb = s->dest[1];
2208         dest_cr = s->dest[2];
2209
2210         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2211             op_pix  = s->hdsp.put_pixels_tab;
2212             op_qpix = s->qdsp.put_qpel_pixels_tab;
2213         } else {
2214             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2215             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2216         }
2217
2218         if (s->mv_dir & MV_DIR_FORWARD) {
2219             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2220                           s->last_picture.f->data,
2221                           op_pix, op_qpix);
2222             op_pix  = s->hdsp.avg_pixels_tab;
2223             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2224         }
2225         if (s->mv_dir & MV_DIR_BACKWARD) {
2226             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2227                           s->next_picture.f->data,
2228                           op_pix, op_qpix);
2229         }
2230
2231         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2232             int progressive_score, interlaced_score;
2233
2234             s->interlaced_dct = 0;
2235             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2236                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2237                                                      ptr_y + wrap_y * 8,
2238                                                      wrap_y, 8) - 400;
2239
2240             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2241                 progressive_score -= 400;
2242
2243             if (progressive_score > 0) {
2244                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2245                                                         wrap_y * 2, 8) +
2246                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2247                                                         ptr_y + wrap_y,
2248                                                         wrap_y * 2, 8);
2249
2250                 if (progressive_score > interlaced_score) {
2251                     s->interlaced_dct = 1;
2252
2253                     dct_offset = wrap_y;
2254                     uv_dct_offset = wrap_c;
2255                     wrap_y <<= 1;
2256                     if (s->chroma_format == CHROMA_422)
2257                         wrap_c <<= 1;
2258                 }
2259             }
2260         }
2261
2262         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2263         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2264         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2265                             dest_y + dct_offset, wrap_y);
2266         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2267                             dest_y + dct_offset + 8, wrap_y);
2268
2269         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2270             skip_dct[4] = 1;
2271             skip_dct[5] = 1;
2272         } else {
2273             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2274             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2275             if (!s->chroma_y_shift) { /* 422 */
2276                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2277                                     dest_cb + uv_dct_offset, wrap_c);
2278                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2279                                     dest_cr + uv_dct_offset, wrap_c);
2280             }
2281         }
2282         /* pre quantization */
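             // Cheap skip heuristic: if this MB's motion-compensation error
             // variance is already small, mark 8x8 blocks whose SAD against the
             // prediction stays below 20*qscale as skippable, so they bypass the
             // DCT and quantization below.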
2283         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2284                 2 * s->qscale * s->qscale) {
2285             // FIXME optimize
2286             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2287                 skip_dct[0] = 1;
2288             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2289                 skip_dct[1] = 1;
2290             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2291                                wrap_y, 8) < 20 * s->qscale)
2292                 skip_dct[2] = 1;
2293             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2294                                wrap_y, 8) < 20 * s->qscale)
2295                 skip_dct[3] = 1;
2296             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2297                 skip_dct[4] = 1;
2298             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2299                 skip_dct[5] = 1;
2300             if (!s->chroma_y_shift) { /* 422 */
2301                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2302                                    dest_cb + uv_dct_offset,
2303                                    wrap_c, 8) < 20 * s->qscale)
2304                     skip_dct[6] = 1;
2305                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2306                                    dest_cr + uv_dct_offset,
2307                                    wrap_c, 8) < 20 * s->qscale)
2308                     skip_dct[7] = 1;
2309             }
2310         }
2311     }
2312
2313     if (s->quantizer_noise_shaping) {
2314         if (!skip_dct[0])
2315             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2316         if (!skip_dct[1])
2317             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2318         if (!skip_dct[2])
2319             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2320         if (!skip_dct[3])
2321             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2322         if (!skip_dct[4])
2323             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2324         if (!skip_dct[5])
2325             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2326         if (!s->chroma_y_shift) { /* 422 */
2327             if (!skip_dct[6])
2328                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2329                                   wrap_c);
2330             if (!skip_dct[7])
2331                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2332                                   wrap_c);
2333         }
2334         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2335     }
2336
2337     /* DCT & quantize */
2338     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2339     {
2340         for (i = 0; i < mb_block_count; i++) {
2341             if (!skip_dct[i]) {
2342                 int overflow;
2343                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2344                 // FIXME we could decide to change the quantizer instead of
2345                 // clipping
2346                 // JS: I don't think that would be a good idea; it could lower
2347                 //     quality instead of improving it. Only INTRADC clipping
2348                 //     deserves changes to the quantizer.
2349                 if (overflow)
2350                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2351             } else
2352                 s->block_last_index[i] = -1;
2353         }
2354         if (s->quantizer_noise_shaping) {
2355             for (i = 0; i < mb_block_count; i++) {
2356                 if (!skip_dct[i]) {
2357                     s->block_last_index[i] =
2358                         dct_quantize_refine(s, s->block[i], weight[i],
2359                                             orig[i], i, s->qscale);
2360                 }
2361             }
2362         }
2363
2364         if (s->luma_elim_threshold && !s->mb_intra)
2365             for (i = 0; i < 4; i++)
2366                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2367         if (s->chroma_elim_threshold && !s->mb_intra)
2368             for (i = 4; i < mb_block_count; i++)
2369                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2370
2371         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2372             for (i = 0; i < mb_block_count; i++) {
2373                 if (s->block_last_index[i] == -1)
2374                     s->coded_score[i] = INT_MAX / 256;
2375             }
2376         }
2377     }
2378
2379     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2380         s->block_last_index[4] =
2381         s->block_last_index[5] = 0;
2382         s->block[4][0] =
2383         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2384         if (!s->chroma_y_shift) { /* 422 / 444 */
2385             for (i=6; i<12; i++) {
2386                 s->block_last_index[i] = 0;
2387                 s->block[i][0] = s->block[4][0];
2388             }
2389         }
2390     }
2391
2392     // FIXME: the non-C quantize code returns an incorrect block_last_index
2393     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2394         for (i = 0; i < mb_block_count; i++) {
2395             int j;
2396             if (s->block_last_index[i] > 0) {
2397                 for (j = 63; j > 0; j--) {
2398                     if (s->block[i][s->intra_scantable.permutated[j]])
2399                         break;
2400                 }
2401                 s->block_last_index[i] = j;
2402             }
2403         }
2404     }
2405
2406     /* huffman encode */
2407     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2408     case AV_CODEC_ID_MPEG1VIDEO:
2409     case AV_CODEC_ID_MPEG2VIDEO:
2410         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2411             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2412         break;
2413     case AV_CODEC_ID_MPEG4:
2414         if (CONFIG_MPEG4_ENCODER)
2415             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2416         break;
2417     case AV_CODEC_ID_MSMPEG4V2:
2418     case AV_CODEC_ID_MSMPEG4V3:
2419     case AV_CODEC_ID_WMV1:
2420         if (CONFIG_MSMPEG4_ENCODER)
2421             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2422         break;
2423     case AV_CODEC_ID_WMV2:
2424         if (CONFIG_WMV2_ENCODER)
2425             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2426         break;
2427     case AV_CODEC_ID_H261:
2428         if (CONFIG_H261_ENCODER)
2429             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2430         break;
2431     case AV_CODEC_ID_H263:
2432     case AV_CODEC_ID_H263P:
2433     case AV_CODEC_ID_FLV1:
2434     case AV_CODEC_ID_RV10:
2435     case AV_CODEC_ID_RV20:
2436         if (CONFIG_H263_ENCODER)
2437             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2438         break;
2439 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
2440     case AV_CODEC_ID_MJPEG:
2441     case AV_CODEC_ID_AMV:
2442         ff_mjpeg_encode_mb(s, s->block);
2443         break;
2444 #endif
2445     case AV_CODEC_ID_SPEEDHQ:
2446         if (CONFIG_SPEEDHQ_ENCODER)
2447             ff_speedhq_encode_mb(s, s->block);
2448         break;
2449     default:
2450         av_assert1(0);
2451     }
2452 }
2453
2454 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2455 {
2456     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2457     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2458     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2459 }
2460
2461 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2462     int i;
2463
2464     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2465
2466     /* MPEG-1 */
2467     d->mb_skip_run= s->mb_skip_run;
2468     for(i=0; i<3; i++)
2469         d->last_dc[i] = s->last_dc[i];
2470
2471     /* statistics */
2472     d->mv_bits= s->mv_bits;
2473     d->i_tex_bits= s->i_tex_bits;
2474     d->p_tex_bits= s->p_tex_bits;
2475     d->i_count= s->i_count;
2476     d->f_count= s->f_count;
2477     d->b_count= s->b_count;
2478     d->skip_count= s->skip_count;
2479     d->misc_bits= s->misc_bits;
2480     d->last_bits= 0;
2481
2482     d->mb_skipped= 0;
2483     d->qscale= s->qscale;
2484     d->dquant= s->dquant;
2485
2486     d->esc3_level_length= s->esc3_level_length;
2487 }
2488
2489 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2490     int i;
2491
2492     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2493     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2494
2495     /* MPEG-1 */
2496     d->mb_skip_run= s->mb_skip_run;
2497     for(i=0; i<3; i++)
2498         d->last_dc[i] = s->last_dc[i];
2499
2500     /* statistics */
2501     d->mv_bits= s->mv_bits;
2502     d->i_tex_bits= s->i_tex_bits;
2503     d->p_tex_bits= s->p_tex_bits;
2504     d->i_count= s->i_count;
2505     d->f_count= s->f_count;
2506     d->b_count= s->b_count;
2507     d->skip_count= s->skip_count;
2508     d->misc_bits= s->misc_bits;
2509
2510     d->mb_intra= s->mb_intra;
2511     d->mb_skipped= s->mb_skipped;
2512     d->mv_type= s->mv_type;
2513     d->mv_dir= s->mv_dir;
2514     d->pb= s->pb;
2515     if(s->data_partitioning){
2516         d->pb2= s->pb2;
2517         d->tex_pb= s->tex_pb;
2518     }
2519     d->block= s->block;
2520     for(i=0; i<8; i++)
2521         d->block_last_index[i]= s->block_last_index[i];
2522     d->interlaced_dct= s->interlaced_dct;
2523     d->qscale= s->qscale;
2524
2525     d->esc3_level_length= s->esc3_level_length;
2526 }
2527
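     /* Trial-encode one macroblock coding choice for RD mode decision: write into
      * one of two alternating scratch bit buffers (and scratch reconstruction
      * buffers), then score the result by bit count, or by bits*lambda2 plus the
      * reconstruction SSE when full rate-distortion decision is enabled; the
      * best-scoring variant is copied into *best. */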
2528 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2529                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2530                            int *dmin, int *next_block, int motion_x, int motion_y)
2531 {
2532     int score;
2533     uint8_t *dest_backup[3];
2534
2535     copy_context_before_encode(s, backup, type);
2536
2537     s->block= s->blocks[*next_block];
2538     s->pb= pb[*next_block];
2539     if(s->data_partitioning){
2540         s->pb2   = pb2   [*next_block];
2541         s->tex_pb= tex_pb[*next_block];
2542     }
2543
2544     if(*next_block){
2545         memcpy(dest_backup, s->dest, sizeof(s->dest));
2546         s->dest[0] = s->sc.rd_scratchpad;
2547         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2548         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2549         av_assert0(s->linesize >= 32); //FIXME
2550     }
2551
2552     encode_mb(s, motion_x, motion_y);
2553
2554     score= put_bits_count(&s->pb);
2555     if(s->data_partitioning){
2556         score+= put_bits_count(&s->pb2);
2557         score+= put_bits_count(&s->tex_pb);
2558     }
2559
2560     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2561         ff_mpv_reconstruct_mb(s, s->block);
2562
2563         score *= s->lambda2;
2564         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2565     }
2566
2567     if(*next_block){
2568         memcpy(s->dest, dest_backup, sizeof(s->dest));
2569     }
2570
2571     if(score<*dmin){
2572         *dmin= score;
2573         *next_block^=1;
2574
2575         copy_context_after_encode(best, s, type);
2576     }
2577 }
2578
2579 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2580     const uint32_t *sq = ff_square_tab + 256;
2581     int acc=0;
2582     int x,y;
2583
2584     if(w==16 && h==16)
2585         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2586     else if(w==8 && h==8)
2587         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2588
2589     for(y=0; y<h; y++){
2590         for(x=0; x<w; x++){
2591             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2592         }
2593     }
2594
2595     av_assert2(acc>=0);
2596
2597     return acc;
2598 }
2599
2600 static int sse_mb(MpegEncContext *s){
2601     int w= 16;
2602     int h= 16;
2603
2604     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2605     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2606
2607     if(w==16 && h==16)
2608       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2609         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2610                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2611                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2612       }else{
2613         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2614                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2615                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2616       }
2617     else
2618         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2619                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2620                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2621 }
2622
2623 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2624     MpegEncContext *s= *(void**)arg;
2625
2626
2627     s->me.pre_pass=1;
2628     s->me.dia_size= s->avctx->pre_dia_size;
2629     s->first_slice_line=1;
2630     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2631         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2632             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2633         }
2634         s->first_slice_line=0;
2635     }
2636
2637     s->me.pre_pass=0;
2638
2639     return 0;
2640 }
2641
2642 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2643     MpegEncContext *s= *(void**)arg;
2644
2645     s->me.dia_size= s->avctx->dia_size;
2646     s->first_slice_line=1;
2647     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2648         s->mb_x=0; //for block init below
2649         ff_init_block_index(s);
2650         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2651             s->block_index[0]+=2;
2652             s->block_index[1]+=2;
2653             s->block_index[2]+=2;
2654             s->block_index[3]+=2;
2655
2656             /* compute motion vector & mb_type and store in context */
2657             if(s->pict_type==AV_PICTURE_TYPE_B)
2658                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2659             else
2660                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2661         }
2662         s->first_slice_line=0;
2663     }
2664     return 0;
2665 }
2666
2667 static int mb_var_thread(AVCodecContext *c, void *arg){
2668     MpegEncContext *s= *(void**)arg;
2669     int mb_x, mb_y;
2670
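         // Per-MB spatial activity: pix_norm1 (sum of squares) minus sum^2/256 is
         // 256 times the variance of the 16x16 luma block, so varc is roughly the
         // block variance; the block mean is stored alongside it.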
2671     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2672         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2673             int xx = mb_x * 16;
2674             int yy = mb_y * 16;
2675             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2676             int varc;
2677             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2678
2679             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2680                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2681
2682             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2683             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2684             s->me.mb_var_sum_temp    += varc;
2685         }
2686     }
2687     return 0;
2688 }
2689
2690 static void write_slice_end(MpegEncContext *s){
2691     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2692         if(s->partitioned_frame){
2693             ff_mpeg4_merge_partitions(s);
2694         }
2695
2696         ff_mpeg4_stuffing(&s->pb);
2697     } else if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
2698                s->out_format == FMT_MJPEG) {
2699         ff_mjpeg_encode_stuffing(s);
2700     } else if (CONFIG_SPEEDHQ_ENCODER && s->out_format == FMT_SPEEDHQ) {
2701         ff_speedhq_end_slice(s);
2702     }
2703
2704     flush_put_bits(&s->pb);
2705
2706     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2707         s->misc_bits+= get_bits_diff(s);
2708 }
2709
2710 static void write_mb_info(MpegEncContext *s)
2711 {
2712     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2713     int offset = put_bits_count(&s->pb);
2714     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2715     int gobn = s->mb_y / s->gob_index;
2716     int pred_x, pred_y;
2717     if (CONFIG_H263_ENCODER)
2718         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2719     bytestream_put_le32(&ptr, offset);
2720     bytestream_put_byte(&ptr, s->qscale);
2721     bytestream_put_byte(&ptr, gobn);
2722     bytestream_put_le16(&ptr, mba);
2723     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2724     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2725     /* 4MV not implemented */
2726     bytestream_put_byte(&ptr, 0); /* hmv2 */
2727     bytestream_put_byte(&ptr, 0); /* vmv2 */
2728 }
2729
2730 static void update_mb_info(MpegEncContext *s, int startcode)
2731 {
2732     if (!s->mb_info)
2733         return;
2734     if (put_bytes_count(&s->pb, 0) - s->prev_mb_info >= s->mb_info) {
2735         s->mb_info_size += 12;
2736         s->prev_mb_info = s->last_mb_info;
2737     }
2738     if (startcode) {
2739         s->prev_mb_info = put_bytes_count(&s->pb, 0);
2740         /* This might have incremented mb_info_size above, and we return without
2741          * actually writing any info into that slot yet. But in that case,
2742          * this function will be called again right after the start code has been
2743          * written, and the mb info will actually be written then. */
2744         return;
2745     }
2746
2747     s->last_mb_info = put_bytes_count(&s->pb, 0);
2748     if (!s->mb_info_size)
2749         s->mb_info_size += 12;
2750     write_mb_info(s);
2751 }
2752
2753 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2754 {
2755     if (put_bytes_left(&s->pb, 0) < threshold
2756         && s->slice_context_count == 1
2757         && s->pb.buf == s->avctx->internal->byte_buffer) {
2758         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2759         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2760
2761         uint8_t *new_buffer = NULL;
2762         int new_buffer_size = 0;
2763
2764         if ((s->avctx->internal->byte_buffer_size + size_increase) >= INT_MAX/8) {
2765             av_log(s->avctx, AV_LOG_ERROR, "Cannot reallocate putbit buffer\n");
2766             return AVERROR(ENOMEM);
2767         }
2768
2769         emms_c();
2770
2771         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2772                               s->avctx->internal->byte_buffer_size + size_increase);
2773         if (!new_buffer)
2774             return AVERROR(ENOMEM);
2775
2776         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2777         av_free(s->avctx->internal->byte_buffer);
2778         s->avctx->internal->byte_buffer      = new_buffer;
2779         s->avctx->internal->byte_buffer_size = new_buffer_size;
2780         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2781         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2782         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2783     }
2784     if (put_bytes_left(&s->pb, 0) < threshold)
2785         return AVERROR(EINVAL);
2786     return 0;
2787 }
2788
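/*
 * Per-slice-context worker: encodes all macroblocks in [start_mb_y, end_mb_y).
 * Each MB is either encoded directly when only one candidate type remains, or,
 * with several candidate types (or QP/skip RD) enabled, trial-encoded into the
 * pb/pb2/tex_pb scratch buffers via encode_mb_hq() and the variant with the
 * lowest rate-distortion score is kept.
 */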
2789 static int encode_thread(AVCodecContext *c, void *arg){
2790     MpegEncContext *s= *(void**)arg;
2791     int mb_x, mb_y, mb_y_order;
2792     int chr_h= 16>>s->chroma_y_shift;
2793     int i, j;
2794     MpegEncContext best_s = { 0 }, backup_s;
2795     uint8_t bit_buf[2][MAX_MB_BYTES];
2796     uint8_t bit_buf2[2][MAX_MB_BYTES];
2797     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2798     PutBitContext pb[2], pb2[2], tex_pb[2];
2799
2800     for(i=0; i<2; i++){
2801         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2802         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2803         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2804     }
2805
2806     s->last_bits= put_bits_count(&s->pb);
2807     s->mv_bits=0;
2808     s->misc_bits=0;
2809     s->i_tex_bits=0;
2810     s->p_tex_bits=0;
2811     s->i_count=0;
2812     s->f_count=0;
2813     s->b_count=0;
2814     s->skip_count=0;
2815
2816     for(i=0; i<3; i++){
2817         /* init last dc values */
2818         /* note: quant matrix value (8) is implied here */
2819         s->last_dc[i] = 128 << s->intra_dc_precision;
2820
2821         s->current_picture.encoding_error[i] = 0;
2822     }
2823     if(s->codec_id==AV_CODEC_ID_AMV){
2824         s->last_dc[0] = 128*8/13;
2825         s->last_dc[1] = 128*8/14;
2826         s->last_dc[2] = 128*8/14;
2827     }
2828     s->mb_skip_run = 0;
2829     memset(s->last_mv, 0, sizeof(s->last_mv));
2830
2831     s->last_mv_dir = 0;
2832
2833     switch(s->codec_id){
2834     case AV_CODEC_ID_H263:
2835     case AV_CODEC_ID_H263P:
2836     case AV_CODEC_ID_FLV1:
2837         if (CONFIG_H263_ENCODER)
2838             s->gob_index = H263_GOB_HEIGHT(s->height);
2839         break;
2840     case AV_CODEC_ID_MPEG4:
2841         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2842             ff_mpeg4_init_partitions(s);
2843         break;
2844     }
2845
2846     s->resync_mb_x=0;
2847     s->resync_mb_y=0;
2848     s->first_slice_line = 1;
2849     s->ptr_lastgob = s->pb.buf;
2850     for (mb_y_order = s->start_mb_y; mb_y_order < s->end_mb_y; mb_y_order++) {
2851         if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
2852             int first_in_slice;
2853             mb_y = ff_speedhq_mb_y_order_to_mb(mb_y_order, s->mb_height, &first_in_slice);
2854             if (first_in_slice && mb_y_order != s->start_mb_y)
2855                 ff_speedhq_end_slice(s);
2856             s->last_dc[0] = s->last_dc[1] = s->last_dc[2] = 1024 << s->intra_dc_precision;
2857         } else {
2858             mb_y = mb_y_order;
2859         }
2860         s->mb_x=0;
2861         s->mb_y= mb_y;
2862
2863         ff_set_qscale(s, s->qscale);
2864         ff_init_block_index(s);
2865
2866         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2867             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2868             int mb_type= s->mb_type[xy];
2869 //            int d;
2870             int dmin= INT_MAX;
2871             int dir;
2872             int size_increase =  s->avctx->internal->byte_buffer_size/4
2873                                + s->mb_width*MAX_MB_BYTES;
2874
2875             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2876             if (put_bytes_left(&s->pb, 0) < MAX_MB_BYTES){
2877                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2878                 return -1;
2879             }
2880             if(s->data_partitioning){
2881                 if (put_bytes_left(&s->pb2,    0) < MAX_MB_BYTES ||
2882                     put_bytes_left(&s->tex_pb, 0) < MAX_MB_BYTES) {
2883                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2884                     return -1;
2885                 }
2886             }
2887
2888             s->mb_x = mb_x;
2889             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2890             ff_update_block_index(s);
2891
2892             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2893                 ff_h261_reorder_mb_index(s);
2894                 xy= s->mb_y*s->mb_stride + s->mb_x;
2895                 mb_type= s->mb_type[xy];
2896             }
2897
2898             /* write gob / video packet header  */
2899             if(s->rtp_mode){
2900                 int current_packet_size, is_gob_start;
2901
2902                 current_packet_size = put_bytes_count(&s->pb, 1)
2903                                       - (s->ptr_lastgob - s->pb.buf);
2904
2905                 is_gob_start = s->rtp_payload_size &&
2906                                current_packet_size >= s->rtp_payload_size &&
2907                                mb_y + mb_x > 0;
2908
2909                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2910
2911                 switch(s->codec_id){
2912                 case AV_CODEC_ID_H263:
2913                 case AV_CODEC_ID_H263P:
2914                     if(!s->h263_slice_structured)
2915                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2916                     break;
2917                 case AV_CODEC_ID_MPEG2VIDEO:
2918                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
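                    /* fall through: MPEG-1 and MPEG-2 share the skip-run check below */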
2919                 case AV_CODEC_ID_MPEG1VIDEO:
2920                     if(s->mb_skip_run) is_gob_start=0;
2921                     break;
2922                 case AV_CODEC_ID_MJPEG:
2923                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2924                     break;
2925                 }
2926
2927                 if(is_gob_start){
2928                     if(s->start_mb_y != mb_y || mb_x!=0){
2929                         write_slice_end(s);
2930
2931                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2932                             ff_mpeg4_init_partitions(s);
2933                         }
2934                     }
2935
2936                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2937                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2938
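                    /* Error-resilience testing: with error_rate set, roughly
                     * error_rate out of every 100 slices are discarded by
                     * rewinding the bit writer to the last GOB start. */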
2939                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2940                         int r = put_bytes_count(&s->pb, 0) + s->picture_number + 16 + s->mb_x + s->mb_y;
2941                         int d = 100 / s->error_rate;
2942                         if(r % d == 0){
2943                             current_packet_size=0;
2944                             s->pb.buf_ptr= s->ptr_lastgob;
2945                             av_assert1(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2946                         }
2947                     }
2948
2949 #if FF_API_RTP_CALLBACK
2950 FF_DISABLE_DEPRECATION_WARNINGS
2951                     if (s->avctx->rtp_callback){
2952                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2953                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2954                     }
2955 FF_ENABLE_DEPRECATION_WARNINGS
2956 #endif
2957                     update_mb_info(s, 1);
2958
2959                     switch(s->codec_id){
2960                     case AV_CODEC_ID_MPEG4:
2961                         if (CONFIG_MPEG4_ENCODER) {
2962                             ff_mpeg4_encode_video_packet_header(s);
2963                             ff_mpeg4_clean_buffers(s);
2964                         }
2965                     break;
2966                     case AV_CODEC_ID_MPEG1VIDEO:
2967                     case AV_CODEC_ID_MPEG2VIDEO:
2968                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2969                             ff_mpeg1_encode_slice_header(s);
2970                             ff_mpeg1_clean_buffers(s);
2971                         }
2972                     break;
2973                     case AV_CODEC_ID_H263:
2974                     case AV_CODEC_ID_H263P:
2975                         if (CONFIG_H263_ENCODER)
2976                             ff_h263_encode_gob_header(s, mb_y);
2977                     break;
2978                     }
2979
2980                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2981                         int bits= put_bits_count(&s->pb);
2982                         s->misc_bits+= bits - s->last_bits;
2983                         s->last_bits= bits;
2984                     }
2985
2986                     s->ptr_lastgob += current_packet_size;
2987                     s->first_slice_line=1;
2988                     s->resync_mb_x=mb_x;
2989                     s->resync_mb_y=mb_y;
2990                 }
2991             }
2992
2993             if(  (s->resync_mb_x   == s->mb_x)
2994                && s->resync_mb_y+1 == s->mb_y){
2995                 s->first_slice_line=0;
2996             }
2997
2998             s->mb_skipped=0;
2999             s->dquant=0; //only for QP_RD
3000
3001             update_mb_info(s, 0);
3002
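            /* More than one candidate MB type (or QP/skip RD): back up the
             * context, trial-encode every candidate with encode_mb_hq() into
             * the scratch bit buffers and keep the cheapest one in best_s;
             * dmin tracks the best rate-distortion score found so far. */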
3003             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
3004                 int next_block=0;
3005                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3006
3007                 copy_context_before_encode(&backup_s, s, -1);
3008                 backup_s.pb= s->pb;
3009                 best_s.data_partitioning= s->data_partitioning;
3010                 best_s.partitioned_frame= s->partitioned_frame;
3011                 if(s->data_partitioning){
3012                     backup_s.pb2= s->pb2;
3013                     backup_s.tex_pb= s->tex_pb;
3014                 }
3015
3016                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3017                     s->mv_dir = MV_DIR_FORWARD;
3018                     s->mv_type = MV_TYPE_16X16;
3019                     s->mb_intra= 0;
3020                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3021                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3022                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3023                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3024                 }
3025                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3026                     s->mv_dir = MV_DIR_FORWARD;
3027                     s->mv_type = MV_TYPE_FIELD;
3028                     s->mb_intra= 0;
3029                     for(i=0; i<2; i++){
3030                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3031                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3032                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3033                     }
3034                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3035                                  &dmin, &next_block, 0, 0);
3036                 }
3037                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3038                     s->mv_dir = MV_DIR_FORWARD;
3039                     s->mv_type = MV_TYPE_16X16;
3040                     s->mb_intra= 0;
3041                     s->mv[0][0][0] = 0;
3042                     s->mv[0][0][1] = 0;
3043                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3044                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3045                 }
3046                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3047                     s->mv_dir = MV_DIR_FORWARD;
3048                     s->mv_type = MV_TYPE_8X8;
3049                     s->mb_intra= 0;
3050                     for(i=0; i<4; i++){
3051                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3052                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3053                     }
3054                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3055                                  &dmin, &next_block, 0, 0);
3056                 }
3057                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3058                     s->mv_dir = MV_DIR_FORWARD;
3059                     s->mv_type = MV_TYPE_16X16;
3060                     s->mb_intra= 0;
3061                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3062                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3063                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3064                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3065                 }
3066                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3067                     s->mv_dir = MV_DIR_BACKWARD;
3068                     s->mv_type = MV_TYPE_16X16;
3069                     s->mb_intra= 0;
3070                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3071                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3072                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3073                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3074                 }
3075                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3076                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3077                     s->mv_type = MV_TYPE_16X16;
3078                     s->mb_intra= 0;
3079                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3080                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3081                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3082                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3083                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3084                                  &dmin, &next_block, 0, 0);
3085                 }
3086                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3087                     s->mv_dir = MV_DIR_FORWARD;
3088                     s->mv_type = MV_TYPE_FIELD;
3089                     s->mb_intra= 0;
3090                     for(i=0; i<2; i++){
3091                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3092                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3093                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3094                     }
3095                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3096                                  &dmin, &next_block, 0, 0);
3097                 }
3098                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3099                     s->mv_dir = MV_DIR_BACKWARD;
3100                     s->mv_type = MV_TYPE_FIELD;
3101                     s->mb_intra= 0;
3102                     for(i=0; i<2; i++){
3103                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3104                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3105                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3106                     }
3107                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3108                                  &dmin, &next_block, 0, 0);
3109                 }
3110                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3111                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3112                     s->mv_type = MV_TYPE_FIELD;
3113                     s->mb_intra= 0;
3114                     for(dir=0; dir<2; dir++){
3115                         for(i=0; i<2; i++){
3116                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3117                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3118                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3119                         }
3120                     }
3121                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3122                                  &dmin, &next_block, 0, 0);
3123                 }
3124                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3125                     s->mv_dir = 0;
3126                     s->mv_type = MV_TYPE_16X16;
3127                     s->mb_intra= 1;
3128                     s->mv[0][0][0] = 0;
3129                     s->mv[0][0][1] = 0;
3130                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3131                                  &dmin, &next_block, 0, 0);
3132                     if(s->h263_pred || s->h263_aic){
3133                         if(best_s.mb_intra)
3134                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3135                         else
3136                             ff_clean_intra_table_entries(s); //old mode?
3137                     }
3138                 }
3139
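                /* QP_RD: re-encode the winning 16x16 mode with qscale offsets
                 * of +-1 and +-2 (only +-2 for B-frames) and keep whichever
                 * quantizer yields the lower RD score; saved DC/AC prediction
                 * state is restored whenever a trial does not win. */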
3140                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3141                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3142                         const int last_qp= backup_s.qscale;
3143                         int qpi, qp, dc[6];
3144                         int16_t ac[6][16];
3145                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3146                         static const int dquant_tab[4]={-1,1,-2,2};
3147                         int storecoefs = s->mb_intra && s->dc_val[0];
3148
3149                         av_assert2(backup_s.dquant == 0);
3150
3151                         //FIXME intra
3152                         s->mv_dir= best_s.mv_dir;
3153                         s->mv_type = MV_TYPE_16X16;
3154                         s->mb_intra= best_s.mb_intra;
3155                         s->mv[0][0][0] = best_s.mv[0][0][0];
3156                         s->mv[0][0][1] = best_s.mv[0][0][1];
3157                         s->mv[1][0][0] = best_s.mv[1][0][0];
3158                         s->mv[1][0][1] = best_s.mv[1][0][1];
3159
3160                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3161                         for(; qpi<4; qpi++){
3162                             int dquant= dquant_tab[qpi];
3163                             qp= last_qp + dquant;
3164                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3165                                 continue;
3166                             backup_s.dquant= dquant;
3167                             if(storecoefs){
3168                                 for(i=0; i<6; i++){
3169                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3170                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3171                                 }
3172                             }
3173
3174                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3175                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3176                             if(best_s.qscale != qp){
3177                                 if(storecoefs){
3178                                     for(i=0; i<6; i++){
3179                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3180                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3181                                     }
3182                                 }
3183                             }
3184                         }
3185                     }
3186                 }
3187                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3188                     int mx= s->b_direct_mv_table[xy][0];
3189                     int my= s->b_direct_mv_table[xy][1];
3190
3191                     backup_s.dquant = 0;
3192                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3193                     s->mb_intra= 0;
3194                     ff_mpeg4_set_direct_mv(s, mx, my);
3195                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3196                                  &dmin, &next_block, mx, my);
3197                 }
3198                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3199                     backup_s.dquant = 0;
3200                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3201                     s->mb_intra= 0;
3202                     ff_mpeg4_set_direct_mv(s, 0, 0);
3203                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3204                                  &dmin, &next_block, 0, 0);
3205                 }
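                /* SKIP_RD: if the best inter mode coded any coefficients, also
                 * try the MB with the residual dropped (skipdct=1), in case
                 * signalling no texture at all is cheaper in the RD sense. */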
3206                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3207                     int coded=0;
3208                     for(i=0; i<6; i++)
3209                         coded |= s->block_last_index[i];
3210                     if(coded){
3211                         int mx,my;
3212                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3213                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3214                             mx=my=0; //FIXME find the one we actually used
3215                             ff_mpeg4_set_direct_mv(s, mx, my);
3216                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3217                             mx= s->mv[1][0][0];
3218                             my= s->mv[1][0][1];
3219                         }else{
3220                             mx= s->mv[0][0][0];
3221                             my= s->mv[0][0][1];
3222                         }
3223
3224                         s->mv_dir= best_s.mv_dir;
3225                         s->mv_type = best_s.mv_type;
3226                         s->mb_intra= 0;
3227 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3228                         s->mv[0][0][1] = best_s.mv[0][0][1];
3229                         s->mv[1][0][0] = best_s.mv[1][0][0];
3230                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3231                         backup_s.dquant= 0;
3232                         s->skipdct=1;
3233                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3234                                         &dmin, &next_block, mx, my);
3235                         s->skipdct=0;
3236                     }
3237                 }
3238
3239                 s->current_picture.qscale_table[xy] = best_s.qscale;
3240
3241                 copy_context_after_encode(s, &best_s, -1);
3242
3243                 pb_bits_count= put_bits_count(&s->pb);
3244                 flush_put_bits(&s->pb);
3245                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3246                 s->pb= backup_s.pb;
3247
3248                 if(s->data_partitioning){
3249                     pb2_bits_count= put_bits_count(&s->pb2);
3250                     flush_put_bits(&s->pb2);
3251                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3252                     s->pb2= backup_s.pb2;
3253
3254                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3255                     flush_put_bits(&s->tex_pb);
3256                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3257                     s->tex_pb= backup_s.tex_pb;
3258                 }
3259                 s->last_bits= put_bits_count(&s->pb);
3260
3261                 if (CONFIG_H263_ENCODER &&
3262                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3263                     ff_h263_update_motion_val(s);
3264
3265                 if(next_block==0){ //FIXME 16 vs linesize16
3266                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3267                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3268                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3269                 }
3270
3271                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3272                     ff_mpv_reconstruct_mb(s, s->block);
3273             } else {
3274                 int motion_x = 0, motion_y = 0;
3275                 s->mv_type=MV_TYPE_16X16;
3276                 // only one MB-Type possible
3277
3278                 switch(mb_type){
3279                 case CANDIDATE_MB_TYPE_INTRA:
3280                     s->mv_dir = 0;
3281                     s->mb_intra= 1;
3282                     motion_x= s->mv[0][0][0] = 0;
3283                     motion_y= s->mv[0][0][1] = 0;
3284                     break;
3285                 case CANDIDATE_MB_TYPE_INTER:
3286                     s->mv_dir = MV_DIR_FORWARD;
3287                     s->mb_intra= 0;
3288                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3289                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3290                     break;
3291                 case CANDIDATE_MB_TYPE_INTER_I:
3292                     s->mv_dir = MV_DIR_FORWARD;
3293                     s->mv_type = MV_TYPE_FIELD;
3294                     s->mb_intra= 0;
3295                     for(i=0; i<2; i++){
3296                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3297                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3298                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3299                     }
3300                     break;
3301                 case CANDIDATE_MB_TYPE_INTER4V:
3302                     s->mv_dir = MV_DIR_FORWARD;
3303                     s->mv_type = MV_TYPE_8X8;
3304                     s->mb_intra= 0;
3305                     for(i=0; i<4; i++){
3306                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3307                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3308                     }
3309                     break;
3310                 case CANDIDATE_MB_TYPE_DIRECT:
3311                     if (CONFIG_MPEG4_ENCODER) {
3312                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3313                         s->mb_intra= 0;
3314                         motion_x=s->b_direct_mv_table[xy][0];
3315                         motion_y=s->b_direct_mv_table[xy][1];
3316                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3317                     }
3318                     break;
3319                 case CANDIDATE_MB_TYPE_DIRECT0:
3320                     if (CONFIG_MPEG4_ENCODER) {
3321                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3322                         s->mb_intra= 0;
3323                         ff_mpeg4_set_direct_mv(s, 0, 0);
3324                     }
3325                     break;
3326                 case CANDIDATE_MB_TYPE_BIDIR:
3327                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3328                     s->mb_intra= 0;
3329                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3330                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3331                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3332                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3333                     break;
3334                 case CANDIDATE_MB_TYPE_BACKWARD:
3335                     s->mv_dir = MV_DIR_BACKWARD;
3336                     s->mb_intra= 0;
3337                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3338                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3339                     break;
3340                 case CANDIDATE_MB_TYPE_FORWARD:
3341                     s->mv_dir = MV_DIR_FORWARD;
3342                     s->mb_intra= 0;
3343                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3344                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3345                     break;
3346                 case CANDIDATE_MB_TYPE_FORWARD_I:
3347                     s->mv_dir = MV_DIR_FORWARD;
3348                     s->mv_type = MV_TYPE_FIELD;
3349                     s->mb_intra= 0;
3350                     for(i=0; i<2; i++){
3351                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3352                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3353                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3354                     }
3355                     break;
3356                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3357                     s->mv_dir = MV_DIR_BACKWARD;
3358                     s->mv_type = MV_TYPE_FIELD;
3359                     s->mb_intra= 0;
3360                     for(i=0; i<2; i++){
3361                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3362                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3363                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3364                     }
3365                     break;
3366                 case CANDIDATE_MB_TYPE_BIDIR_I:
3367                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3368                     s->mv_type = MV_TYPE_FIELD;
3369                     s->mb_intra= 0;
3370                     for(dir=0; dir<2; dir++){
3371                         for(i=0; i<2; i++){
3372                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3373                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3374                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3375                         }
3376                     }
3377                     break;
3378                 default:
3379                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3380                 }
3381
3382                 encode_mb(s, motion_x, motion_y);
3383
3384                 // RAL: Update last macroblock type
3385                 s->last_mv_dir = s->mv_dir;
3386
3387                 if (CONFIG_H263_ENCODER &&
3388                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3389                     ff_h263_update_motion_val(s);
3390
3391                 ff_mpv_reconstruct_mb(s, s->block);
3392             }
3393
3394             /* clear the MV table entry of intra MBs in I/P/S frames, as B-frame direct mode reads this table later */
3395             if(s->mb_intra /* && I,P,S_TYPE */){
3396                 s->p_mv_table[xy][0]=0;
3397                 s->p_mv_table[xy][1]=0;
3398             }
3399
3400             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3401                 int w= 16;
3402                 int h= 16;
3403
3404                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3405                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3406
3407                 s->current_picture.encoding_error[0] += sse(
3408                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3409                     s->dest[0], w, h, s->linesize);
3410                 s->current_picture.encoding_error[1] += sse(
3411                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3412                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3413                 s->current_picture.encoding_error[2] += sse(
3414                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3415                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3416             }
3417             if(s->loop_filter){
3418                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3419                     ff_h263_loop_filter(s);
3420             }
3421             ff_dlog(s->avctx, "MB %d %d bits\n",
3422                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3423         }
3424     }
3425
3426     // Not pretty, but the msmpeg4 extension header must be written before the final flush, so it has to be here.
3427     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3428         ff_msmpeg4_encode_ext_header(s);
3429
3430     write_slice_end(s);
3431
3432 #if FF_API_RTP_CALLBACK
3433 FF_DISABLE_DEPRECATION_WARNINGS
3434     /* Send the last GOB if RTP */
3435     if (s->avctx->rtp_callback) {
3436         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3437         int pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3438         /* Call the RTP callback to send the last GOB */
3439         emms_c();
3440         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3441     }
3442 FF_ENABLE_DEPRECATION_WARNINGS
3443 #endif
3444
3445     return 0;
3446 }
3447
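/*
 * Fold the statistics gathered by a slice/thread context back into the main
 * context: every merged field is added to dst and zeroed in src.
 * merge_context_after_encode() additionally appends the bitstream written by
 * the slice context to the main PutBitContext.
 */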
3448 #define MERGE(field) dst->field += src->field; src->field=0
3449 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3450     MERGE(me.scene_change_score);
3451     MERGE(me.mc_mb_var_sum_temp);
3452     MERGE(me.mb_var_sum_temp);
3453 }
3454
3455 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3456     int i;
3457
3458     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3459     MERGE(dct_count[1]);
3460     MERGE(mv_bits);
3461     MERGE(i_tex_bits);
3462     MERGE(p_tex_bits);
3463     MERGE(i_count);
3464     MERGE(f_count);
3465     MERGE(b_count);
3466     MERGE(skip_count);
3467     MERGE(misc_bits);
3468     MERGE(er.error_count);
3469     MERGE(padding_bug_score);
3470     MERGE(current_picture.encoding_error[0]);
3471     MERGE(current_picture.encoding_error[1]);
3472     MERGE(current_picture.encoding_error[2]);
3473
3474     if (dst->noise_reduction){
3475         for(i=0; i<64; i++){
3476             MERGE(dct_error_sum[0][i]);
3477             MERGE(dct_error_sum[1][i]);
3478         }
3479     }
3480
3481     av_assert1(put_bits_count(&src->pb) % 8 ==0);
3482     av_assert1(put_bits_count(&dst->pb) % 8 ==0);
3483     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3484     flush_put_bits(&dst->pb);
3485 }
3486
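/*
 * Pick the frame quantizer: use a previously forced lambda (next_lambda) if
 * set, otherwise ask the rate-control module unless a fixed qscale is used.
 * With adaptive quantization the per-MB qscale table is cleaned up in a
 * codec-specific way and the first entry is taken as the frame lambda.
 */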
3487 static int estimate_qp(MpegEncContext *s, int dry_run){
3488     if (s->next_lambda){
3489         s->current_picture_ptr->f->quality =
3490         s->current_picture.f->quality = s->next_lambda;
3491         if(!dry_run) s->next_lambda= 0;
3492     } else if (!s->fixed_qscale) {
3493         int quality = ff_rate_estimate_qscale(s, dry_run);
3494         s->current_picture_ptr->f->quality =
3495         s->current_picture.f->quality = quality;
3496         if (s->current_picture.f->quality < 0)
3497             return -1;
3498     }
3499
3500     if(s->adaptive_quant){
3501         switch(s->codec_id){
3502         case AV_CODEC_ID_MPEG4:
3503             if (CONFIG_MPEG4_ENCODER)
3504                 ff_clean_mpeg4_qscales(s);
3505             break;
3506         case AV_CODEC_ID_H263:
3507         case AV_CODEC_ID_H263P:
3508         case AV_CODEC_ID_FLV1:
3509             if (CONFIG_H263_ENCODER)
3510                 ff_clean_h263_qscales(s);
3511             break;
3512         default:
3513             ff_init_qscale_tab(s);
3514         }
3515
3516         s->lambda= s->lambda_table[0];
3517         //FIXME broken
3518     }else
3519         s->lambda = s->current_picture.f->quality;
3520     update_qscale(s);
3521     return 0;
3522 }
3523
3524 /* must be called before writing the header */
3525 static void set_frame_distances(MpegEncContext * s){
3526     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3527     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3528
3529     if(s->pict_type==AV_PICTURE_TYPE_B){
3530         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3531         av_assert1(s->pb_time > 0 && s->pb_time < s->pp_time);
3532     }else{
3533         s->pp_time= s->time - s->last_non_b_time;
3534         s->last_non_b_time= s->time;
3535         av_assert1(s->picture_number==0 || s->pp_time > 0);
3536     }
3537 }
3538
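/*
 * Encode one picture: estimate the quantizer, run (pre-)motion estimation on
 * all slice contexts, possibly promote a P-frame to an I-frame on a scene
 * change, choose f_code/b_code and clamp overlong motion vectors, set up the
 * quantization matrices for MJPEG/AMV/SpeedHQ, write the picture header, and
 * finally run encode_thread() on every slice context and merge the results.
 */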
3539 static int encode_picture(MpegEncContext *s, int picture_number)
3540 {
3541     int i, ret;
3542     int bits;
3543     int context_count = s->slice_context_count;
3544
3545     s->picture_number = picture_number;
3546
3547     /* Reset the average MB variance */
3548     s->me.mb_var_sum_temp    =
3549     s->me.mc_mb_var_sum_temp = 0;
3550
3551     /* we need to initialize some time vars before we can encode B-frames */
3552     // RAL: Condition added for MPEG1VIDEO
3553     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3554         set_frame_distances(s);
3555     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3556         ff_set_mpeg4_time(s);
3557
3558     s->me.scene_change_score=0;
3559
3560 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3561
3562     if(s->pict_type==AV_PICTURE_TYPE_I){
3563         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3564         else                        s->no_rounding=0;
3565     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3566         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3567             s->no_rounding ^= 1;
3568     }
3569
3570     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3571         if (estimate_qp(s,1) < 0)
3572             return -1;
3573         ff_get_2pass_fcode(s);
3574     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3575         if(s->pict_type==AV_PICTURE_TYPE_B)
3576             s->lambda= s->last_lambda_for[s->pict_type];
3577         else
3578             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3579         update_qscale(s);
3580     }
3581
3582     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3583         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3584         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3585         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3586         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3587     }
3588
3589     s->mb_intra=0; //for the rate distortion & bit compare functions
3590     for(i=1; i<context_count; i++){
3591         ret = ff_update_duplicate_context(s->thread_context[i], s);
3592         if (ret < 0)
3593             return ret;
3594     }
3595
3596     if(ff_init_me(s)<0)
3597         return -1;
3598
3599     /* Estimate motion for every MB */
3600     if(s->pict_type != AV_PICTURE_TYPE_I){
3601         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3602         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3603         if (s->pict_type != AV_PICTURE_TYPE_B) {
3604             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3605                 s->me_pre == 2) {
3606                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3607             }
3608         }
3609
3610         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3611     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3612         /* I-Frame */
3613         for(i=0; i<s->mb_stride*s->mb_height; i++)
3614             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3615
3616         if(!s->fixed_qscale){
3617             /* finding spatial complexity for I-frame rate control */
3618             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3619         }
3620     }
3621     for(i=1; i<context_count; i++){
3622         merge_context_after_me(s, s->thread_context[i]);
3623     }
3624     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3625     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3626     emms_c();
3627
3628     if (s->me.scene_change_score > s->scenechange_threshold &&
3629         s->pict_type == AV_PICTURE_TYPE_P) {
3630         s->pict_type= AV_PICTURE_TYPE_I;
3631         for(i=0; i<s->mb_stride*s->mb_height; i++)
3632             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3633         if(s->msmpeg4_version >= 3)
3634             s->no_rounding=1;
3635         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3636                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3637     }
3638
3639     if(!s->umvplus){
3640         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3641             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3642
3643             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3644                 int a,b;
3645                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3646                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3647                 s->f_code= FFMAX3(s->f_code, a, b);
3648             }
3649
3650             ff_fix_long_p_mvs(s, s->intra_penalty ? CANDIDATE_MB_TYPE_INTER : CANDIDATE_MB_TYPE_INTRA);
3651             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, !!s->intra_penalty);
3652             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3653                 int j;
3654                 for(i=0; i<2; i++){
3655                     for(j=0; j<2; j++)
3656                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3657                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, !!s->intra_penalty);
3658                 }
3659             }
3660         }
3661
3662         if(s->pict_type==AV_PICTURE_TYPE_B){
3663             int a, b;
3664
3665             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3666             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3667             s->f_code = FFMAX(a, b);
3668
3669             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3670             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3671             s->b_code = FFMAX(a, b);
3672
3673             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3674             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3675             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3676             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3677             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3678                 int dir, j;
3679                 for(dir=0; dir<2; dir++){
3680                     for(i=0; i<2; i++){
3681                         for(j=0; j<2; j++){
3682                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3683                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3684                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3685                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3686                         }
3687                     }
3688                 }
3689             }
3690         }
3691     }
3692
3693     if (estimate_qp(s, 0) < 0)
3694         return -1;
3695
3696     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3697         s->pict_type == AV_PICTURE_TYPE_I &&
3698         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3699         s->qscale= 3; //reduce clipping problems
3700
3701     if (s->out_format == FMT_MJPEG) {
3702         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3703         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3704
3705         if (s->avctx->intra_matrix) {
3706             chroma_matrix =
3707             luma_matrix = s->avctx->intra_matrix;
3708         }
3709         if (s->avctx->chroma_intra_matrix)
3710             chroma_matrix = s->avctx->chroma_intra_matrix;
3711
3712         /* for mjpeg, we do include qscale in the matrix */
3713         for(i=1;i<64;i++){
3714             int j = s->idsp.idct_permutation[i];
3715
3716             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3717             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3718         }
3719         s->y_dc_scale_table=
3720         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3721         s->chroma_intra_matrix[0] =
3722         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3723         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3724                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3725         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3726                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3727         s->qscale= 8;
3728     }
3729     if(s->codec_id == AV_CODEC_ID_AMV){
3730         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3731         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3732         for(i=1;i<64;i++){
3733             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3734
3735             s->intra_matrix[j]        = sp5x_qscale_five_quant_table[0][i];
3736             s->chroma_intra_matrix[j] = sp5x_qscale_five_quant_table[1][i];
3737         }
3738         s->y_dc_scale_table= y;
3739         s->c_dc_scale_table= c;
3740         s->intra_matrix[0] = 13;
3741         s->chroma_intra_matrix[0] = 14;
3742         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3743                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3744         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3745                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3746         s->qscale= 8;
3747     }
3748
3749     if (s->out_format == FMT_SPEEDHQ) {
3750         s->y_dc_scale_table=
3751         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[3];
3752     }
3753
3754     //FIXME var duplication
3755     s->current_picture_ptr->f->key_frame =
3756     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3757     s->current_picture_ptr->f->pict_type =
3758     s->current_picture.f->pict_type = s->pict_type;
3759
3760     if (s->current_picture.f->key_frame)
3761         s->picture_in_gop_number=0;
3762
3763     s->mb_x = s->mb_y = 0;
3764     s->last_bits= put_bits_count(&s->pb);
3765     switch(s->out_format) {
3766 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
3767     case FMT_MJPEG:
3768         /* s->huffman == HUFFMAN_TABLE_OPTIMAL can only be true for MJPEG. */
3769         if (!CONFIG_MJPEG_ENCODER || s->huffman != HUFFMAN_TABLE_OPTIMAL)
3770             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3771                                            s->pred, s->intra_matrix, s->chroma_intra_matrix);
3772         break;
3773 #endif
3774     case FMT_SPEEDHQ:
3775         if (CONFIG_SPEEDHQ_ENCODER)
3776             ff_speedhq_encode_picture_header(s);
3777         break;
3778     case FMT_H261:
3779         if (CONFIG_H261_ENCODER)
3780             ff_h261_encode_picture_header(s, picture_number);
3781         break;
3782     case FMT_H263:
3783         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3784             ff_wmv2_encode_picture_header(s, picture_number);
3785         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3786             ff_msmpeg4_encode_picture_header(s, picture_number);
3787         else if (CONFIG_MPEG4_ENCODER && s->h263_pred) {
3788             ret = ff_mpeg4_encode_picture_header(s, picture_number);
3789             if (ret < 0)
3790                 return ret;
3791         } else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3792             ret = ff_rv10_encode_picture_header(s, picture_number);
3793             if (ret < 0)
3794                 return ret;
3795         }
3796         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3797             ff_rv20_encode_picture_header(s, picture_number);
3798         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3799             ff_flv_encode_picture_header(s, picture_number);
3800         else if (CONFIG_H263_ENCODER)
3801             ff_h263_encode_picture_header(s, picture_number);
3802         break;
3803     case FMT_MPEG1:
3804         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3805             ff_mpeg1_encode_picture_header(s, picture_number);
3806         break;
3807     default:
3808         av_assert0(0);
3809     }
3810     bits= put_bits_count(&s->pb);
3811     s->header_bits= bits - s->last_bits;
3812
3813     for(i=1; i<context_count; i++){
3814         update_duplicate_context_after_me(s->thread_context[i], s);
3815     }
3816     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3817     for(i=1; i<context_count; i++){
3818         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3819             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-BUF_BITS));
3820         merge_context_after_encode(s, s->thread_context[i]);
3821     }
3822     emms_c();
3823     return 0;
3824 }
3825
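/*
 * DCT-domain noise reduction: accumulate the absolute value of each
 * coefficient position in dct_error_sum and shrink every coefficient towards
 * zero by the per-position dct_offset (derived elsewhere from these sums and
 * the noise_reduction strength), i.e. a soft-threshold denoiser.
 */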
3826 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3827     const int intra= s->mb_intra;
3828     int i;
3829
3830     s->dct_count[intra]++;
3831
3832     for(i=0; i<64; i++){
3833         int level= block[i];
3834
3835         if(level){
3836             if(level>0){
3837                 s->dct_error_sum[intra][i] += level;
3838                 level -= s->dct_offset[intra][i];
3839                 if(level<0) level=0;
3840             }else{
3841                 s->dct_error_sum[intra][i] -= level;
3842                 level += s->dct_offset[intra][i];
3843                 if(level>0) level=0;
3844             }
3845             block[i]= level;
3846         }
3847     }
3848 }
3849
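/*
 * Rate-distortion optimal (trellis) quantization. After the forward DCT each
 * coefficient in scan order gets up to two candidate quantized levels (the
 * rounded value and the value one step closer to zero). A Viterbi-style
 * dynamic programming pass then picks the run/level sequence minimizing
 *     score = transform-domain squared error + lambda * bits(run, level)
 * where score_tab[i] is the best cost of a path ending at coefficient i and
 * survivor[] lists the positions still worth extending a run from.
 */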
3850 static int dct_quantize_trellis_c(MpegEncContext *s,
3851                                   int16_t *block, int n,
3852                                   int qscale, int *overflow){
3853     const int *qmat;
3854     const uint16_t *matrix;
3855     const uint8_t *scantable;
3856     const uint8_t *perm_scantable;
3857     int max=0;
3858     unsigned int threshold1, threshold2;
3859     int bias=0;
3860     int run_tab[65];
3861     int level_tab[65];
3862     int score_tab[65];
3863     int survivor[65];
3864     int survivor_count;
3865     int last_run=0;
3866     int last_level=0;
3867     int last_score= 0;
3868     int last_i;
3869     int coeff[2][64];
3870     int coeff_count[64];
3871     int qmul, qadd, start_i, last_non_zero, i, dc;
3872     const int esc_length= s->ac_esc_length;
3873     uint8_t * length;
3874     uint8_t * last_length;
3875     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3876     int mpeg2_qscale;
3877
3878     s->fdsp.fdct(block);
3879
3880     if(s->dct_error_sum)
3881         s->denoise_dct(s, block);
3882     qmul= qscale*16;
3883     qadd= ((qscale-1)|1)*8;
3884
3885     if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3886     else                 mpeg2_qscale = qscale << 1;
3887
3888     if (s->mb_intra) {
3889         int q;
3890         scantable= s->intra_scantable.scantable;
3891         perm_scantable= s->intra_scantable.permutated;
3892         if (!s->h263_aic) {
3893             if (n < 4)
3894                 q = s->y_dc_scale;
3895             else
3896                 q = s->c_dc_scale;
3897             q = q << 3;
3898         } else{
3899             /* For AIC we skip quant/dequant of INTRADC */
3900             q = 1 << 3;
3901             qadd=0;
3902         }
3903
3904         /* note: block[0] is assumed to be positive */
3905         block[0] = (block[0] + (q >> 1)) / q;
3906         start_i = 1;
3907         last_non_zero = 0;
3908         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3909         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3910         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3911             bias= 1<<(QMAT_SHIFT-1);
3912
3913         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3914             length     = s->intra_chroma_ac_vlc_length;
3915             last_length= s->intra_chroma_ac_vlc_last_length;
3916         } else {
3917             length     = s->intra_ac_vlc_length;
3918             last_length= s->intra_ac_vlc_last_length;
3919         }
3920     } else {
3921         scantable= s->inter_scantable.scantable;
3922         perm_scantable= s->inter_scantable.permutated;
3923         start_i = 0;
3924         last_non_zero = -1;
3925         qmat = s->q_inter_matrix[qscale];
3926         matrix = s->inter_matrix;
3927         length     = s->inter_ac_vlc_length;
3928         last_length= s->inter_ac_vlc_last_length;
3929     }
3930     last_i= start_i;
3931
3932     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3933     threshold2= (threshold1<<1);
3934
3935     for(i=63; i>=start_i; i--) {
3936         const int j = scantable[i];
3937         int level = block[j] * qmat[j];
3938
3939         if(((unsigned)(level+threshold1))>threshold2){
3940             last_non_zero = i;
3941             break;
3942         }
3943     }
3944
3945     for(i=start_i; i<=last_non_zero; i++) {
3946         const int j = scantable[i];
3947         int level = block[j] * qmat[j];
3948
3949 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3950 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3951         if(((unsigned)(level+threshold1))>threshold2){
3952             if(level>0){
3953                 level= (bias + level)>>QMAT_SHIFT;
3954                 coeff[0][i]= level;
3955                 coeff[1][i]= level-1;
3956 //                coeff[2][k]= level-2;
3957             }else{
3958                 level= (bias - level)>>QMAT_SHIFT;
3959                 coeff[0][i]= -level;
3960                 coeff[1][i]= -level+1;
3961 //                coeff[2][k]= -level+2;
3962             }
3963             coeff_count[i]= FFMIN(level, 2);
3964             av_assert2(coeff_count[i]);
3965             max |=level;
3966         }else{
3967             coeff[0][i]= (level>>31)|1;
3968             coeff_count[i]= 1;
3969         }
3970     }
3971
3972     *overflow= s->max_qcoeff < max; //overflow might have happened
3973
3974     if(last_non_zero < start_i){
3975         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3976         return last_non_zero;
3977     }
3978
3979     score_tab[start_i]= 0;
3980     survivor[0]= start_i;
3981     survivor_count= 1;
3982
3983     for(i=start_i; i<=last_non_zero; i++){
3984         int level_index, j, zero_distortion;
3985         int dct_coeff= FFABS(block[ scantable[i] ]);
3986         int best_score=256*256*256*120;
3987
3988         if (s->fdsp.fdct == ff_fdct_ifast)
3989             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3990         zero_distortion= dct_coeff*dct_coeff;
3991
3992         for(level_index=0; level_index < coeff_count[i]; level_index++){
3993             int distortion;
3994             int level= coeff[level_index][i];
3995             const int alevel= FFABS(level);
3996             int unquant_coeff;
3997
3998             av_assert2(level);
3999
4000             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4001                 unquant_coeff= alevel*qmul + qadd;
4002             } else if(s->out_format == FMT_MJPEG) {
4003                 j = s->idsp.idct_permutation[scantable[i]];
4004                 unquant_coeff = alevel * matrix[j] * 8;
4005             }else{ // MPEG-1
4006                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
4007                 if(s->mb_intra){
4008                         unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
4009                         unquant_coeff =   (unquant_coeff - 1) | 1;
4010                 }else{
4011                         unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
4012                         unquant_coeff =   (unquant_coeff - 1) | 1;
4013                 }
4014                 unquant_coeff<<= 3;
4015             }
4016
4017             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
4018             level+=64;
4019             if((level&(~127)) == 0){
4020                 for(j=survivor_count-1; j>=0; j--){
4021                     int run= i - survivor[j];
4022                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4023                     score += score_tab[i-run];
4024
4025                     if(score < best_score){
4026                         best_score= score;
4027                         run_tab[i+1]= run;
4028                         level_tab[i+1]= level-64;
4029                     }
4030                 }
4031
4032                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4033                     for(j=survivor_count-1; j>=0; j--){
4034                         int run= i - survivor[j];
4035                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4036                         score += score_tab[i-run];
4037                         if(score < last_score){
4038                             last_score= score;
4039                             last_run= run;
4040                             last_level= level-64;
4041                             last_i= i+1;
4042                         }
4043                     }
4044                 }
4045             }else{
4046                 distortion += esc_length*lambda;
4047                 for(j=survivor_count-1; j>=0; j--){
4048                     int run= i - survivor[j];
4049                     int score= distortion + score_tab[i-run];
4050
4051                     if(score < best_score){
4052                         best_score= score;
4053                         run_tab[i+1]= run;
4054                         level_tab[i+1]= level-64;
4055                     }
4056                 }
4057
4058                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4059                     for(j=survivor_count-1; j>=0; j--){
4060                         int run= i - survivor[j];
4061                         int score= distortion + score_tab[i-run];
4062                         if(score < last_score){
4063                             last_score= score;
4064                             last_run= run;
4065                             last_level= level-64;
4066                             last_i= i+1;
4067                         }
4068                     }
4069                 }
4070             }
4071         }
4072
4073         score_tab[i+1]= best_score;
4074
4075         // Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
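        // Prune survivor states whose accumulated score can no longer beat
        // the best path (with a one-lambda slack for long blocks, see the
        // note above), then register position i+1 as a new survivor.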
4076         if(last_non_zero <= 27){
4077             for(; survivor_count; survivor_count--){
4078                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4079                     break;
4080             }
4081         }else{
4082             for(; survivor_count; survivor_count--){
4083                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4084                     break;
4085             }
4086         }
4087
4088         survivor[ survivor_count++ ]= i+1;
4089     }
4090
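    /* For formats with a separate end-of-block code (MPEG-1/2, MJPEG) the EOB
     * cost does not depend on the last run/level, so the best truncation
     * point is chosen here by scanning score_tab[] with an approximate EOB
     * cost of 2*lambda; the H.263-style formats already tracked the best
     * "last coefficient" inside the loop above via last_length[]. */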
4091     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4092         last_score= 256*256*256*120;
4093         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4094             int score= score_tab[i];
4095             if (i)
4096                 score += lambda * 2; // FIXME more exact?
4097
4098             if(score < last_score){
4099                 last_score= score;
4100                 last_i= i;
4101                 last_level= level_tab[i];
4102                 last_run= run_tab[i];
4103             }
4104         }
4105     }
4106
4107     s->coded_score[n] = last_score;
4108
4109     dc= FFABS(block[0]);
4110     last_non_zero= last_i - 1;
4111     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4112
4113     if(last_non_zero < start_i)
4114         return last_non_zero;
4115
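    /* Only the first coefficient survived: pick its level directly by
     * comparing, for each candidate, the reconstruction error plus coding
     * cost against dropping the block entirely (best_level == 0). */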
4116     if(last_non_zero == 0 && start_i == 0){
4117         int best_level= 0;
4118         int best_score= dc * dc;
4119
4120         for(i=0; i<coeff_count[0]; i++){
4121             int level= coeff[i][0];
4122             int alevel= FFABS(level);
4123             int unquant_coeff, score, distortion;
4124
4125             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4126                     unquant_coeff= (alevel*qmul + qadd)>>3;
4127             } else{ // MPEG-1
4128                     unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4129                     unquant_coeff =   (unquant_coeff - 1) | 1;
4130             }
4131             unquant_coeff = (unquant_coeff + 4) >> 3;
4132             unquant_coeff<<= 3 + 3;
4133
4134             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4135             level+=64;
4136             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4137             else                    score= distortion + esc_length*lambda;
4138
4139             if(score < best_score){
4140                 best_score= score;
4141                 best_level= level - 64;
4142             }
4143         }
4144         block[0]= best_level;
4145         s->coded_score[n] = best_score - dc*dc;
4146         if(best_level == 0) return -1;
4147         else                return last_non_zero;
4148     }
4149
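    /* Backtrack through run_tab[]/level_tab[] from the chosen end position
     * and write the selected levels back into the block in permuted order. */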
4150     i= last_i;
4151     av_assert2(last_level);
4152
4153     block[ perm_scantable[last_non_zero] ]= last_level;
4154     i -= last_run + 1;
4155
4156     for(; i>start_i; i -= run_tab[i] + 1){
4157         block[ perm_scantable[i-1] ]= level_tab[i];
4158     }
4159
4160     return last_non_zero;
4161 }
4162
4163 static int16_t basis[64][64];
4164
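/* Build the 64 inverse-DCT basis functions in BASIS_SHIFT fixed point:
 * basis[c][p] is the contribution of DCT coefficient c to pixel p, with the
 * rows stored under the given (IDCT) permutation so they can be indexed
 * directly with coefficient positions of a permuted block.  Used by the
 * noise-shaping refinement below. */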
4165 static void build_basis(uint8_t *perm){
4166     int i, j, x, y;
4167     emms_c();
4168     for(i=0; i<8; i++){
4169         for(j=0; j<8; j++){
4170             for(y=0; y<8; y++){
4171                 for(x=0; x<8; x++){
4172                     double s= 0.25*(1<<BASIS_SHIFT);
4173                     int index= 8*i + j;
4174                     int perm_index= perm[index];
4175                     if(i==0) s*= sqrt(0.5);
4176                     if(j==0) s*= sqrt(0.5);
4177                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4178                 }
4179             }
4180         }
4181     }
4182 }
4183
4184 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4185                         int16_t *block, int16_t *weight, int16_t *orig,
4186                         int n, int qscale){
4187     int16_t rem[64];
4188     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4189     const uint8_t *scantable;
4190     const uint8_t *perm_scantable;
4191 //    unsigned int threshold1, threshold2;
4192 //    int bias=0;
4193     int run_tab[65];
4194     int prev_run=0;
4195     int prev_level=0;
4196     int qmul, qadd, start_i, last_non_zero, i, dc;
4197     uint8_t * length;
4198     uint8_t * last_length;
4199     int lambda;
4200     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4201
4202     if(basis[0][0] == 0)
4203         build_basis(s->idsp.idct_permutation);
4204
4205     qmul= qscale*2;
4206     qadd= (qscale-1)|1;
4207     if (s->mb_intra) {
4208         scantable= s->intra_scantable.scantable;
4209         perm_scantable= s->intra_scantable.permutated;
4210         if (!s->h263_aic) {
4211             if (n < 4)
4212                 q = s->y_dc_scale;
4213             else
4214                 q = s->c_dc_scale;
4215         } else{
4216             /* For AIC we skip quant/dequant of INTRADC */
4217             q = 1;
4218             qadd=0;
4219         }
4220         q <<= RECON_SHIFT-3;
4221         /* note: block[0] is assumed to be positive */
4222         dc= block[0]*q;
4223 //        block[0] = (block[0] + (q >> 1)) / q;
4224         start_i = 1;
4225 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4226 //            bias= 1<<(QMAT_SHIFT-1);
4227         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4228             length     = s->intra_chroma_ac_vlc_length;
4229             last_length= s->intra_chroma_ac_vlc_last_length;
4230         } else {
4231             length     = s->intra_ac_vlc_length;
4232             last_length= s->intra_ac_vlc_last_length;
4233         }
4234     } else {
4235         scantable= s->inter_scantable.scantable;
4236         perm_scantable= s->inter_scantable.permutated;
4237         dc= 0;
4238         start_i = 0;
4239         length     = s->inter_ac_vlc_length;
4240         last_length= s->inter_ac_vlc_last_length;
4241     }
4242     last_non_zero = s->block_last_index[n];
4243
4244     dc += (1<<(RECON_SHIFT-1));
4245     for(i=0; i<64; i++){
4246         rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4247     }
4248
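    /* Remap the incoming noise-shaping weights into the range 16..63 and
     * accumulate their squared sum; the sum is folded into lambda below,
     * scaling the bit-cost term so it is comparable with the weighted
     * distortion returned by try_8x8basis(). */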
4249     sum=0;
4250     for(i=0; i<64; i++){
4251         int one= 36;
4252         int qns=4;
4253         int w;
4254
4255         w= FFABS(weight[i]) + qns*one;
4256         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4257
4258         weight[i] = w;
4259 //        w=weight[i] = (63*qns + (w/2)) / w;
4260
4261         av_assert2(w>0);
4262         av_assert2(w<(1<<6));
4263         sum += w*w;
4264     }
4265     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4266
4267     run=0;
4268     rle_index=0;
4269     for(i=start_i; i<=last_non_zero; i++){
4270         int j= perm_scantable[i];
4271         const int level= block[j];
4272         int coeff;
4273
4274         if(level){
4275             if(level<0) coeff= qmul*level - qadd;
4276             else        coeff= qmul*level + qadd;
4277             run_tab[rle_index++]=run;
4278             run=0;
4279
4280             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4281         }else{
4282             run++;
4283         }
4284     }
4285
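    /* rem[] now holds the reconstruction error (dequantized block minus the
     * original, in RECON_SHIFT fixed point) and run_tab[] the zero-runs
     * between coded coefficients.  Each pass below tries changing every
     * coefficient by +-1, scores the rate change (VLC length deltas) plus the
     * weighted distortion change (try_8x8basis), applies the single best
     * change, and stops once no change improves the score. */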
4286     for(;;){
4287         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4288         int best_coeff=0;
4289         int best_change=0;
4290         int run2, best_unquant_change=0, analyze_gradient;
4291         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4292
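        /* When enabled, take the DCT of the weighted reconstruction error;
         * its per-frequency sign is used further down to reject candidate
         * coefficients that would push the error even further in the same
         * direction. */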
4293         if(analyze_gradient){
4294             for(i=0; i<64; i++){
4295                 int w= weight[i];
4296
4297                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4298             }
4299             s->fdsp.fdct(d1);
4300         }
4301
4302         if(start_i){
4303             const int level= block[0];
4304             int change, old_coeff;
4305
4306             av_assert2(s->mb_intra);
4307
4308             old_coeff= q*level;
4309
4310             for(change=-1; change<=1; change+=2){
4311                 int new_level= level + change;
4312                 int score, new_coeff;
4313
4314                 new_coeff= q*new_level;
4315                 if(new_coeff >= 2048 || new_coeff < 0)
4316                     continue;
4317
4318                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4319                                                   new_coeff - old_coeff);
4320                 if(score<best_score){
4321                     best_score= score;
4322                     best_coeff= 0;
4323                     best_change= change;
4324                     best_unquant_change= new_coeff - old_coeff;
4325                 }
4326             }
4327         }
4328
4329         run=0;
4330         rle_index=0;
4331         run2= run_tab[rle_index++];
4332         prev_level=0;
4333         prev_run=0;
4334
4335         for(i=start_i; i<64; i++){
4336             int j= perm_scantable[i];
4337             const int level= block[j];
4338             int change, old_coeff;
4339
4340             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4341                 break;
4342
4343             if(level){
4344                 if(level<0) old_coeff= qmul*level - qadd;
4345                 else        old_coeff= qmul*level + qadd;
4346                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4347             }else{
4348                 old_coeff=0;
4349                 run2--;
4350                 av_assert2(run2>=0 || i >= last_non_zero );
4351             }
4352
4353             for(change=-1; change<=1; change+=2){
4354                 int new_level= level + change;
4355                 int score, new_coeff, unquant_change;
4356
4357                 score=0;
4358                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4359                    continue;
4360
4361                 if(new_level){
4362                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4363                     else            new_coeff= qmul*new_level + qadd;
4364                     if(new_coeff >= 2048 || new_coeff <= -2048)
4365                         continue;
4366                     //FIXME check for overflow
4367
4368                     if(level){
4369                         if(level < 63 && level > -63){
4370                             if(i < last_non_zero)
4371                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4372                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4373                             else
4374                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4375                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4376                         }
4377                     }else{
4378                         av_assert2(FFABS(new_level)==1);
4379
4380                         if(analyze_gradient){
4381                             int g= d1[ scantable[i] ];
4382                             if(g && (g^new_level) >= 0)
4383                                 continue;
4384                         }
4385
4386                         if(i < last_non_zero){
4387                             int next_i= i + run2 + 1;
4388                             int next_level= block[ perm_scantable[next_i] ] + 64;
4389
4390                             if(next_level&(~127))
4391                                 next_level= 0;
4392
4393                             if(next_i < last_non_zero)
4394                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4395                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4396                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4397                             else
4398                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4399                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4400                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4401                         }else{
4402                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4403                             if(prev_level){
4404                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4405                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4406                             }
4407                         }
4408                     }
4409                 }else{
4410                     new_coeff=0;
4411                     av_assert2(FFABS(level)==1);
4412
4413                     if(i < last_non_zero){
4414                         int next_i= i + run2 + 1;
4415                         int next_level= block[ perm_scantable[next_i] ] + 64;
4416
4417                         if(next_level&(~127))
4418                             next_level= 0;
4419
4420                         if(next_i < last_non_zero)
4421                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4422                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4423                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4424                         else
4425                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4426                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4427                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4428                     }else{
4429                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4430                         if(prev_level){
4431                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4432                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4433                         }
4434                     }
4435                 }
4436
4437                 score *= lambda;
4438
4439                 unquant_change= new_coeff - old_coeff;
4440                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4441
4442                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4443                                                    unquant_change);
4444                 if(score<best_score){
4445                     best_score= score;
4446                     best_coeff= i;
4447                     best_change= change;
4448                     best_unquant_change= unquant_change;
4449                 }
4450             }
4451             if(level){
4452                 prev_level= level + 64;
4453                 if(prev_level&(~127))
4454                     prev_level= 0;
4455                 prev_run= run;
4456                 run=0;
4457             }else{
4458                 run++;
4459             }
4460         }
4461
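        /* Apply the single best change found in this pass (if any): update
         * the block, extend or shrink last_non_zero, rebuild the run table
         * and fold the coefficient change into rem[]; if nothing improved
         * the score, the refinement is done. */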
4462         if(best_change){
4463             int j= perm_scantable[ best_coeff ];
4464
4465             block[j] += best_change;
4466
4467             if(best_coeff > last_non_zero){
4468                 last_non_zero= best_coeff;
4469                 av_assert2(block[j]);
4470             }else{
4471                 for(; last_non_zero>=start_i; last_non_zero--){
4472                     if(block[perm_scantable[last_non_zero]])
4473                         break;
4474                 }
4475             }
4476
4477             run=0;
4478             rle_index=0;
4479             for(i=start_i; i<=last_non_zero; i++){
4480                 int j= perm_scantable[i];
4481                 const int level= block[j];
4482
4483                  if(level){
4484                      run_tab[rle_index++]=run;
4485                      run=0;
4486                  }else{
4487                      run++;
4488                  }
4489             }
4490
4491             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4492         }else{
4493             break;
4494         }
4495     }
4496
4497     return last_non_zero;
4498 }
4499
4500 /**
4501  * Permute an 8x8 block according to permutation.
4502  * @param block the block which will be permuted according to
4503  *              the given permutation vector
4504  * @param permutation the permutation vector
4505  * @param last the last non-zero coefficient in scantable order, used to
4506  *             speed up the permutation
4507  * @param scantable the scantable that was used; this is only needed to
4508  *                  speed up the permutation, the block is not (inverse)
4509  *                  permuted to scantable order!
4510  */
4511 void ff_block_permute(int16_t *block, uint8_t *permutation,
4512                       const uint8_t *scantable, int last)
4513 {
4514     int i;
4515     int16_t temp[64];
4516
4517     if (last <= 0)
4518         return;
4519     //FIXME it is ok but not clean and might fail for some permutations
4520     // if (permutation[1] == 1)
4521     // return;
4522
4523     for (i = 0; i <= last; i++) {
4524         const int j = scantable[i];
4525         temp[j] = block[j];
4526         block[j] = 0;
4527     }
4528
4529     for (i = 0; i <= last; i++) {
4530         const int j = scantable[i];
4531         const int perm_j = permutation[j];
4532         block[perm_j] = temp[j];
4533     }
4534 }
4535
4536 int ff_dct_quantize_c(MpegEncContext *s,
4537                         int16_t *block, int n,
4538                         int qscale, int *overflow)
4539 {
4540     int i, j, level, last_non_zero, q, start_i;
4541     const int *qmat;
4542     const uint8_t *scantable;
4543     int bias;
4544     int max=0;
4545     unsigned int threshold1, threshold2;
4546
4547     s->fdsp.fdct(block);
4548
4549     if(s->dct_error_sum)
4550         s->denoise_dct(s, block);
4551
4552     if (s->mb_intra) {
4553         scantable= s->intra_scantable.scantable;
4554         if (!s->h263_aic) {
4555             if (n < 4)
4556                 q = s->y_dc_scale;
4557             else
4558                 q = s->c_dc_scale;
4559             q = q << 3;
4560         } else
4561             /* For AIC we skip quant/dequant of INTRADC */
4562             q = 1 << 3;
4563
4564         /* note: block[0] is assumed to be positive */
4565         block[0] = (block[0] + (q >> 1)) / q;
4566         start_i = 1;
4567         last_non_zero = 0;
4568         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4569         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4570     } else {
4571         scantable= s->inter_scantable.scantable;
4572         start_i = 0;
4573         last_non_zero = -1;
4574         qmat = s->q_inter_matrix[qscale];
4575         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4576     }
4577     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4578     threshold2= (threshold1<<1);
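    /* (unsigned)(level+threshold1) > threshold2 is equivalent to
     * |level| > threshold1, i.e. the coefficient quantizes to a nonzero
     * value.  The first (reverse) scan finds the last such coefficient and
     * zeroes everything after it; the second scan quantizes the rest. */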
4579     for(i=63;i>=start_i;i--) {
4580         j = scantable[i];
4581         level = block[j] * qmat[j];
4582
4583         if(((unsigned)(level+threshold1))>threshold2){
4584             last_non_zero = i;
4585             break;
4586         }else{
4587             block[j]=0;
4588         }
4589     }
4590     for(i=start_i; i<=last_non_zero; i++) {
4591         j = scantable[i];
4592         level = block[j] * qmat[j];
4593
4594 //        if(   bias+level >= (1<<QMAT_SHIFT)
4595 //           || bias-level >= (1<<QMAT_SHIFT)){
4596         if(((unsigned)(level+threshold1))>threshold2){
4597             if(level>0){
4598                 level= (bias + level)>>QMAT_SHIFT;
4599                 block[j]= level;
4600             }else{
4601                 level= (bias - level)>>QMAT_SHIFT;
4602                 block[j]= -level;
4603             }
4604             max |=level;
4605         }else{
4606             block[j]=0;
4607         }
4608     }
4609     *overflow= s->max_qcoeff < max; //overflow might have happened
4610
4611     /* we need this permutation so that the IDCT receives the coefficients in the order it expects; only the nonzero elements need to be permuted */
4612     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4613         ff_block_permute(block, s->idsp.idct_permutation,
4614                       scantable, last_non_zero);
4615
4616     return last_non_zero;
4617 }
4618
4619 #define OFFSET(x) offsetof(MpegEncContext, x)
4620 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4621 static const AVOption h263_options[] = {
4622     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4623     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4624     FF_MPV_COMMON_OPTS
4625 #if FF_API_MPEGVIDEO_OPTS
4626     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4627     FF_MPV_DEPRECATED_A53_CC_OPT
4628     FF_MPV_DEPRECATED_MATRIX_OPT
4629     FF_MPV_DEPRECATED_BFRAME_OPTS
4630 #endif
4631     { NULL },
4632 };
4633
4634 static const AVClass h263_class = {
4635     .class_name = "H.263 encoder",
4636     .item_name  = av_default_item_name,
4637     .option     = h263_options,
4638     .version    = LIBAVUTIL_VERSION_INT,
4639 };
4640
4641 AVCodec ff_h263_encoder = {
4642     .name           = "h263",
4643     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4644     .type           = AVMEDIA_TYPE_VIDEO,
4645     .id             = AV_CODEC_ID_H263,
4646     .priv_data_size = sizeof(MpegEncContext),
4647     .init           = ff_mpv_encode_init,
4648     .encode2        = ff_mpv_encode_picture,
4649     .close          = ff_mpv_encode_end,
4650     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4651     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4652     .priv_class     = &h263_class,
4653 };
4654
4655 static const AVOption h263p_options[] = {
4656     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus),       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4657     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4658     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4659     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE},
4660     FF_MPV_COMMON_OPTS
4661 #if FF_API_MPEGVIDEO_OPTS
4662     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4663     FF_MPV_DEPRECATED_A53_CC_OPT
4664     FF_MPV_DEPRECATED_MATRIX_OPT
4665     FF_MPV_DEPRECATED_BFRAME_OPTS
4666 #endif
4667     { NULL },
4668 };
4669 static const AVClass h263p_class = {
4670     .class_name = "H.263p encoder",
4671     .item_name  = av_default_item_name,
4672     .option     = h263p_options,
4673     .version    = LIBAVUTIL_VERSION_INT,
4674 };
4675
4676 AVCodec ff_h263p_encoder = {
4677     .name           = "h263p",
4678     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4679     .type           = AVMEDIA_TYPE_VIDEO,
4680     .id             = AV_CODEC_ID_H263P,
4681     .priv_data_size = sizeof(MpegEncContext),
4682     .init           = ff_mpv_encode_init,
4683     .encode2        = ff_mpv_encode_picture,
4684     .close          = ff_mpv_encode_end,
4685     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4686     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4687     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4688     .priv_class     = &h263p_class,
4689 };
4690
4691 static const AVClass msmpeg4v2_class = {
4692     .class_name = "msmpeg4v2 encoder",
4693     .item_name  = av_default_item_name,
4694     .option     = ff_mpv_generic_options,
4695     .version    = LIBAVUTIL_VERSION_INT,
4696 };
4697
4698 AVCodec ff_msmpeg4v2_encoder = {
4699     .name           = "msmpeg4v2",
4700     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4701     .type           = AVMEDIA_TYPE_VIDEO,
4702     .id             = AV_CODEC_ID_MSMPEG4V2,
4703     .priv_data_size = sizeof(MpegEncContext),
4704     .init           = ff_mpv_encode_init,
4705     .encode2        = ff_mpv_encode_picture,
4706     .close          = ff_mpv_encode_end,
4707     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4708     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4709     .priv_class     = &msmpeg4v2_class,
4710 };
4711
4712 static const AVClass msmpeg4v3_class = {
4713     .class_name = "msmpeg4v3 encoder",
4714     .item_name  = av_default_item_name,
4715     .option     = ff_mpv_generic_options,
4716     .version    = LIBAVUTIL_VERSION_INT,
4717 };
4718
4719 AVCodec ff_msmpeg4v3_encoder = {
4720     .name           = "msmpeg4",
4721     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4722     .type           = AVMEDIA_TYPE_VIDEO,
4723     .id             = AV_CODEC_ID_MSMPEG4V3,
4724     .priv_data_size = sizeof(MpegEncContext),
4725     .init           = ff_mpv_encode_init,
4726     .encode2        = ff_mpv_encode_picture,
4727     .close          = ff_mpv_encode_end,
4728     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4729     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4730     .priv_class     = &msmpeg4v3_class,
4731 };
4732
4733 static const AVClass wmv1_class = {
4734     .class_name = "wmv1 encoder",
4735     .item_name  = av_default_item_name,
4736     .option     = ff_mpv_generic_options,
4737     .version    = LIBAVUTIL_VERSION_INT,
4738 };
4739
4740 AVCodec ff_wmv1_encoder = {
4741     .name           = "wmv1",
4742     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4743     .type           = AVMEDIA_TYPE_VIDEO,
4744     .id             = AV_CODEC_ID_WMV1,
4745     .priv_data_size = sizeof(MpegEncContext),
4746     .init           = ff_mpv_encode_init,
4747     .encode2        = ff_mpv_encode_picture,
4748     .close          = ff_mpv_encode_end,
4749     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4750     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4751     .priv_class     = &wmv1_class,
4752 };