libavcodec/mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /*
26  * Fixes for non-linear quantizers with large QPs and for VBV with restrictive qmin, sponsored by NOA GmbH
27  */
28
29 /**
30  * @file
31  * The simplest mpeg encoder (well, it was the simplest!).
32  */
33
34 #include <stdint.h>
35
36 #include "libavutil/internal.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/mem_internal.h"
40 #include "libavutil/pixdesc.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/thread.h"
43 #include "avcodec.h"
44 #include "dct.h"
45 #include "idctdsp.h"
46 #include "mpeg12.h"
47 #include "mpegvideo.h"
48 #include "mpegvideodata.h"
49 #include "h261.h"
50 #include "h263.h"
51 #include "h263data.h"
52 #include "mjpegenc_common.h"
53 #include "mathops.h"
54 #include "mpegutils.h"
55 #include "mjpegenc.h"
56 #include "speedhqenc.h"
57 #include "msmpeg4.h"
58 #include "pixblockdsp.h"
59 #include "qpeldsp.h"
60 #include "faandct.h"
61 #include "thread.h"
62 #include "aandcttab.h"
63 #include "flv.h"
64 #include "mpeg4video.h"
65 #include "internal.h"
66 #include "bytestream.h"
67 #include "wmv2.h"
68 #include "rv10.h"
69 #include "packet_internal.h"
70 #include <limits.h>
71 #include "sp5x.h"
72
73 #define QUANT_BIAS_SHIFT 8
74
75 #define QMAT_SHIFT_MMX 16
76 #define QMAT_SHIFT 21
77
78 static int encode_picture(MpegEncContext *s, int picture_number);
79 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
80 static int sse_mb(MpegEncContext *s);
81 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
82 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
83
84 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_DMV * 2 + 1];
85 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
86
87 const AVOption ff_mpv_generic_options[] = {
88     FF_MPV_COMMON_OPTS
89 #if FF_API_MPEGVIDEO_OPTS
90     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
91     FF_MPV_DEPRECATED_A53_CC_OPT
92     FF_MPV_DEPRECATED_MATRIX_OPT
93     FF_MPV_DEPRECATED_BFRAME_OPTS
94 #endif
95     { NULL },
96 };
97
98 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
99                        uint16_t (*qmat16)[2][64],
100                        const uint16_t *quant_matrix,
101                        int bias, int qmin, int qmax, int intra)
102 {
103     FDCTDSPContext *fdsp = &s->fdsp;
104     int qscale;
105     int shift = 0;
106
107     for (qscale = qmin; qscale <= qmax; qscale++) {
108         int i;
109         int qscale2;
110
111         if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
112         else                 qscale2 = qscale << 1;
113
114         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
115 #if CONFIG_FAANDCT
116             fdsp->fdct == ff_faandct            ||
117 #endif /* CONFIG_FAANDCT */
118             fdsp->fdct == ff_jpeg_fdct_islow_10) {
119             for (i = 0; i < 64; i++) {
120                 const int j = s->idsp.idct_permutation[i];
121                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
122                 /* 16 <= qscale * quant_matrix[i] <= 7905
123                  * (ff_aanscales[] plays no role in this branch, since
124                  * ff_jpeg_fdct_islow_* and ff_faandct output unscaled
125                  * coefficients.)  With den = qscale2 * quant_matrix[i],
126                  * the entry stores (2 << QMAT_SHIFT) / den = (1 << 22) / den. */
127
128                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
129             }
130         } else if (fdsp->fdct == ff_fdct_ifast) {
131             for (i = 0; i < 64; i++) {
132                 const int j = s->idsp.idct_permutation[i];
133                 int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
134                 /* 16 <= qscale * quant_matrix[i] <= 7905
135                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
136                  *             19952 <=              x  <= 249205026
137                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
138                  *           3444240 >= (1 << 36) / (x) >= 275 */
139
140                 qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
141             }
142         } else {
143             for (i = 0; i < 64; i++) {
144                 const int j = s->idsp.idct_permutation[i];
145                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
146                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
147                  * Assume x = qscale * quant_matrix[i]
148                  * So             16 <=              x  <= 7905
149                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
150                  * so          32768 >= (1 << 19) / (x) >= 67 */
151                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
152                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
153                 //                    (qscale * quant_matrix[i]);
154                 qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
155
156                 if (qmat16[qscale][0][i] == 0 ||
157                     qmat16[qscale][0][i] == 128 * 256)
158                     qmat16[qscale][0][i] = 128 * 256 - 1;
159                 qmat16[qscale][1][i] =
160                     ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
161                                 qmat16[qscale][0][i]);
162             }
163         }
164
165         for (i = intra; i < 64; i++) {
166             int64_t max = 8191;
167             if (fdsp->fdct == ff_fdct_ifast) {
168                 max = (8191LL * ff_aanscales[i]) >> 14;
169             }
170             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
171                 shift++;
172             }
173         }
174     }
175     if (shift) {
176         av_log(s->avctx, AV_LOG_INFO,
177                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
178                QMAT_SHIFT - shift);
179     }
180 }
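/* Rough illustration of how the tables built by ff_convert_matrix() are used
 * by the quantizers: with QMAT_SHIFT = 21 an entry stores
 * (2 << 21) / den = (1 << 22) / den, so a per-coefficient division by den can
 * be replaced by a multiply and a right shift.  E.g. den = 256 gives
 * qmat = 4194304 / 256 = 16384, and (coef * 16384) >> 21 == coef / 128,
 * i.e. roughly 2 * coef / den.  qmat16 holds analogous reciprocals scaled for
 * the 16-bit QMAT_SHIFT_MMX path plus a rounding-bias term, clamped so an
 * entry never ends up as 0 or exactly 128 * 256. */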
181
182 static inline void update_qscale(MpegEncContext *s)
183 {
184     if (s->q_scale_type == 1 && 0) {
185         int i;
186         int bestdiff=INT_MAX;
187         int best = 1;
188
189         for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
190             int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
191             if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
192                 (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
193                 continue;
194             if (diff < bestdiff) {
195                 bestdiff = diff;
196                 best = i;
197             }
198         }
199         s->qscale = best;
200     } else {
201         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
202                     (FF_LAMBDA_SHIFT + 7);
203         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
204     }
205
206     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
207                  FF_LAMBDA_SHIFT;
208 }
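/* Worked numbers for the linear branch of update_qscale() (illustrative):
 * FF_LAMBDA_SHIFT is 7 and FF_LAMBDA_SCALE is 1 << 7, so the expression is
 * (lambda * 139 + 8192) >> 14, roughly lambda / 117.9, which maps a lambda of
 * about qscale * FF_QP2LAMBDA (118) back to qscale.  For example,
 * lambda = 1180 gives (164020 + 8192) >> 14 = 10. */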
209
210 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
211 {
212     int i;
213
214     if (matrix) {
215         put_bits(pb, 1, 1);
216         for (i = 0; i < 64; i++) {
217             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
218         }
219     } else
220         put_bits(pb, 1, 0);
221 }
222
223 /**
224  * init s->current_picture.qscale_table from s->lambda_table
225  */
226 void ff_init_qscale_tab(MpegEncContext *s)
227 {
228     int8_t * const qscale_table = s->current_picture.qscale_table;
229     int i;
230
231     for (i = 0; i < s->mb_num; i++) {
232         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
233         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
234         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
235                                                   s->avctx->qmax);
236     }
237 }
238
239 static void update_duplicate_context_after_me(MpegEncContext *dst,
240                                               MpegEncContext *src)
241 {
242 #define COPY(a) dst->a= src->a
243     COPY(pict_type);
244     COPY(current_picture);
245     COPY(f_code);
246     COPY(b_code);
247     COPY(qscale);
248     COPY(lambda);
249     COPY(lambda2);
250     COPY(picture_in_gop_number);
251     COPY(gop_picture_number);
252     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
253     COPY(progressive_frame);    // FIXME don't set in encode_header
254     COPY(partitioned_frame);    // FIXME don't set in encode_header
255 #undef COPY
256 }
257
258 static void mpv_encode_init_static(void)
259 {
260    for (int i = -16; i < 16; i++)
261         default_fcode_tab[i + MAX_MV] = 1;
262 }
263
264 /**
265  * Set the given MpegEncContext to defaults for encoding.
266  * The changed fields will not depend upon the prior state of the MpegEncContext.
267  */
268 static void mpv_encode_defaults(MpegEncContext *s)
269 {
270     static AVOnce init_static_once = AV_ONCE_INIT;
271
272     ff_mpv_common_defaults(s);
273
274     ff_thread_once(&init_static_once, mpv_encode_init_static);
275
276     s->me.mv_penalty = default_mv_penalty;
277     s->fcode_tab     = default_fcode_tab;
278
279     s->input_picture_number  = 0;
280     s->picture_in_gop_number = 0;
281 }
282
283 av_cold int ff_dct_encode_init(MpegEncContext *s)
284 {
285     if (ARCH_X86)
286         ff_dct_encode_init_x86(s);
287
288     if (CONFIG_H263_ENCODER)
289         ff_h263dsp_init(&s->h263dsp);
290     if (!s->dct_quantize)
291         s->dct_quantize = ff_dct_quantize_c;
292     if (!s->denoise_dct)
293         s->denoise_dct  = denoise_dct_c;
294     s->fast_dct_quantize = s->dct_quantize;
295     if (s->avctx->trellis)
296         s->dct_quantize  = dct_quantize_trellis_c;
297
298     return 0;
299 }
300
301 /* init video encoder */
302 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
303 {
304     MpegEncContext *s = avctx->priv_data;
305     AVCPBProperties *cpb_props;
306     int i, ret;
307
308     mpv_encode_defaults(s);
309
310     switch (avctx->pix_fmt) {
311     case AV_PIX_FMT_YUVJ444P:
312     case AV_PIX_FMT_YUV444P:
313         s->chroma_format = CHROMA_444;
314         break;
315     case AV_PIX_FMT_YUVJ422P:
316     case AV_PIX_FMT_YUV422P:
317         s->chroma_format = CHROMA_422;
318         break;
319     case AV_PIX_FMT_YUVJ420P:
320     case AV_PIX_FMT_YUV420P:
321     default:
322         s->chroma_format = CHROMA_420;
323         break;
324     }
325
326     avctx->bits_per_raw_sample = av_clip(avctx->bits_per_raw_sample, 0, 8);
327
328     s->bit_rate = avctx->bit_rate;
329     s->width    = avctx->width;
330     s->height   = avctx->height;
331     if (avctx->gop_size > 600 &&
332         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
333         av_log(avctx, AV_LOG_WARNING,
334                "keyframe interval too large, reducing it from %d to %d\n",
335                avctx->gop_size, 600);
336         avctx->gop_size = 600;
337     }
338     s->gop_size     = avctx->gop_size;
339     s->avctx        = avctx;
340     if (avctx->max_b_frames > MAX_B_FRAMES) {
341         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
342                "is %d.\n", MAX_B_FRAMES);
343         avctx->max_b_frames = MAX_B_FRAMES;
344     }
345     s->max_b_frames = avctx->max_b_frames;
346     s->codec_id     = avctx->codec->id;
347     s->strict_std_compliance = avctx->strict_std_compliance;
348     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
349     s->rtp_mode           = !!s->rtp_payload_size;
350     s->intra_dc_precision = avctx->intra_dc_precision;
351
352     // workaround some differences between how applications specify dc precision
353     if (s->intra_dc_precision < 0) {
354         s->intra_dc_precision += 8;
355     } else if (s->intra_dc_precision >= 8)
356         s->intra_dc_precision -= 8;
357
358     if (s->intra_dc_precision < 0) {
359         av_log(avctx, AV_LOG_ERROR,
360                 "intra dc precision must be non-negative; note that some applications use"
361                 " 0 and some use 8 as the base meaning 8 bit, the value must not be smaller than that\n");
362         return AVERROR(EINVAL);
363     }
364
365     if (avctx->codec_id == AV_CODEC_ID_AMV || (avctx->active_thread_type & FF_THREAD_SLICE))
366         s->huffman = 0;
367
368     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
369         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
370         return AVERROR(EINVAL);
371     }
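    /* Illustrative mapping after the adjustment above: an application passing
     * 8 (meaning "8 bit") ends up with 0, and 11 ends up with 3.  MPEG-2
     * stores the value in a 2-bit field (0..3, i.e. 8..11 bit DC precision),
     * which is why every other codec here only accepts 0. */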
372     s->user_specified_pts = AV_NOPTS_VALUE;
373
374     if (s->gop_size <= 1) {
375         s->intra_only = 1;
376         s->gop_size   = 12;
377     } else {
378         s->intra_only = 0;
379     }
380
381     /* Fixed QSCALE */
382     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
383
384     s->adaptive_quant = (avctx->lumi_masking ||
385                          avctx->dark_masking ||
386                          avctx->temporal_cplx_masking ||
387                          avctx->spatial_cplx_masking  ||
388                          avctx->p_masking      ||
389                          s->border_masking ||
390                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
391                         !s->fixed_qscale;
392
393     s->loop_filter = !!(avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
394
395     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
396         switch(avctx->codec_id) {
397         case AV_CODEC_ID_MPEG1VIDEO:
398         case AV_CODEC_ID_MPEG2VIDEO:
399             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
400             break;
401         case AV_CODEC_ID_MPEG4:
402         case AV_CODEC_ID_MSMPEG4V1:
403         case AV_CODEC_ID_MSMPEG4V2:
404         case AV_CODEC_ID_MSMPEG4V3:
405             if       (avctx->rc_max_rate >= 15000000) {
406                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
407             } else if(avctx->rc_max_rate >=  2000000) {
408                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
409             } else if(avctx->rc_max_rate >=   384000) {
410                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
411             } else
412                 avctx->rc_buffer_size = 40;
413             avctx->rc_buffer_size *= 16384;
414             break;
415         }
416         if (avctx->rc_buffer_size) {
417             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
418         }
419     }
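    /* Worked example of the auto-selection above (illustrative): for MPEG-1/2
     * with rc_max_rate = 8 Mbit/s, FFMAX(8000000, 15000000) = 15000000, so
     * rc_buffer_size = 15000000 * 112 / 15000000 * 16384 = 1835008 bits and
     * the log reports 224 kbyte.  For MPEG-4 at exactly 2 Mbit/s the middle
     * branch yields 80 * 16384 = 1310720 bits (160 kbyte). */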
420
421     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
422         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
423         return AVERROR(EINVAL);
424     }
425
426     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
427         av_log(avctx, AV_LOG_INFO,
428                "Warning: min_rate > 0 but min_rate != max_rate isn't recommended!\n");
429     }
430
431     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
432         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
433         return AVERROR(EINVAL);
434     }
435
436     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
437         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
438         return AVERROR(EINVAL);
439     }
440
441     if (avctx->rc_max_rate &&
442         avctx->rc_max_rate == avctx->bit_rate &&
443         avctx->rc_max_rate != avctx->rc_min_rate) {
444         av_log(avctx, AV_LOG_INFO,
445                "impossible bitrate constraints, this will fail\n");
446     }
447
448     if (avctx->rc_buffer_size &&
449         avctx->bit_rate * (int64_t)avctx->time_base.num >
450             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
451         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
452         return AVERROR(EINVAL);
453     }
454
455     if (!s->fixed_qscale &&
456         avctx->bit_rate * av_q2d(avctx->time_base) >
457             avctx->bit_rate_tolerance) {
458         av_log(avctx, AV_LOG_WARNING,
459                "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
460         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
461     }
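    /* Illustrative numbers: at bit_rate = 1 Mbit/s and time_base = 1/25 the
     * left-hand side is 1000000 / 25 = 40000, i.e. roughly one frame's worth
     * of bits, so any tolerance below that is widened to 5 * 40000 = 200000. */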
462
463     if (avctx->rc_max_rate &&
464         avctx->rc_min_rate == avctx->rc_max_rate &&
465         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
466          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
467         90000LL * (avctx->rc_buffer_size - 1) >
468             avctx->rc_max_rate * 0xFFFFLL) {
469         av_log(avctx, AV_LOG_INFO,
470                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
471                "specified vbv buffer is too large for the given bitrate!\n");
472     }
473
474     if ((avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
475         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
476         s->codec_id != AV_CODEC_ID_FLV1) {
477         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
478         return AVERROR(EINVAL);
479     }
480
481     if (s->obmc && avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
482         av_log(avctx, AV_LOG_ERROR,
483                "OBMC is only supported with simple mb decision\n");
484         return AVERROR(EINVAL);
485     }
486
487     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
488         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
489         return AVERROR(EINVAL);
490     }
491
492     if (s->max_b_frames                    &&
493         s->codec_id != AV_CODEC_ID_MPEG4      &&
494         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
495         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
496         av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
497         return AVERROR(EINVAL);
498     }
499     if (s->max_b_frames < 0) {
500         av_log(avctx, AV_LOG_ERROR,
501                "max b frames must be 0 or positive for mpegvideo based encoders\n");
502         return AVERROR(EINVAL);
503     }
504
505     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
506          s->codec_id == AV_CODEC_ID_H263  ||
507          s->codec_id == AV_CODEC_ID_H263P) &&
508         (avctx->sample_aspect_ratio.num > 255 ||
509          avctx->sample_aspect_ratio.den > 255)) {
510         av_log(avctx, AV_LOG_WARNING,
511                "Invalid pixel aspect ratio %i/%i, limit is 255/255, reducing\n",
512                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
513         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
514                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
515     }
516
517     if ((s->codec_id == AV_CODEC_ID_H263  ||
518          s->codec_id == AV_CODEC_ID_H263P) &&
519         (avctx->width  > 2048 ||
520          avctx->height > 1152 )) {
521         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
522         return AVERROR(EINVAL);
523     }
524     if ((s->codec_id == AV_CODEC_ID_H263  ||
525          s->codec_id == AV_CODEC_ID_H263P ||
526          s->codec_id == AV_CODEC_ID_RV20) &&
527         ((avctx->width &3) ||
528          (avctx->height&3) )) {
529         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
530         return AVERROR(EINVAL);
531     }
532
533     if (s->codec_id == AV_CODEC_ID_RV10 &&
534         (avctx->width &15 ||
535          avctx->height&15 )) {
536         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
537         return AVERROR(EINVAL);
538     }
539
540     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
541          s->codec_id == AV_CODEC_ID_WMV2) &&
542          avctx->width & 1) {
543         av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
544         return AVERROR(EINVAL);
545     }
546
547     if ((avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
548         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
549         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
550         return AVERROR(EINVAL);
551     }
552
553     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
554         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
555         return AVERROR(EINVAL);
556     }
557
558     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
559         avctx->mb_decision != FF_MB_DECISION_RD) {
560         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
561         return AVERROR(EINVAL);
562     }
563
564     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
565             (s->codec_id == AV_CODEC_ID_AMV ||
566              s->codec_id == AV_CODEC_ID_MJPEG)) {
567         // Used to produce garbage with MJPEG.
568         av_log(avctx, AV_LOG_ERROR,
569                "QP RD is no longer compatible with MJPEG or AMV\n");
570         return AVERROR(EINVAL);
571     }
572
573     if (s->scenechange_threshold < 1000000000 &&
574         (avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
575         av_log(avctx, AV_LOG_ERROR,
576                "closed gop with scene change detection is not supported yet, "
577                "set threshold to 1000000000\n");
578         return AVERROR_PATCHWELCOME;
579     }
580
581     if (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
582         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
583             s->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
584             av_log(avctx, AV_LOG_ERROR,
585                    "low delay forcing is only available for mpeg2, "
586                    "set strict_std_compliance to 'unofficial' or lower in order to allow it\n");
587             return AVERROR(EINVAL);
588         }
589         if (s->max_b_frames != 0) {
590             av_log(avctx, AV_LOG_ERROR,
591                    "B-frames cannot be used with low delay\n");
592             return AVERROR(EINVAL);
593         }
594     }
595
596     if (s->q_scale_type == 1) {
597         if (avctx->qmax > 28) {
598             av_log(avctx, AV_LOG_ERROR,
599                    "non linear quant only supports qmax <= 28 currently\n");
600             return AVERROR_PATCHWELCOME;
601         }
602     }
603
604     if (avctx->slices > 1 &&
605         (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
606         av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
607         return AVERROR(EINVAL);
608     }
609
610     if (avctx->thread_count > 1         &&
611         s->codec_id != AV_CODEC_ID_MPEG4      &&
612         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
613         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
614         s->codec_id != AV_CODEC_ID_MJPEG      &&
615         (s->codec_id != AV_CODEC_ID_H263P)) {
616         av_log(avctx, AV_LOG_ERROR,
617                "multi threaded encoding not supported by codec\n");
618         return AVERROR_PATCHWELCOME;
619     }
620
621     if (avctx->thread_count < 1) {
622         av_log(avctx, AV_LOG_ERROR,
623                "automatic thread number detection not supported by codec, "
624                "patch welcome\n");
625         return AVERROR_PATCHWELCOME;
626     }
627
628     if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
629         av_log(avctx, AV_LOG_INFO,
630                "notice: b_frame_strategy only affects the first pass\n");
631         s->b_frame_strategy = 0;
632     }
633
634     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
635     if (i > 1) {
636         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
637         avctx->time_base.den /= i;
638         avctx->time_base.num /= i;
639         //return -1;
640     }
641
642     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id == AV_CODEC_ID_AMV || s->codec_id == AV_CODEC_ID_SPEEDHQ) {
643         // (a + x * 3 / 8) / x
644         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
645         s->inter_quant_bias = 0;
646     } else {
647         s->intra_quant_bias = 0;
648         // (a - x / 4) / x
649         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
650     }
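    /* With QUANT_BIAS_SHIFT = 8 these biases are fixed-point fractions of the
     * quantization step: 3 << 5 = 96 is +3/8 (rounding intra coefficients up)
     * and -(1 << 6) = -64 is -1/4 (rounding inter coefficients towards zero),
     * matching the "(a + x * 3 / 8) / x" and "(a - x / 4) / x" notes above. */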
651
652     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
653         av_log(avctx, AV_LOG_ERROR, "qmin and/or qmax are invalid, they must satisfy 0 < min <= max\n");
654         return AVERROR(EINVAL);
655     }
656
657     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
658
659     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
660         avctx->time_base.den > (1 << 16) - 1) {
661         av_log(avctx, AV_LOG_ERROR,
662                "timebase %d/%d not supported by MPEG 4 standard, "
663                "the maximum admitted value for the timebase denominator "
664                "is %d\n", avctx->time_base.num, avctx->time_base.den,
665                (1 << 16) - 1);
666         return AVERROR(EINVAL);
667     }
668     s->time_increment_bits = av_log2(avctx->time_base.den - 1) + 1;
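    /* e.g. time_base.den = 30000 needs av_log2(29999) + 1 = 15 bits, enough
     * to code any MPEG-4 vop_time_increment in the range 0..29999. */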
669
670     switch (avctx->codec->id) {
671     case AV_CODEC_ID_MPEG1VIDEO:
672         s->out_format = FMT_MPEG1;
673         s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
674         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
675         break;
676     case AV_CODEC_ID_MPEG2VIDEO:
677         s->out_format = FMT_MPEG1;
678         s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
679         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
680         s->rtp_mode   = 1;
681         break;
682 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
683     case AV_CODEC_ID_MJPEG:
684     case AV_CODEC_ID_AMV:
685         s->out_format = FMT_MJPEG;
686         s->intra_only = 1; /* force intra only for jpeg */
687         if ((ret = ff_mjpeg_encode_init(s)) < 0)
688             return ret;
689         avctx->delay = 0;
690         s->low_delay = 1;
691         break;
692 #endif
693     case AV_CODEC_ID_SPEEDHQ:
694         s->out_format = FMT_SPEEDHQ;
695         s->intra_only = 1; /* force intra only for SHQ */
696         if (!CONFIG_SPEEDHQ_ENCODER)
697             return AVERROR_ENCODER_NOT_FOUND;
698         if ((ret = ff_speedhq_encode_init(s)) < 0)
699             return ret;
700         avctx->delay = 0;
701         s->low_delay = 1;
702         break;
703     case AV_CODEC_ID_H261:
704         if (!CONFIG_H261_ENCODER)
705             return AVERROR_ENCODER_NOT_FOUND;
706         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
707             av_log(avctx, AV_LOG_ERROR,
708                    "The specified picture size of %dx%d is not valid for the "
709                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
710                     s->width, s->height);
711             return AVERROR(EINVAL);
712         }
713         s->out_format = FMT_H261;
714         avctx->delay  = 0;
715         s->low_delay  = 1;
716         s->rtp_mode   = 0; /* Sliced encoding not supported */
717         break;
718     case AV_CODEC_ID_H263:
719         if (!CONFIG_H263_ENCODER)
720             return AVERROR_ENCODER_NOT_FOUND;
721         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
722                              s->width, s->height) == 8) {
723             av_log(avctx, AV_LOG_ERROR,
724                    "The specified picture size of %dx%d is not valid for "
725                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
726                    "352x288, 704x576, and 1408x1152. "
727                    "Try H.263+.\n", s->width, s->height);
728             return AVERROR(EINVAL);
729         }
730         s->out_format = FMT_H263;
731         avctx->delay  = 0;
732         s->low_delay  = 1;
733         break;
734     case AV_CODEC_ID_H263P:
735         s->out_format = FMT_H263;
736         s->h263_plus  = 1;
737         /* Fx */
738         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
739         s->modified_quant  = s->h263_aic;
740         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
741         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
742
743         /* /Fx */
744         /* These are just to be sure */
745         avctx->delay = 0;
746         s->low_delay = 1;
747         break;
748     case AV_CODEC_ID_FLV1:
749         s->out_format      = FMT_H263;
750         s->h263_flv        = 2; /* format = 1; 11-bit codes */
751         s->unrestricted_mv = 1;
752         s->rtp_mode  = 0; /* don't allow GOB */
753         avctx->delay = 0;
754         s->low_delay = 1;
755         break;
756     case AV_CODEC_ID_RV10:
757         s->out_format = FMT_H263;
758         avctx->delay  = 0;
759         s->low_delay  = 1;
760         break;
761     case AV_CODEC_ID_RV20:
762         s->out_format      = FMT_H263;
763         avctx->delay       = 0;
764         s->low_delay       = 1;
765         s->modified_quant  = 1;
766         s->h263_aic        = 1;
767         s->h263_plus       = 1;
768         s->loop_filter     = 1;
769         s->unrestricted_mv = 0;
770         break;
771     case AV_CODEC_ID_MPEG4:
772         s->out_format      = FMT_H263;
773         s->h263_pred       = 1;
774         s->unrestricted_mv = 1;
775         s->low_delay       = s->max_b_frames ? 0 : 1;
776         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
777         break;
778     case AV_CODEC_ID_MSMPEG4V2:
779         s->out_format      = FMT_H263;
780         s->h263_pred       = 1;
781         s->unrestricted_mv = 1;
782         s->msmpeg4_version = 2;
783         avctx->delay       = 0;
784         s->low_delay       = 1;
785         break;
786     case AV_CODEC_ID_MSMPEG4V3:
787         s->out_format        = FMT_H263;
788         s->h263_pred         = 1;
789         s->unrestricted_mv   = 1;
790         s->msmpeg4_version   = 3;
791         s->flipflop_rounding = 1;
792         avctx->delay         = 0;
793         s->low_delay         = 1;
794         break;
795     case AV_CODEC_ID_WMV1:
796         s->out_format        = FMT_H263;
797         s->h263_pred         = 1;
798         s->unrestricted_mv   = 1;
799         s->msmpeg4_version   = 4;
800         s->flipflop_rounding = 1;
801         avctx->delay         = 0;
802         s->low_delay         = 1;
803         break;
804     case AV_CODEC_ID_WMV2:
805         s->out_format        = FMT_H263;
806         s->h263_pred         = 1;
807         s->unrestricted_mv   = 1;
808         s->msmpeg4_version   = 5;
809         s->flipflop_rounding = 1;
810         avctx->delay         = 0;
811         s->low_delay         = 1;
812         break;
813     default:
814         return AVERROR(EINVAL);
815     }
816
817     avctx->has_b_frames = !s->low_delay;
818
819     s->encoding = 1;
820
821     s->progressive_frame    =
822     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
823                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
824                                 s->alternate_scan);
825
826     /* init */
827     ff_mpv_idct_init(s);
828     if ((ret = ff_mpv_common_init(s)) < 0)
829         return ret;
830
831     ff_fdctdsp_init(&s->fdsp, avctx);
832     ff_me_cmp_init(&s->mecc, avctx);
833     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
834     ff_pixblockdsp_init(&s->pdsp, avctx);
835     ff_qpeldsp_init(&s->qdsp);
836
837     if (s->msmpeg4_version) {
838         int ac_stats_size = 2 * 2 * (MAX_LEVEL + 1) *  (MAX_RUN + 1) * 2 * sizeof(int);
839         if (!(s->ac_stats = av_mallocz(ac_stats_size)))
840             return AVERROR(ENOMEM);
841     }
842
843     if (!(avctx->stats_out = av_mallocz(256))               ||
844         !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix,          32) ||
845         !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix,   32) ||
846         !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix,          32) ||
847         !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix16,        32) ||
848         !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix16, 32) ||
849         !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix16,        32) ||
850         !FF_ALLOCZ_TYPED_ARRAY(s->input_picture,           MAX_PICTURE_COUNT) ||
851         !FF_ALLOCZ_TYPED_ARRAY(s->reordered_input_picture, MAX_PICTURE_COUNT))
852         return AVERROR(ENOMEM);
853
854     if (s->noise_reduction) {
855         if (!FF_ALLOCZ_TYPED_ARRAY(s->dct_offset, 2))
856             return AVERROR(ENOMEM);
857     }
858
859     ff_dct_encode_init(s);
860
861     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
862         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
863
864     if (s->slice_context_count > 1) {
865         s->rtp_mode = 1;
866
867         if (avctx->codec_id == AV_CODEC_ID_H263P)
868             s->h263_slice_structured = 1;
869     }
870
871     s->quant_precision = 5;
872
873     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      avctx->ildct_cmp);
874     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);
875
876     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
877         ff_h261_encode_init(s);
878     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
879         ff_h263_encode_init(s);
880     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
881         ff_msmpeg4_encode_init(s);
882     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
883         && s->out_format == FMT_MPEG1)
884         ff_mpeg1_encode_init(s);
885
886     /* init q matrix */
887     for (i = 0; i < 64; i++) {
888         int j = s->idsp.idct_permutation[i];
889         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
890             s->mpeg_quant) {
891             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
892             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
893         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
894             s->intra_matrix[j] =
895             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
896         } else if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
897             s->intra_matrix[j] =
898             s->inter_matrix[j] = ff_mpeg1_default_intra_matrix[i];
899         } else {
900             /* MPEG-1/2 */
901             s->chroma_intra_matrix[j] =
902             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
903             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
904         }
905         if (avctx->intra_matrix)
906             s->intra_matrix[j] = avctx->intra_matrix[i];
907         if (avctx->inter_matrix)
908             s->inter_matrix[j] = avctx->inter_matrix[i];
909     }
910
911     /* precompute matrix */
912     /* for mjpeg, we do include qscale in the matrix */
913     if (s->out_format != FMT_MJPEG) {
914         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
915                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
916                           31, 1);
917         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
918                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
919                           31, 0);
920     }
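    /* The conversion above fills entries for qscale values qmin..31 using the
     * bias chosen earlier; MJPEG/AMV are skipped because, as noted above,
     * their matrices are built later with qscale folded in directly. */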
921
922     if ((ret = ff_rate_control_init(s)) < 0)
923         return ret;
924
925     if (s->b_frame_strategy == 2) {
926         for (i = 0; i < s->max_b_frames + 2; i++) {
927             s->tmp_frames[i] = av_frame_alloc();
928             if (!s->tmp_frames[i])
929                 return AVERROR(ENOMEM);
930
931             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
932             s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
933             s->tmp_frames[i]->height = s->height >> s->brd_scale;
934
935             ret = av_frame_get_buffer(s->tmp_frames[i], 0);
936             if (ret < 0)
937                 return ret;
938         }
939     }
940
941     cpb_props = ff_add_cpb_side_data(avctx);
942     if (!cpb_props)
943         return AVERROR(ENOMEM);
944     cpb_props->max_bitrate = avctx->rc_max_rate;
945     cpb_props->min_bitrate = avctx->rc_min_rate;
946     cpb_props->avg_bitrate = avctx->bit_rate;
947     cpb_props->buffer_size = avctx->rc_buffer_size;
948
949     return 0;
950 }
951
952 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
953 {
954     MpegEncContext *s = avctx->priv_data;
955     int i;
956
957     ff_rate_control_uninit(s);
958
959     ff_mpv_common_end(s);
960     if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
961         s->out_format == FMT_MJPEG)
962         ff_mjpeg_encode_close(s);
963
964     av_freep(&avctx->extradata);
965
966     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
967         av_frame_free(&s->tmp_frames[i]);
968
969     ff_free_picture_tables(&s->new_picture);
970     ff_mpeg_unref_picture(avctx, &s->new_picture);
971
972     av_freep(&avctx->stats_out);
973     av_freep(&s->ac_stats);
974
975     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
976     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
977     s->q_chroma_intra_matrix=   NULL;
978     s->q_chroma_intra_matrix16= NULL;
979     av_freep(&s->q_intra_matrix);
980     av_freep(&s->q_inter_matrix);
981     av_freep(&s->q_intra_matrix16);
982     av_freep(&s->q_inter_matrix16);
983     av_freep(&s->input_picture);
984     av_freep(&s->reordered_input_picture);
985     av_freep(&s->dct_offset);
986
987     return 0;
988 }
989
990 static int get_sae(uint8_t *src, int ref, int stride)
991 {
992     int x,y;
993     int acc = 0;
994
995     for (y = 0; y < 16; y++) {
996         for (x = 0; x < 16; x++) {
997             acc += FFABS(src[x + y * stride] - ref);
998         }
999     }
1000
1001     return acc;
1002 }
1003
1004 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1005                            uint8_t *ref, int stride)
1006 {
1007     int x, y, w, h;
1008     int acc = 0;
1009
1010     w = s->width  & ~15;
1011     h = s->height & ~15;
1012
1013     for (y = 0; y < h; y += 16) {
1014         for (x = 0; x < w; x += 16) {
1015             int offset = x + y * stride;
1016             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1017                                       stride, 16);
1018             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1019             int sae  = get_sae(src + offset, mean, stride);
1020
1021             acc += sae + 500 < sad;
1022         }
1023     }
1024     return acc;
1025 }
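/* get_intra_count() is a cheap complexity probe used by b_frame_strategy == 1:
 * for each 16x16 block it compares the SAD against the reference frame with
 * the SAE around the block's own mean and counts the block as "intra-looking"
 * when sae + 500 < sad, i.e. when flat prediction from the block average
 * clearly beats temporal prediction. */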
1026
1027 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1028 {
1029     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1030                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1031                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1032                             &s->linesize, &s->uvlinesize);
1033 }
1034
1035 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1036 {
1037     Picture *pic = NULL;
1038     int64_t pts;
1039     int i, display_picture_number = 0, ret;
1040     int encoding_delay = s->max_b_frames ? s->max_b_frames
1041                                          : (s->low_delay ? 0 : 1);
1042     int flush_offset = 1;
1043     int direct = 1;
1044
1045     if (pic_arg) {
1046         pts = pic_arg->pts;
1047         display_picture_number = s->input_picture_number++;
1048
1049         if (pts != AV_NOPTS_VALUE) {
1050             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1051                 int64_t last = s->user_specified_pts;
1052
1053                 if (pts <= last) {
1054                     av_log(s->avctx, AV_LOG_ERROR,
1055                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1056                            pts, last);
1057                     return AVERROR(EINVAL);
1058                 }
1059
1060                 if (!s->low_delay && display_picture_number == 1)
1061                     s->dts_delta = pts - last;
1062             }
1063             s->user_specified_pts = pts;
1064         } else {
1065             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1066                 s->user_specified_pts =
1067                 pts = s->user_specified_pts + 1;
1068                 av_log(s->avctx, AV_LOG_INFO,
1069                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1070                        pts);
1071             } else {
1072                 pts = display_picture_number;
1073             }
1074         }
1075
1076         if (!pic_arg->buf[0] ||
1077             pic_arg->linesize[0] != s->linesize ||
1078             pic_arg->linesize[1] != s->uvlinesize ||
1079             pic_arg->linesize[2] != s->uvlinesize)
1080             direct = 0;
1081         if ((s->width & 15) || (s->height & 15))
1082             direct = 0;
1083         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1084             direct = 0;
1085         if (s->linesize & (STRIDE_ALIGN-1))
1086             direct = 0;
1087
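        /* "direct" means the encoder can reference pic_arg's buffers as they
         * are; it is cleared above whenever the strides, the 16-pixel
         * alignment of the dimensions, or the STRIDE_ALIGN alignment of the
         * data pointers do not match what the encoder expects, in which case
         * the frame is copied (and edge-padded) further down. */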
1088         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1089                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1090
1091         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1092         if (i < 0)
1093             return i;
1094
1095         pic = &s->picture[i];
1096         pic->reference = 3;
1097
1098         if (direct) {
1099             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1100                 return ret;
1101         }
1102         ret = alloc_picture(s, pic, direct);
1103         if (ret < 0)
1104             return ret;
1105
1106         if (!direct) {
1107             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1108                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1109                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1110                 // empty
1111             } else {
1112                 int h_chroma_shift, v_chroma_shift;
1113                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1114                                                  &h_chroma_shift,
1115                                                  &v_chroma_shift);
1116
1117                 for (i = 0; i < 3; i++) {
1118                     int src_stride = pic_arg->linesize[i];
1119                     int dst_stride = i ? s->uvlinesize : s->linesize;
1120                     int h_shift = i ? h_chroma_shift : 0;
1121                     int v_shift = i ? v_chroma_shift : 0;
1122                     int w = s->width  >> h_shift;
1123                     int h = s->height >> v_shift;
1124                     uint8_t *src = pic_arg->data[i];
1125                     uint8_t *dst = pic->f->data[i];
1126                     int vpad = 16;
1127
1128                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1129                         && !s->progressive_sequence
1130                         && FFALIGN(s->height, 32) - s->height > 16)
1131                         vpad = 32;
1132
1133                     if (!s->avctx->rc_buffer_size)
1134                         dst += INPLACE_OFFSET;
1135
1136                     if (src_stride == dst_stride)
1137                         memcpy(dst, src, src_stride * h);
1138                     else {
1139                         int h2 = h;
1140                         uint8_t *dst2 = dst;
1141                         while (h2--) {
1142                             memcpy(dst2, src, w);
1143                             dst2 += dst_stride;
1144                             src += src_stride;
1145                         }
1146                     }
1147                     if ((s->width & 15) || (s->height & (vpad-1))) {
1148                         s->mpvencdsp.draw_edges(dst, dst_stride,
1149                                                 w, h,
1150                                                 16 >> h_shift,
1151                                                 vpad >> v_shift,
1152                                                 EDGE_BOTTOM);
1153                     }
1154                 }
1155                 emms_c();
1156             }
1157         }
1158         ret = av_frame_copy_props(pic->f, pic_arg);
1159         if (ret < 0)
1160             return ret;
1161
1162         pic->f->display_picture_number = display_picture_number;
1163         pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1164     } else {
1165         /* Flushing: When we have not received enough input frames,
1166          * ensure s->input_picture[0] contains the first picture */
1167         for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1168             if (s->input_picture[flush_offset])
1169                 break;
1170
1171         if (flush_offset <= 1)
1172             flush_offset = 1;
1173         else
1174             encoding_delay = encoding_delay - flush_offset + 1;
1175     }
1176
1177     /* shift buffer entries */
1178     for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1179         s->input_picture[i - flush_offset] = s->input_picture[i];
1180
1181     s->input_picture[encoding_delay] = (Picture*) pic;
1182
1183     return 0;
1184 }
1185
1186 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1187 {
1188     int x, y, plane;
1189     int score = 0;
1190     int64_t score64 = 0;
1191
1192     for (plane = 0; plane < 3; plane++) {
1193         const int stride = p->f->linesize[plane];
1194         const int bw = plane ? 1 : 2;
1195         for (y = 0; y < s->mb_height * bw; y++) {
1196             for (x = 0; x < s->mb_width * bw; x++) {
1197                 int off = p->shared ? 0 : 16;
1198                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1199                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1200                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1201
1202                 switch (FFABS(s->frame_skip_exp)) {
1203                 case 0: score    =  FFMAX(score, v);          break;
1204                 case 1: score   += FFABS(v);                  break;
1205                 case 2: score64 += v * (int64_t)v;                       break;
1206                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1207                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1208                 }
1209             }
1210         }
1211     }
1212     emms_c();
1213
1214     if (score)
1215         score64 = score;
1216     if (s->frame_skip_exp < 0)
1217         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1218                       -1.0/s->frame_skip_exp);
1219
1220     if (score64 < s->frame_skip_threshold)
1221         return 1;
1222     if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1223         return 1;
1224     return 0;
1225 }
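/* skip_check() decides whether an input frame is close enough to the last
 * coded one to be dropped: frame_skip_exp selects the error norm (0 = max,
 * 1 = sum of absolute differences, 2 = sum of squares, 3/4 = higher powers),
 * a negative exponent additionally rescales the per-macroblock average via
 * pow(), and the frame is skipped when the score stays below
 * frame_skip_threshold or below frame_skip_factor scaled by the current
 * lambda. */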
1226
1227 static int encode_frame(AVCodecContext *c, AVFrame *frame, AVPacket *pkt)
1228 {
1229     int ret;
1230     int size = 0;
1231
1232     ret = avcodec_send_frame(c, frame);
1233     if (ret < 0)
1234         return ret;
1235
1236     do {
1237         ret = avcodec_receive_packet(c, pkt);
1238         if (ret >= 0) {
1239             size += pkt->size;
1240             av_packet_unref(pkt);
1241         } else if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
1242             return ret;
1243     } while (ret >= 0);
1244
1245     return size;
1246 }
1247
1248 static int estimate_best_b_count(MpegEncContext *s)
1249 {
1250     const AVCodec *codec = avcodec_find_encoder(s->avctx->codec_id);
1251     AVPacket *pkt;
1252     const int scale = s->brd_scale;
1253     int width  = s->width  >> scale;
1254     int height = s->height >> scale;
1255     int i, j, out_size, p_lambda, b_lambda, lambda2;
1256     int64_t best_rd  = INT64_MAX;
1257     int best_b_count = -1;
1258     int ret = 0;
1259
1260     av_assert0(scale >= 0 && scale <= 3);
1261
1262     pkt = av_packet_alloc();
1263     if (!pkt)
1264         return AVERROR(ENOMEM);
1265
1266     //emms_c();
1267     //s->next_picture_ptr->quality;
1268     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1269     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1270     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1271     if (!b_lambda) // FIXME we should do this somewhere else
1272         b_lambda = p_lambda;
1273     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1274                FF_LAMBDA_SHIFT;
1275
1276     for (i = 0; i < s->max_b_frames + 2; i++) {
1277         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1278                                                 s->next_picture_ptr;
1279         uint8_t *data[4];
1280
1281         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1282             pre_input = *pre_input_ptr;
1283             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1284
1285             if (!pre_input.shared && i) {
1286                 data[0] += INPLACE_OFFSET;
1287                 data[1] += INPLACE_OFFSET;
1288                 data[2] += INPLACE_OFFSET;
1289             }
1290
1291             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1292                                        s->tmp_frames[i]->linesize[0],
1293                                        data[0],
1294                                        pre_input.f->linesize[0],
1295                                        width, height);
1296             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1297                                        s->tmp_frames[i]->linesize[1],
1298                                        data[1],
1299                                        pre_input.f->linesize[1],
1300                                        width >> 1, height >> 1);
1301             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1302                                        s->tmp_frames[i]->linesize[2],
1303                                        data[2],
1304                                        pre_input.f->linesize[2],
1305                                        width >> 1, height >> 1);
1306         }
1307     }
1308
1309     for (j = 0; j < s->max_b_frames + 1; j++) {
1310         AVCodecContext *c;
1311         int64_t rd = 0;
1312
1313         if (!s->input_picture[j])
1314             break;
1315
1316         c = avcodec_alloc_context3(NULL);
1317         if (!c) {
1318             ret = AVERROR(ENOMEM);
1319             goto fail;
1320         }
1321
1322         c->width        = width;
1323         c->height       = height;
1324         c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1325         c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1326         c->mb_decision  = s->avctx->mb_decision;
1327         c->me_cmp       = s->avctx->me_cmp;
1328         c->mb_cmp       = s->avctx->mb_cmp;
1329         c->me_sub_cmp   = s->avctx->me_sub_cmp;
1330         c->pix_fmt      = AV_PIX_FMT_YUV420P;
1331         c->time_base    = s->avctx->time_base;
1332         c->max_b_frames = s->max_b_frames;
1333
1334         ret = avcodec_open2(c, codec, NULL);
1335         if (ret < 0)
1336             goto fail;
1337
1338
1339         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1340         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1341
1342         out_size = encode_frame(c, s->tmp_frames[0], pkt);
1343         if (out_size < 0) {
1344             ret = out_size;
1345             goto fail;
1346         }
1347
1348         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1349
1350         for (i = 0; i < s->max_b_frames + 1; i++) {
1351             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1352
1353             s->tmp_frames[i + 1]->pict_type = is_p ?
1354                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1355             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1356
1357             out_size = encode_frame(c, s->tmp_frames[i + 1], pkt);
1358             if (out_size < 0) {
1359                 ret = out_size;
1360                 goto fail;
1361             }
1362
1363             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1364         }
1365
1366         /* get the delayed frames */
1367         out_size = encode_frame(c, NULL, pkt);
1368         if (out_size < 0) {
1369             ret = out_size;
1370             goto fail;
1371         }
1372         rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1373
1374         rd += c->error[0] + c->error[1] + c->error[2];
1375
1376         if (rd < best_rd) {
1377             best_rd = rd;
1378             best_b_count = j;
1379         }
1380
1381 fail:
1382         avcodec_free_context(&c);
1383         av_packet_unref(pkt);
1384         if (ret < 0) {
1385             best_b_count = ret;
1386             break;
1387         }
1388     }
1389
1390     av_packet_free(&pkt);
1391
1392     return best_b_count;
1393 }
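/* estimate_best_b_count() (b_frame_strategy == 2) brute-forces the decision:
 * for each candidate B-frame count j it encodes the buffered frames at
 * reduced resolution (s->brd_scale), accumulates rd = bits * lambda2 plus the
 * encoder's reported SSE, and returns the j with the smallest total. */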
1394
1395 static int select_input_picture(MpegEncContext *s)
1396 {
1397     int i, ret;
1398
1399     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1400         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1401     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1402
1403     /* set next picture type & ordering */
1404     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1405         if (s->frame_skip_threshold || s->frame_skip_factor) {
1406             if (s->picture_in_gop_number < s->gop_size &&
1407                 s->next_picture_ptr &&
1408                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1409                 // FIXME check that the gop check above is +-1 correct
1410                 av_frame_unref(s->input_picture[0]->f);
1411
1412                 ff_vbv_update(s, 0);
1413
1414                 goto no_output_pic;
1415             }
1416         }
1417
1418         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1419             !s->next_picture_ptr || s->intra_only) {
1420             s->reordered_input_picture[0] = s->input_picture[0];
1421             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1422             s->reordered_input_picture[0]->f->coded_picture_number =
1423                 s->coded_picture_number++;
1424         } else {
1425             int b_frames = 0;
1426
1427             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1428                 for (i = 0; i < s->max_b_frames + 1; i++) {
1429                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1430
1431                     if (pict_num >= s->rc_context.num_entries)
1432                         break;
1433                     if (!s->input_picture[i]) {
1434                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1435                         break;
1436                     }
1437
1438                     s->input_picture[i]->f->pict_type =
1439                         s->rc_context.entry[pict_num].new_pict_type;
1440                 }
1441             }
1442
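            /* b_frame_strategy chooses how many B-frames precede the next
             * reference frame: 0 uses max_b_frames (limited by the input
             * frames available), 1 uses a heuristic based on the intra-MB
             * count between consecutive inputs (get_intra_count), and 2
             * trial-encodes the candidates (estimate_best_b_count). */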
1443             if (s->b_frame_strategy == 0) {
1444                 b_frames = s->max_b_frames;
1445                 while (b_frames && !s->input_picture[b_frames])
1446                     b_frames--;
1447             } else if (s->b_frame_strategy == 1) {
1448                 for (i = 1; i < s->max_b_frames + 1; i++) {
1449                     if (s->input_picture[i] &&
1450                         s->input_picture[i]->b_frame_score == 0) {
1451                         s->input_picture[i]->b_frame_score =
1452                             get_intra_count(s,
1453                                             s->input_picture[i    ]->f->data[0],
1454                                             s->input_picture[i - 1]->f->data[0],
1455                                             s->linesize) + 1;
1456                     }
1457                 }
1458                 for (i = 0; i < s->max_b_frames + 1; i++) {
1459                     if (!s->input_picture[i] ||
1460                         s->input_picture[i]->b_frame_score - 1 >
1461                             s->mb_num / s->b_sensitivity)
1462                         break;
1463                 }
1464
1465                 b_frames = FFMAX(0, i - 1);
1466
1467                 /* reset scores */
1468                 for (i = 0; i < b_frames + 1; i++) {
1469                     s->input_picture[i]->b_frame_score = 0;
1470                 }
1471             } else if (s->b_frame_strategy == 2) {
1472                 b_frames = estimate_best_b_count(s);
1473                 if (b_frames < 0)
1474                     return b_frames;
1475             }
1476
1477             emms_c();
1478
1479             for (i = b_frames - 1; i >= 0; i--) {
1480                 int type = s->input_picture[i]->f->pict_type;
1481                 if (type && type != AV_PICTURE_TYPE_B)
1482                     b_frames = i;
1483             }
1484             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1485                 b_frames == s->max_b_frames) {
1486                 av_log(s->avctx, AV_LOG_ERROR,
1487                        "warning, too many B-frames in a row\n");
1488             }
1489
1490             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1491                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1492                     s->gop_size > s->picture_in_gop_number) {
1493                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1494                 } else {
1495                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1496                         b_frames = 0;
1497                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1498                 }
1499             }
1500
1501             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1502                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1503                 b_frames--;
1504
1505             s->reordered_input_picture[0] = s->input_picture[b_frames];
1506             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1507                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1508             s->reordered_input_picture[0]->f->coded_picture_number =
1509                 s->coded_picture_number++;
1510             for (i = 0; i < b_frames; i++) {
1511                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1512                 s->reordered_input_picture[i + 1]->f->pict_type =
1513                     AV_PICTURE_TYPE_B;
1514                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1515                     s->coded_picture_number++;
1516             }
1517         }
1518     }
1519 no_output_pic:
1520     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1521
1522     if (s->reordered_input_picture[0]) {
1523         s->reordered_input_picture[0]->reference =
1524            s->reordered_input_picture[0]->f->pict_type !=
1525                AV_PICTURE_TYPE_B ? 3 : 0;
1526
1527         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1528             return ret;
1529
1530         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1531             // input is a shared pix, so we can't modify it -> allocate a new
1532             // one & ensure that the shared one is reusable
1533
1534             Picture *pic;
1535             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1536             if (i < 0)
1537                 return i;
1538             pic = &s->picture[i];
1539
1540             pic->reference = s->reordered_input_picture[0]->reference;
1541             if (alloc_picture(s, pic, 0) < 0) {
1542                 return -1;
1543             }
1544
1545             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1546             if (ret < 0)
1547                 return ret;
1548
1549             /* mark us unused / free shared pic */
1550             av_frame_unref(s->reordered_input_picture[0]->f);
1551             s->reordered_input_picture[0]->shared = 0;
1552
1553             s->current_picture_ptr = pic;
1554         } else {
1555             // input is not a shared pix -> reuse buffer for current_pix
1556             s->current_picture_ptr = s->reordered_input_picture[0];
1557             for (i = 0; i < 4; i++) {
1558                 if (s->new_picture.f->data[i])
1559                     s->new_picture.f->data[i] += INPLACE_OFFSET;
1560             }
1561         }
1562         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1563         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1564                                        s->current_picture_ptr)) < 0)
1565             return ret;
1566
1567         s->picture_number = s->new_picture.f->display_picture_number;
1568     }
1569     return 0;
1570 }
1571
1572 static void frame_end(MpegEncContext *s)
1573 {
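    /* Pad the borders of the reconstructed reference frame (edge extension)
     * so that motion vectors may point outside the picture area. */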
1574     if (s->unrestricted_mv &&
1575         s->current_picture.reference &&
1576         !s->intra_only) {
1577         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1578         int hshift = desc->log2_chroma_w;
1579         int vshift = desc->log2_chroma_h;
1580         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1581                                 s->current_picture.f->linesize[0],
1582                                 s->h_edge_pos, s->v_edge_pos,
1583                                 EDGE_WIDTH, EDGE_WIDTH,
1584                                 EDGE_TOP | EDGE_BOTTOM);
1585         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1586                                 s->current_picture.f->linesize[1],
1587                                 s->h_edge_pos >> hshift,
1588                                 s->v_edge_pos >> vshift,
1589                                 EDGE_WIDTH >> hshift,
1590                                 EDGE_WIDTH >> vshift,
1591                                 EDGE_TOP | EDGE_BOTTOM);
1592         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1593                                 s->current_picture.f->linesize[2],
1594                                 s->h_edge_pos >> hshift,
1595                                 s->v_edge_pos >> vshift,
1596                                 EDGE_WIDTH >> hshift,
1597                                 EDGE_WIDTH >> vshift,
1598                                 EDGE_TOP | EDGE_BOTTOM);
1599     }
1600
1601     emms_c();
1602
1603     s->last_pict_type                 = s->pict_type;
1604     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1605     if (s->pict_type!= AV_PICTURE_TYPE_B)
1606         s->last_non_b_pict_type = s->pict_type;
1607
1608 #if FF_API_CODED_FRAME
1609 FF_DISABLE_DEPRECATION_WARNINGS
1610     av_frame_unref(s->avctx->coded_frame);
1611     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1612 FF_ENABLE_DEPRECATION_WARNINGS
1613 #endif
1614 #if FF_API_ERROR_FRAME
1615 FF_DISABLE_DEPRECATION_WARNINGS
1616     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1617            sizeof(s->current_picture.encoding_error));
1618 FF_ENABLE_DEPRECATION_WARNINGS
1619 #endif
1620 }
1621
1622 static void update_noise_reduction(MpegEncContext *s)
1623 {
1624     int intra, i;
1625
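    /* Keep running per-coefficient DCT error statistics and derive from them
     * the offset that the denoise_dct step subtracts from coefficient
     * magnitudes, roughly noise_reduction * count / error_sum; halving the
     * sums once the count exceeds 2^16 acts as an exponential forget factor. */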
1626     for (intra = 0; intra < 2; intra++) {
1627         if (s->dct_count[intra] > (1 << 16)) {
1628             for (i = 0; i < 64; i++) {
1629                 s->dct_error_sum[intra][i] >>= 1;
1630             }
1631             s->dct_count[intra] >>= 1;
1632         }
1633
1634         for (i = 0; i < 64; i++) {
1635             s->dct_offset[intra][i] = (s->noise_reduction *
1636                                        s->dct_count[intra] +
1637                                        s->dct_error_sum[intra][i] / 2) /
1638                                       (s->dct_error_sum[intra][i] + 1);
1639         }
1640     }
1641 }
1642
1643 static int frame_start(MpegEncContext *s)
1644 {
1645     int ret;
1646
1647     /* mark & release old frames */
1648     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1649         s->last_picture_ptr != s->next_picture_ptr &&
1650         s->last_picture_ptr->f->buf[0]) {
1651         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1652     }
1653
1654     s->current_picture_ptr->f->pict_type = s->pict_type;
1655     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1656
1657     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1658     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1659                                    s->current_picture_ptr)) < 0)
1660         return ret;
1661
1662     if (s->pict_type != AV_PICTURE_TYPE_B) {
1663         s->last_picture_ptr = s->next_picture_ptr;
1664         if (!s->droppable)
1665             s->next_picture_ptr = s->current_picture_ptr;
1666     }
1667
1668     if (s->last_picture_ptr) {
1669         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1670         if (s->last_picture_ptr->f->buf[0] &&
1671             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1672                                        s->last_picture_ptr)) < 0)
1673             return ret;
1674     }
1675     if (s->next_picture_ptr) {
1676         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1677         if (s->next_picture_ptr->f->buf[0] &&
1678             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1679                                        s->next_picture_ptr)) < 0)
1680             return ret;
1681     }
1682
1683     if (s->picture_structure!= PICT_FRAME) {
1684         int i;
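        /* Field pictures: double the strides so only every second line is
         * addressed, and for a bottom field start one line down. */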
1685         for (i = 0; i < 4; i++) {
1686             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1687                 s->current_picture.f->data[i] +=
1688                     s->current_picture.f->linesize[i];
1689             }
1690             s->current_picture.f->linesize[i] *= 2;
1691             s->last_picture.f->linesize[i]    *= 2;
1692             s->next_picture.f->linesize[i]    *= 2;
1693         }
1694     }
1695
1696     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1697         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1698         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1699     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1700         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1701         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1702     } else {
1703         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1704         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1705     }
1706
1707     if (s->dct_error_sum) {
1708         av_assert2(s->noise_reduction && s->encoding);
1709         update_noise_reduction(s);
1710     }
1711
1712     return 0;
1713 }
1714
1715 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1716                           const AVFrame *pic_arg, int *got_packet)
1717 {
1718     MpegEncContext *s = avctx->priv_data;
1719     int i, stuffing_count, ret;
1720     int context_count = s->slice_context_count;
1721
1722     s->vbv_ignore_qmax = 0;
1723
1724     s->picture_in_gop_number++;
1725
1726     if (load_input_picture(s, pic_arg) < 0)
1727         return -1;
1728
1729     if (select_input_picture(s) < 0) {
1730         return -1;
1731     }
1732
1733     /* output? */
1734     if (s->new_picture.f->data[0]) {
1735         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1736         int pkt_size = growing_buffer
1737                        ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1738                        : s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1739         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1740             return ret;
1741         if (s->mb_info) {
1742             s->mb_info_ptr = av_packet_new_side_data(pkt,
1743                                  AV_PKT_DATA_H263_MB_INFO,
1744                                  s->mb_width*s->mb_height*12);
1745             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1746         }
1747
1748         for (i = 0; i < context_count; i++) {
1749             int start_y = s->thread_context[i]->start_mb_y;
1750             int   end_y = s->thread_context[i]->  end_mb_y;
1751             int h       = s->mb_height;
1752             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1753             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1754
1755             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1756         }
1757
1758         s->pict_type = s->new_picture.f->pict_type;
1759         //emms_c();
1760         ret = frame_start(s);
1761         if (ret < 0)
1762             return ret;
1763 vbv_retry:
1764         ret = encode_picture(s, s->picture_number);
1765         if (growing_buffer) {
1766             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1767             pkt->data = s->pb.buf;
1768             pkt->size = avctx->internal->byte_buffer_size;
1769         }
1770         if (ret < 0)
1771             return -1;
1772
1773         frame_end(s);
1774
1775         if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) && s->out_format == FMT_MJPEG)
1776             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1777
1778         if (avctx->rc_buffer_size) {
1779             RateControlContext *rcc = &s->rc_context;
1780             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1781             int hq = (avctx->mb_decision == FF_MB_DECISION_RD || avctx->trellis);
1782             int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1783
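            /* If the coded frame exceeds the VBV-derived size budget, raise
             * lambda (and, with adaptive quantization, the per-MB lambda
             * table) by at least min_step and at least the factor
             * (qscale+1)/qscale, then re-encode the frame. */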
1784             if (put_bits_count(&s->pb) > max_size &&
1785                 s->lambda < s->lmax) {
1786                 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1787                                        (s->qscale + 1) / s->qscale);
1788                 if (s->adaptive_quant) {
1789                     int i;
1790                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1791                         s->lambda_table[i] =
1792                             FFMAX(s->lambda_table[i] + min_step,
1793                                   s->lambda_table[i] * (s->qscale + 1) /
1794                                   s->qscale);
1795                 }
1796                 s->mb_skipped = 0;        // normally done in frame_start()
1797                 // the no_rounding toggle is done in encode_picture(), so undo it here
1798                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1799                     if (s->flipflop_rounding          ||
1800                         s->codec_id == AV_CODEC_ID_H263P ||
1801                         s->codec_id == AV_CODEC_ID_MPEG4)
1802                         s->no_rounding ^= 1;
1803                 }
1804                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1805                     s->time_base       = s->last_time_base;
1806                     s->last_non_b_time = s->time - s->pp_time;
1807                 }
1808                 for (i = 0; i < context_count; i++) {
1809                     PutBitContext *pb = &s->thread_context[i]->pb;
1810                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1811                 }
1812                 s->vbv_ignore_qmax = 1;
1813                 av_log(avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1814                 goto vbv_retry;
1815             }
1816
1817             av_assert0(avctx->rc_max_rate);
1818         }
1819
1820         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1821             ff_write_pass1_stats(s);
1822
1823         for (i = 0; i < 4; i++) {
1824             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1825             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1826         }
1827         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1828                                        s->current_picture_ptr->encoding_error,
1829                                        (avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1830                                        s->pict_type);
1831
1832         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1833             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1834                                              s->misc_bits + s->i_tex_bits +
1835                                              s->p_tex_bits);
1836         flush_put_bits(&s->pb);
1837         s->frame_bits  = put_bits_count(&s->pb);
1838
1839         stuffing_count = ff_vbv_update(s, s->frame_bits);
1840         s->stuffing_bits = 8*stuffing_count;
1841         if (stuffing_count) {
1842             if (put_bytes_left(&s->pb, 0) < stuffing_count + 50) {
1843                 av_log(avctx, AV_LOG_ERROR, "stuffing too large\n");
1844                 return -1;
1845             }
1846
1847             switch (s->codec_id) {
1848             case AV_CODEC_ID_MPEG1VIDEO:
1849             case AV_CODEC_ID_MPEG2VIDEO:
1850                 while (stuffing_count--) {
1851                     put_bits(&s->pb, 8, 0);
1852                 }
1853             break;
1854             case AV_CODEC_ID_MPEG4:
1855                 put_bits(&s->pb, 16, 0);
1856                 put_bits(&s->pb, 16, 0x1C3);
1857                 stuffing_count -= 4;
1858                 while (stuffing_count--) {
1859                     put_bits(&s->pb, 8, 0xFF);
1860                 }
1861             break;
1862             default:
1863                 av_log(avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1864             }
1865             flush_put_bits(&s->pb);
1866             s->frame_bits  = put_bits_count(&s->pb);
1867         }
1868
1869         /* update MPEG-1/2 vbv_delay for CBR */
1870         if (avctx->rc_max_rate                          &&
1871             avctx->rc_min_rate == avctx->rc_max_rate &&
1872             s->out_format == FMT_MPEG1                     &&
1873             90000LL * (avctx->rc_buffer_size - 1) <=
1874                 avctx->rc_max_rate * 0xFFFFLL) {
1875             AVCPBProperties *props;
1876             size_t props_size;
1877
1878             int vbv_delay, min_delay;
1879             double inbits  = avctx->rc_max_rate *
1880                              av_q2d(avctx->time_base);
1881             int    minbits = s->frame_bits - 8 *
1882                              (s->vbv_delay_ptr - s->pb.buf - 1);
1883             double bits    = s->rc_context.buffer_index + minbits - inbits;
1884
1885             if (bits < 0)
1886                 av_log(avctx, AV_LOG_ERROR,
1887                        "Internal error, negative bits\n");
1888
1889             av_assert1(s->repeat_first_field == 0);
1890
1891             vbv_delay = bits * 90000 / avctx->rc_max_rate;
1892             min_delay = (minbits * 90000LL + avctx->rc_max_rate - 1) /
1893                         avctx->rc_max_rate;
1894
1895             vbv_delay = FFMAX(vbv_delay, min_delay);
1896
1897             av_assert0(vbv_delay < 0xFFFF);
1898
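            /* Patch the 16-bit vbv_delay field already written in the picture
             * header: the top 3 bits go into the low bits of the first byte,
             * the middle 8 into the second byte, and the low 5 into the high
             * bits of the third. */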
1899             s->vbv_delay_ptr[0] &= 0xF8;
1900             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1901             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1902             s->vbv_delay_ptr[2] &= 0x07;
1903             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1904
1905             props = av_cpb_properties_alloc(&props_size);
1906             if (!props)
1907                 return AVERROR(ENOMEM);
1908             props->vbv_delay = vbv_delay * 300;
1909
1910             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1911                                           (uint8_t*)props, props_size);
1912             if (ret < 0) {
1913                 av_freep(&props);
1914                 return ret;
1915             }
1916
1917 #if FF_API_VBV_DELAY
1918 FF_DISABLE_DEPRECATION_WARNINGS
1919             avctx->vbv_delay     = vbv_delay * 300;
1920 FF_ENABLE_DEPRECATION_WARNINGS
1921 #endif
1922         }
1923         s->total_bits     += s->frame_bits;
1924
1925         pkt->pts = s->current_picture.f->pts;
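        /* With B-frames (and low_delay off), a reference picture's DTS lags
         * its PTS: the very first coded picture gets pts - dts_delta, later
         * ones reuse the PTS of the previously coded reference picture. */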
1926         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1927             if (!s->current_picture.f->coded_picture_number)
1928                 pkt->dts = pkt->pts - s->dts_delta;
1929             else
1930                 pkt->dts = s->reordered_pts;
1931             s->reordered_pts = pkt->pts;
1932         } else
1933             pkt->dts = pkt->pts;
1934         if (s->current_picture.f->key_frame)
1935             pkt->flags |= AV_PKT_FLAG_KEY;
1936         if (s->mb_info)
1937             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1938     } else {
1939         s->frame_bits = 0;
1940     }
1941
1942     /* release non-reference frames */
1943     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1944         if (!s->picture[i].reference)
1945             ff_mpeg_unref_picture(avctx, &s->picture[i]);
1946     }
1947
1948     av_assert1((s->frame_bits & 7) == 0);
1949
1950     pkt->size = s->frame_bits / 8;
1951     *got_packet = !!pkt->size;
1952     return 0;
1953 }
1954
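/* Zero out blocks that contain only a few scattered +-1 coefficients: each
 * such coefficient is scored by the zero-run preceding it (tab[] below,
 * shorter runs score more), and if the total score stays below the threshold
 * the whole block is dropped. A negative threshold additionally allows the
 * DC coefficient to be dropped. */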
1955 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1956                                                 int n, int threshold)
1957 {
1958     static const char tab[64] = {
1959         3, 2, 2, 1, 1, 1, 1, 1,
1960         1, 1, 1, 1, 1, 1, 1, 1,
1961         1, 1, 1, 1, 1, 1, 1, 1,
1962         0, 0, 0, 0, 0, 0, 0, 0,
1963         0, 0, 0, 0, 0, 0, 0, 0,
1964         0, 0, 0, 0, 0, 0, 0, 0,
1965         0, 0, 0, 0, 0, 0, 0, 0,
1966         0, 0, 0, 0, 0, 0, 0, 0
1967     };
1968     int score = 0;
1969     int run = 0;
1970     int i;
1971     int16_t *block = s->block[n];
1972     const int last_index = s->block_last_index[n];
1973     int skip_dc;
1974
1975     if (threshold < 0) {
1976         skip_dc = 0;
1977         threshold = -threshold;
1978     } else
1979         skip_dc = 1;
1980
1981     /* Is everything we could set to zero already zero? */
1982     if (last_index <= skip_dc - 1)
1983         return;
1984
1985     for (i = 0; i <= last_index; i++) {
1986         const int j = s->intra_scantable.permutated[i];
1987         const int level = FFABS(block[j]);
1988         if (level == 1) {
1989             if (skip_dc && i == 0)
1990                 continue;
1991             score += tab[run];
1992             run = 0;
1993         } else if (level > 1) {
1994             return;
1995         } else {
1996             run++;
1997         }
1998     }
1999     if (score >= threshold)
2000         return;
2001     for (i = skip_dc; i <= last_index; i++) {
2002         const int j = s->intra_scantable.permutated[i];
2003         block[j] = 0;
2004     }
2005     if (block[0])
2006         s->block_last_index[n] = 0;
2007     else
2008         s->block_last_index[n] = -1;
2009 }
2010
2011 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2012                                int last_index)
2013 {
2014     int i;
2015     const int maxlevel = s->max_qcoeff;
2016     const int minlevel = s->min_qcoeff;
2017     int overflow = 0;
2018
2019     if (s->mb_intra) {
2020         i = 1; // skip clipping of intra dc
2021     } else
2022         i = 0;
2023
2024     for (; i <= last_index; i++) {
2025         const int j = s->intra_scantable.permutated[i];
2026         int level = block[j];
2027
2028         if (level > maxlevel) {
2029             level = maxlevel;
2030             overflow++;
2031         } else if (level < minlevel) {
2032             level = minlevel;
2033             overflow++;
2034         }
2035
2036         block[j] = level;
2037     }
2038
2039     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2040         av_log(s->avctx, AV_LOG_INFO,
2041                "warning, clipping %d dct coefficients to %d..%d\n",
2042                overflow, minlevel, maxlevel);
2043 }
2044
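/* Per-coefficient visual weights for quantizer noise shaping: essentially 36
 * times the standard deviation of each pixel's 3x3 neighbourhood, passed to
 * dct_quantize_refine() when quantizer_noise_shaping is enabled. */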
2045 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
2046 {
2047     int x, y;
2048     // FIXME optimize
2049     for (y = 0; y < 8; y++) {
2050         for (x = 0; x < 8; x++) {
2051             int x2, y2;
2052             int sum = 0;
2053             int sqr = 0;
2054             int count = 0;
2055
2056             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2057                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2058                     int v = ptr[x2 + y2 * stride];
2059                     sum += v;
2060                     sqr += v * v;
2061                     count++;
2062                 }
2063             }
2064             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2065         }
2066     }
2067 }
2068
2069 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2070                                                 int motion_x, int motion_y,
2071                                                 int mb_block_height,
2072                                                 int mb_block_width,
2073                                                 int mb_block_count)
2074 {
2075     int16_t weight[12][64];
2076     int16_t orig[12][64];
2077     const int mb_x = s->mb_x;
2078     const int mb_y = s->mb_y;
2079     int i;
2080     int skip_dct[12];
2081     int dct_offset = s->linesize * 8; // default for progressive frames
2082     int uv_dct_offset = s->uvlinesize * 8;
2083     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2084     ptrdiff_t wrap_y, wrap_c;
2085
2086     for (i = 0; i < mb_block_count; i++)
2087         skip_dct[i] = s->skipdct;
2088
2089     if (s->adaptive_quant) {
2090         const int last_qp = s->qscale;
2091         const int mb_xy = mb_x + mb_y * s->mb_stride;
2092
2093         s->lambda = s->lambda_table[mb_xy];
2094         update_qscale(s);
2095
2096         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2097             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2098             s->dquant = s->qscale - last_qp;
2099
2100             if (s->out_format == FMT_H263) {
2101                 s->dquant = av_clip(s->dquant, -2, 2);
2102
2103                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2104                     if (!s->mb_intra) {
2105                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2106                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2107                                 s->dquant = 0;
2108                         }
2109                         if (s->mv_type == MV_TYPE_8X8)
2110                             s->dquant = 0;
2111                     }
2112                 }
2113             }
2114         }
2115         ff_set_qscale(s, last_qp + s->dquant);
2116     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2117         ff_set_qscale(s, s->qscale + s->dquant);
2118
2119     wrap_y = s->linesize;
2120     wrap_c = s->uvlinesize;
2121     ptr_y  = s->new_picture.f->data[0] +
2122              (mb_y * 16 * wrap_y)              + mb_x * 16;
2123     ptr_cb = s->new_picture.f->data[1] +
2124              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2125     ptr_cr = s->new_picture.f->data[2] +
2126              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2127
2128     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2129         uint8_t *ebuf = s->sc.edge_emu_buffer + 38 * wrap_y;
2130         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2131         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2132         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2133                                  wrap_y, wrap_y,
2134                                  16, 16, mb_x * 16, mb_y * 16,
2135                                  s->width, s->height);
2136         ptr_y = ebuf;
2137         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2138                                  wrap_c, wrap_c,
2139                                  mb_block_width, mb_block_height,
2140                                  mb_x * mb_block_width, mb_y * mb_block_height,
2141                                  cw, ch);
2142         ptr_cb = ebuf + 16 * wrap_y;
2143         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2144                                  wrap_c, wrap_c,
2145                                  mb_block_width, mb_block_height,
2146                                  mb_x * mb_block_width, mb_y * mb_block_height,
2147                                  cw, ch);
2148         ptr_cr = ebuf + 16 * wrap_y + 16;
2149     }
2150
2151     if (s->mb_intra) {
2152         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2153             int progressive_score, interlaced_score;
2154
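            /* Decide between frame DCT (consecutive lines) and field DCT
             * (every second line) by comparing interlace metrics on the
             * source block; the -400 bias favours the progressive choice. */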
2155             s->interlaced_dct = 0;
2156             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2157                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2158                                                      NULL, wrap_y, 8) - 400;
2159
2160             if (progressive_score > 0) {
2161                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2162                                                         NULL, wrap_y * 2, 8) +
2163                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2164                                                         NULL, wrap_y * 2, 8);
2165                 if (progressive_score > interlaced_score) {
2166                     s->interlaced_dct = 1;
2167
2168                     dct_offset = wrap_y;
2169                     uv_dct_offset = wrap_c;
2170                     wrap_y <<= 1;
2171                     if (s->chroma_format == CHROMA_422 ||
2172                         s->chroma_format == CHROMA_444)
2173                         wrap_c <<= 1;
2174                 }
2175             }
2176         }
2177
2178         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2179         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2180         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2181         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2182
2183         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2184             skip_dct[4] = 1;
2185             skip_dct[5] = 1;
2186         } else {
2187             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2188             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2189             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2190                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2191                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2192             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2193                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2194                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2195                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2196                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2197                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2198                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2199             }
2200         }
2201     } else {
2202         op_pixels_func (*op_pix)[4];
2203         qpel_mc_func (*op_qpix)[16];
2204         uint8_t *dest_y, *dest_cb, *dest_cr;
2205
2206         dest_y  = s->dest[0];
2207         dest_cb = s->dest[1];
2208         dest_cr = s->dest[2];
2209
2210         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2211             op_pix  = s->hdsp.put_pixels_tab;
2212             op_qpix = s->qdsp.put_qpel_pixels_tab;
2213         } else {
2214             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2215             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2216         }
2217
2218         if (s->mv_dir & MV_DIR_FORWARD) {
2219             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2220                           s->last_picture.f->data,
2221                           op_pix, op_qpix);
2222             op_pix  = s->hdsp.avg_pixels_tab;
2223             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2224         }
2225         if (s->mv_dir & MV_DIR_BACKWARD) {
2226             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2227                           s->next_picture.f->data,
2228                           op_pix, op_qpix);
2229         }
2230
2231         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2232             int progressive_score, interlaced_score;
2233
2234             s->interlaced_dct = 0;
2235             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2236                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2237                                                      ptr_y + wrap_y * 8,
2238                                                      wrap_y, 8) - 400;
2239
2240             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2241                 progressive_score -= 400;
2242
2243             if (progressive_score > 0) {
2244                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2245                                                         wrap_y * 2, 8) +
2246                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2247                                                         ptr_y + wrap_y,
2248                                                         wrap_y * 2, 8);
2249
2250                 if (progressive_score > interlaced_score) {
2251                     s->interlaced_dct = 1;
2252
2253                     dct_offset = wrap_y;
2254                     uv_dct_offset = wrap_c;
2255                     wrap_y <<= 1;
2256                     if (s->chroma_format == CHROMA_422)
2257                         wrap_c <<= 1;
2258                 }
2259             }
2260         }
2261
2262         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2263         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2264         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2265                             dest_y + dct_offset, wrap_y);
2266         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2267                             dest_y + dct_offset + 8, wrap_y);
2268
2269         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2270             skip_dct[4] = 1;
2271             skip_dct[5] = 1;
2272         } else {
2273             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2274             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2275             if (!s->chroma_y_shift) { /* 422 */
2276                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2277                                     dest_cb + uv_dct_offset, wrap_c);
2278                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2279                                     dest_cr + uv_dct_offset, wrap_c);
2280             }
2281         }
2282         /* pre quantization */
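        /* When the MB's motion-compensated variance is small, skip any 8x8
         * block whose SAD against the prediction is below 20*qscale: it is
         * treated as all-zero and never transformed or quantized. */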
2283         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2284                 2 * s->qscale * s->qscale) {
2285             // FIXME optimize
2286             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2287                 skip_dct[0] = 1;
2288             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2289                 skip_dct[1] = 1;
2290             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2291                                wrap_y, 8) < 20 * s->qscale)
2292                 skip_dct[2] = 1;
2293             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2294                                wrap_y, 8) < 20 * s->qscale)
2295                 skip_dct[3] = 1;
2296             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2297                 skip_dct[4] = 1;
2298             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2299                 skip_dct[5] = 1;
2300             if (!s->chroma_y_shift) { /* 422 */
2301                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2302                                    dest_cb + uv_dct_offset,
2303                                    wrap_c, 8) < 20 * s->qscale)
2304                     skip_dct[6] = 1;
2305                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2306                                    dest_cr + uv_dct_offset,
2307                                    wrap_c, 8) < 20 * s->qscale)
2308                     skip_dct[7] = 1;
2309             }
2310         }
2311     }
2312
2313     if (s->quantizer_noise_shaping) {
2314         if (!skip_dct[0])
2315             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2316         if (!skip_dct[1])
2317             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2318         if (!skip_dct[2])
2319             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2320         if (!skip_dct[3])
2321             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2322         if (!skip_dct[4])
2323             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2324         if (!skip_dct[5])
2325             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2326         if (!s->chroma_y_shift) { /* 422 */
2327             if (!skip_dct[6])
2328                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2329                                   wrap_c);
2330             if (!skip_dct[7])
2331                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2332                                   wrap_c);
2333         }
2334         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2335     }
2336
2337     /* DCT & quantize */
2338     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2339     {
2340         for (i = 0; i < mb_block_count; i++) {
2341             if (!skip_dct[i]) {
2342                 int overflow;
2343                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2344                 // FIXME we could decide to change the quantizer instead of
2345                 // clipping
2346                 // JS: I don't think that would be a good idea, it could lower
2347                 //     quality instead of improving it. Only INTRADC clipping
2348                 //     deserves changes in the quantizer
2349                 if (overflow)
2350                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2351             } else
2352                 s->block_last_index[i] = -1;
2353         }
2354         if (s->quantizer_noise_shaping) {
2355             for (i = 0; i < mb_block_count; i++) {
2356                 if (!skip_dct[i]) {
2357                     s->block_last_index[i] =
2358                         dct_quantize_refine(s, s->block[i], weight[i],
2359                                             orig[i], i, s->qscale);
2360                 }
2361             }
2362         }
2363
2364         if (s->luma_elim_threshold && !s->mb_intra)
2365             for (i = 0; i < 4; i++)
2366                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2367         if (s->chroma_elim_threshold && !s->mb_intra)
2368             for (i = 4; i < mb_block_count; i++)
2369                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2370
2371         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2372             for (i = 0; i < mb_block_count; i++) {
2373                 if (s->block_last_index[i] == -1)
2374                     s->coded_score[i] = INT_MAX / 256;
2375             }
2376         }
2377     }
2378
2379     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2380         s->block_last_index[4] =
2381         s->block_last_index[5] = 0;
2382         s->block[4][0] =
2383         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2384         if (!s->chroma_y_shift) { /* 422 / 444 */
2385             for (i=6; i<12; i++) {
2386                 s->block_last_index[i] = 0;
2387                 s->block[i][0] = s->block[4][0];
2388             }
2389         }
2390     }
2391
2392     // FIXME: the non-C dct_quantize implementations return an incorrect block_last_index
2393     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2394         for (i = 0; i < mb_block_count; i++) {
2395             int j;
2396             if (s->block_last_index[i] > 0) {
2397                 for (j = 63; j > 0; j--) {
2398                     if (s->block[i][s->intra_scantable.permutated[j]])
2399                         break;
2400                 }
2401                 s->block_last_index[i] = j;
2402             }
2403         }
2404     }
2405
2406     /* huffman encode */
2407     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2408     case AV_CODEC_ID_MPEG1VIDEO:
2409     case AV_CODEC_ID_MPEG2VIDEO:
2410         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2411             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2412         break;
2413     case AV_CODEC_ID_MPEG4:
2414         if (CONFIG_MPEG4_ENCODER)
2415             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2416         break;
2417     case AV_CODEC_ID_MSMPEG4V2:
2418     case AV_CODEC_ID_MSMPEG4V3:
2419     case AV_CODEC_ID_WMV1:
2420         if (CONFIG_MSMPEG4_ENCODER)
2421             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2422         break;
2423     case AV_CODEC_ID_WMV2:
2424         if (CONFIG_WMV2_ENCODER)
2425             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2426         break;
2427     case AV_CODEC_ID_H261:
2428         if (CONFIG_H261_ENCODER)
2429             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2430         break;
2431     case AV_CODEC_ID_H263:
2432     case AV_CODEC_ID_H263P:
2433     case AV_CODEC_ID_FLV1:
2434     case AV_CODEC_ID_RV10:
2435     case AV_CODEC_ID_RV20:
2436         if (CONFIG_H263_ENCODER)
2437             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2438         break;
2439 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
2440     case AV_CODEC_ID_MJPEG:
2441     case AV_CODEC_ID_AMV:
2442         ff_mjpeg_encode_mb(s, s->block);
2443         break;
2444 #endif
2445     case AV_CODEC_ID_SPEEDHQ:
2446         if (CONFIG_SPEEDHQ_ENCODER)
2447             ff_speedhq_encode_mb(s, s->block);
2448         break;
2449     default:
2450         av_assert1(0);
2451     }
2452 }
2453
2454 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2455 {
2456     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2457     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2458     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2459 }
2460
2461 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2462     int i;
2463
2464     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2465
2466     /* MPEG-1 */
2467     d->mb_skip_run= s->mb_skip_run;
2468     for(i=0; i<3; i++)
2469         d->last_dc[i] = s->last_dc[i];
2470
2471     /* statistics */
2472     d->mv_bits= s->mv_bits;
2473     d->i_tex_bits= s->i_tex_bits;
2474     d->p_tex_bits= s->p_tex_bits;
2475     d->i_count= s->i_count;
2476     d->f_count= s->f_count;
2477     d->b_count= s->b_count;
2478     d->skip_count= s->skip_count;
2479     d->misc_bits= s->misc_bits;
2480     d->last_bits= 0;
2481
2482     d->mb_skipped= 0;
2483     d->qscale= s->qscale;
2484     d->dquant= s->dquant;
2485
2486     d->esc3_level_length= s->esc3_level_length;
2487 }
2488
2489 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2490     int i;
2491
2492     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2493     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2494
2495     /* MPEG-1 */
2496     d->mb_skip_run= s->mb_skip_run;
2497     for(i=0; i<3; i++)
2498         d->last_dc[i] = s->last_dc[i];
2499
2500     /* statistics */
2501     d->mv_bits= s->mv_bits;
2502     d->i_tex_bits= s->i_tex_bits;
2503     d->p_tex_bits= s->p_tex_bits;
2504     d->i_count= s->i_count;
2505     d->f_count= s->f_count;
2506     d->b_count= s->b_count;
2507     d->skip_count= s->skip_count;
2508     d->misc_bits= s->misc_bits;
2509
2510     d->mb_intra= s->mb_intra;
2511     d->mb_skipped= s->mb_skipped;
2512     d->mv_type= s->mv_type;
2513     d->mv_dir= s->mv_dir;
2514     d->pb= s->pb;
2515     if(s->data_partitioning){
2516         d->pb2= s->pb2;
2517         d->tex_pb= s->tex_pb;
2518     }
2519     d->block= s->block;
2520     for(i=0; i<8; i++)
2521         d->block_last_index[i]= s->block_last_index[i];
2522     d->interlaced_dct= s->interlaced_dct;
2523     d->qscale= s->qscale;
2524
2525     d->esc3_level_length= s->esc3_level_length;
2526 }
2527
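/* Trial-encode one macroblock coding choice into the scratch bit buffers and
 * reconstruction scratchpad selected by *next_block, score it by coded bits
 * (plus lambda-weighted SSE when full RD mb_decision is enabled), and keep it
 * if it beats the best score so far in *dmin. */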
2528 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2529                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2530                            int *dmin, int *next_block, int motion_x, int motion_y)
2531 {
2532     int score;
2533     uint8_t *dest_backup[3];
2534
2535     copy_context_before_encode(s, backup, type);
2536
2537     s->block= s->blocks[*next_block];
2538     s->pb= pb[*next_block];
2539     if(s->data_partitioning){
2540         s->pb2   = pb2   [*next_block];
2541         s->tex_pb= tex_pb[*next_block];
2542     }
2543
2544     if(*next_block){
2545         memcpy(dest_backup, s->dest, sizeof(s->dest));
2546         s->dest[0] = s->sc.rd_scratchpad;
2547         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2548         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2549         av_assert0(s->linesize >= 32); //FIXME
2550     }
2551
2552     encode_mb(s, motion_x, motion_y);
2553
2554     score= put_bits_count(&s->pb);
2555     if(s->data_partitioning){
2556         score+= put_bits_count(&s->pb2);
2557         score+= put_bits_count(&s->tex_pb);
2558     }
2559
2560     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2561         ff_mpv_reconstruct_mb(s, s->block);
2562
2563         score *= s->lambda2;
2564         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2565     }
2566
2567     if(*next_block){
2568         memcpy(s->dest, dest_backup, sizeof(s->dest));
2569     }
2570
2571     if(score<*dmin){
2572         *dmin= score;
2573         *next_block^=1;
2574
2575         copy_context_after_encode(best, s, type);
2576     }
2577 }
2578
2579 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2580     const uint32_t *sq = ff_square_tab + 256;
2581     int acc=0;
2582     int x,y;
2583
2584     if(w==16 && h==16)
2585         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2586     else if(w==8 && h==8)
2587         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2588
2589     for(y=0; y<h; y++){
2590         for(x=0; x<w; x++){
2591             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2592         }
2593     }
2594
2595     av_assert2(acc>=0);
2596
2597     return acc;
2598 }
2599
2600 static int sse_mb(MpegEncContext *s){
2601     int w= 16;
2602     int h= 16;
2603
2604     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2605     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2606
2607     if(w==16 && h==16)
2608       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2609         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2610                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2611                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2612       }else{
2613         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2614                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2615                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2616       }
2617     else
2618         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2619                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2620                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2621 }
2622
2623 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2624     MpegEncContext *s= *(void**)arg;
2625
2626
2627     s->me.pre_pass=1;
2628     s->me.dia_size= s->avctx->pre_dia_size;
2629     s->first_slice_line=1;
2630     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2631         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2632             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2633         }
2634         s->first_slice_line=0;
2635     }
2636
2637     s->me.pre_pass=0;
2638
2639     return 0;
2640 }
2641
2642 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2643     MpegEncContext *s= *(void**)arg;
2644
2645     s->me.dia_size= s->avctx->dia_size;
2646     s->first_slice_line=1;
2647     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2648         s->mb_x=0; //for block init below
2649         ff_init_block_index(s);
2650         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2651             s->block_index[0]+=2;
2652             s->block_index[1]+=2;
2653             s->block_index[2]+=2;
2654             s->block_index[3]+=2;
2655
2656             /* compute motion vector & mb_type and store in context */
2657             if(s->pict_type==AV_PICTURE_TYPE_B)
2658                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2659             else
2660                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2661         }
2662         s->first_slice_line=0;
2663     }
2664     return 0;
2665 }
2666
2667 static int mb_var_thread(AVCodecContext *c, void *arg){
2668     MpegEncContext *s= *(void**)arg;
2669     int mb_x, mb_y;
2670
2671     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2672         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2673             int xx = mb_x * 16;
2674             int yy = mb_y * 16;
2675             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2676             int varc;
2677             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2678
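            // spatial variance of the 16x16 luma block (mean of squares minus
            // squared mean, both over 256 pixels), with a small bias and rounding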
2679             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2680                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2681
2682             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2683             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2684             s->me.mb_var_sum_temp    += varc;
2685         }
2686     }
2687     return 0;
2688 }
2689
2690 static void write_slice_end(MpegEncContext *s){
2691     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2692         if(s->partitioned_frame){
2693             ff_mpeg4_merge_partitions(s);
2694         }
2695
2696         ff_mpeg4_stuffing(&s->pb);
2697     } else if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
2698                s->out_format == FMT_MJPEG) {
2699         ff_mjpeg_encode_stuffing(s);
2700     } else if (CONFIG_SPEEDHQ_ENCODER && s->out_format == FMT_SPEEDHQ) {
2701         ff_speedhq_end_slice(s);
2702     }
2703
2704     flush_put_bits(&s->pb);
2705
2706     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2707         s->misc_bits+= get_bits_diff(s);
2708 }
2709
2710 static void write_mb_info(MpegEncContext *s)
2711 {
2712     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2713     int offset = put_bits_count(&s->pb);
2714     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2715     int gobn = s->mb_y / s->gob_index;
2716     int pred_x, pred_y;
2717     if (CONFIG_H263_ENCODER)
2718         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2719     bytestream_put_le32(&ptr, offset);
2720     bytestream_put_byte(&ptr, s->qscale);
2721     bytestream_put_byte(&ptr, gobn);
2722     bytestream_put_le16(&ptr, mba);
2723     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2724     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2725     /* 4MV not implemented */
2726     bytestream_put_byte(&ptr, 0); /* hmv2 */
2727     bytestream_put_byte(&ptr, 0); /* vmv2 */
2728 }
2729
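/* H.263 macroblock-info side data: a new 12-byte record (filled in by
 * write_mb_info above) is started roughly every s->mb_info bytes of coded
 * output; records are re-anchored at slice start codes. */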
2730 static void update_mb_info(MpegEncContext *s, int startcode)
2731 {
2732     if (!s->mb_info)
2733         return;
2734     if (put_bytes_count(&s->pb, 0) - s->prev_mb_info >= s->mb_info) {
2735         s->mb_info_size += 12;
2736         s->prev_mb_info = s->last_mb_info;
2737     }
2738     if (startcode) {
2739         s->prev_mb_info = put_bytes_count(&s->pb, 0);
2740         /* This might have incremented mb_info_size above, and we return without
2741          * actually writing any info into that slot yet. But in that case, this
2742          * function will be called again right after the start code has been
2743          * written, and the mb info will actually be written then. */
2744         return;
2745     }
2746
2747     s->last_mb_info = put_bytes_count(&s->pb, 0);
2748     if (!s->mb_info_size)
2749         s->mb_info_size += 12;
2750     write_mb_info(s);
2751 }
2752
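/* Grow the shared output buffer once fewer than `threshold` bytes remain,
 * then rebase the PutBitContext and the pointers into the old buffer (last
 * GOB start, vbv_delay position). Only done for a single slice context
 * writing into the codec's internal byte buffer. */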
2753 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2754 {
2755     if (put_bytes_left(&s->pb, 0) < threshold
2756         && s->slice_context_count == 1
2757         && s->pb.buf == s->avctx->internal->byte_buffer) {
2758         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2759         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2760
2761         uint8_t *new_buffer = NULL;
2762         int new_buffer_size = 0;
2763
2764         if ((s->avctx->internal->byte_buffer_size + size_increase) >= INT_MAX/8) {
2765             av_log(s->avctx, AV_LOG_ERROR, "Cannot reallocate putbit buffer\n");
2766             return AVERROR(ENOMEM);
2767         }
2768
2769         emms_c();
2770
2771         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2772                               s->avctx->internal->byte_buffer_size + size_increase);
2773         if (!new_buffer)
2774             return AVERROR(ENOMEM);
2775
2776         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2777         av_free(s->avctx->internal->byte_buffer);
2778         s->avctx->internal->byte_buffer      = new_buffer;
2779         s->avctx->internal->byte_buffer_size = new_buffer_size;
2780         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2781         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2782         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2783     }
2784     if (put_bytes_left(&s->pb, 0) < threshold)
2785         return AVERROR(EINVAL);
2786     return 0;
2787 }
2788
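/*
 * Slice worker: iterates over the macroblocks of [start_mb_y, end_mb_y),
 * inserting resync/GOB/slice headers where required and encoding each MB.
 * When more than one MB type is a candidate (or QP_RD is enabled), every
 * candidate is encoded into a scratch PutBitContext via encode_mb_hq() and
 * the cheapest one is copied back into the real bitstream; otherwise the
 * single possible type is encoded directly.
 */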
2789 static int encode_thread(AVCodecContext *c, void *arg){
2790     MpegEncContext *s= *(void**)arg;
2791     int mb_x, mb_y, mb_y_order;
2792     int chr_h= 16>>s->chroma_y_shift;
2793     int i, j;
2794     MpegEncContext best_s = { 0 }, backup_s;
2795     uint8_t bit_buf[2][MAX_MB_BYTES];
2796     uint8_t bit_buf2[2][MAX_MB_BYTES];
2797     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2798     PutBitContext pb[2], pb2[2], tex_pb[2];
2799
2800     for(i=0; i<2; i++){
2801         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2802         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2803         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2804     }
2805
2806     s->last_bits= put_bits_count(&s->pb);
2807     s->mv_bits=0;
2808     s->misc_bits=0;
2809     s->i_tex_bits=0;
2810     s->p_tex_bits=0;
2811     s->i_count=0;
2812     s->f_count=0;
2813     s->b_count=0;
2814     s->skip_count=0;
2815
2816     for(i=0; i<3; i++){
2817         /* init last dc values */
2818         /* note: quant matrix value (8) is implied here */
2819         s->last_dc[i] = 128 << s->intra_dc_precision;
2820
2821         s->current_picture.encoding_error[i] = 0;
2822     }
2823     if(s->codec_id==AV_CODEC_ID_AMV){
2824         s->last_dc[0] = 128*8/13;
2825         s->last_dc[1] = 128*8/14;
2826         s->last_dc[2] = 128*8/14;
2827     }
2828     s->mb_skip_run = 0;
2829     memset(s->last_mv, 0, sizeof(s->last_mv));
2830
2831     s->last_mv_dir = 0;
2832
2833     switch(s->codec_id){
2834     case AV_CODEC_ID_H263:
2835     case AV_CODEC_ID_H263P:
2836     case AV_CODEC_ID_FLV1:
2837         if (CONFIG_H263_ENCODER)
2838             s->gob_index = H263_GOB_HEIGHT(s->height);
2839         break;
2840     case AV_CODEC_ID_MPEG4:
2841         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2842             ff_mpeg4_init_partitions(s);
2843         break;
2844     }
2845
2846     s->resync_mb_x=0;
2847     s->resync_mb_y=0;
2848     s->first_slice_line = 1;
2849     s->ptr_lastgob = s->pb.buf;
2850     for (mb_y_order = s->start_mb_y; mb_y_order < s->end_mb_y; mb_y_order++) {
2851         if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
2852             int first_in_slice;
2853             mb_y = ff_speedhq_mb_y_order_to_mb(mb_y_order, s->mb_height, &first_in_slice);
2854             if (first_in_slice && mb_y_order != s->start_mb_y)
2855                 ff_speedhq_end_slice(s);
2856             s->last_dc[0] = s->last_dc[1] = s->last_dc[2] = 1024 << s->intra_dc_precision;
2857         } else {
2858             mb_y = mb_y_order;
2859         }
2860         s->mb_x=0;
2861         s->mb_y= mb_y;
2862
2863         ff_set_qscale(s, s->qscale);
2864         ff_init_block_index(s);
2865
2866         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2867             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2868             int mb_type= s->mb_type[xy];
2869 //            int d;
2870             int dmin= INT_MAX;
2871             int dir;
2872             int size_increase =  s->avctx->internal->byte_buffer_size/4
2873                                + s->mb_width*MAX_MB_BYTES;
2874
2875             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2876             if (put_bytes_left(&s->pb, 0) < MAX_MB_BYTES){
2877                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2878                 return -1;
2879             }
2880             if(s->data_partitioning){
2881                 if (put_bytes_left(&s->pb2,    0) < MAX_MB_BYTES ||
2882                     put_bytes_left(&s->tex_pb, 0) < MAX_MB_BYTES) {
2883                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2884                     return -1;
2885                 }
2886             }
2887
2888             s->mb_x = mb_x;
2889             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2890             ff_update_block_index(s);
2891
2892             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2893                 ff_h261_reorder_mb_index(s);
2894                 xy= s->mb_y*s->mb_stride + s->mb_x;
2895                 mb_type= s->mb_type[xy];
2896             }
2897
2898             /* write gob / video packet header  */
2899             if(s->rtp_mode){
2900                 int current_packet_size, is_gob_start;
2901
2902                 current_packet_size = put_bytes_count(&s->pb, 1)
2903                                       - (s->ptr_lastgob - s->pb.buf);
2904
2905                 is_gob_start = s->rtp_payload_size &&
2906                                current_packet_size >= s->rtp_payload_size &&
2907                                mb_y + mb_x > 0;
2908
2909                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2910
2911                 switch(s->codec_id){
2912                 case AV_CODEC_ID_H263:
2913                 case AV_CODEC_ID_H263P:
2914                     if(!s->h263_slice_structured)
2915                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2916                     break;
2917                 case AV_CODEC_ID_MPEG2VIDEO:
2918                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
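                    /* fall through */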
2919                 case AV_CODEC_ID_MPEG1VIDEO:
2920                     if(s->mb_skip_run) is_gob_start=0;
2921                     break;
2922                 case AV_CODEC_ID_MJPEG:
2923                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2924                     break;
2925                 }
2926
2927                 if(is_gob_start){
2928                     if(s->start_mb_y != mb_y || mb_x!=0){
2929                         write_slice_end(s);
2930
2931                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2932                             ff_mpeg4_init_partitions(s);
2933                         }
2934                     }
2935
2936                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2937                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2938
2939                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2940                         int r = put_bytes_count(&s->pb, 0) + s->picture_number + 16 + s->mb_x + s->mb_y;
2941                         int d = 100 / s->error_rate;
2942                         if(r % d == 0){
2943                             current_packet_size=0;
2944                             s->pb.buf_ptr= s->ptr_lastgob;
2945                             av_assert1(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2946                         }
2947                     }
2948
2949                     update_mb_info(s, 1);
2950
2951                     switch(s->codec_id){
2952                     case AV_CODEC_ID_MPEG4:
2953                         if (CONFIG_MPEG4_ENCODER) {
2954                             ff_mpeg4_encode_video_packet_header(s);
2955                             ff_mpeg4_clean_buffers(s);
2956                         }
2957                     break;
2958                     case AV_CODEC_ID_MPEG1VIDEO:
2959                     case AV_CODEC_ID_MPEG2VIDEO:
2960                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2961                             ff_mpeg1_encode_slice_header(s);
2962                             ff_mpeg1_clean_buffers(s);
2963                         }
2964                     break;
2965                     case AV_CODEC_ID_H263:
2966                     case AV_CODEC_ID_H263P:
2967                         if (CONFIG_H263_ENCODER)
2968                             ff_h263_encode_gob_header(s, mb_y);
2969                     break;
2970                     }
2971
2972                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2973                         int bits= put_bits_count(&s->pb);
2974                         s->misc_bits+= bits - s->last_bits;
2975                         s->last_bits= bits;
2976                     }
2977
2978                     s->ptr_lastgob += current_packet_size;
2979                     s->first_slice_line=1;
2980                     s->resync_mb_x=mb_x;
2981                     s->resync_mb_y=mb_y;
2982                 }
2983             }
2984
2985             if(  (s->resync_mb_x   == s->mb_x)
2986                && s->resync_mb_y+1 == s->mb_y){
2987                 s->first_slice_line=0;
2988             }
2989
2990             s->mb_skipped=0;
2991             s->dquant=0; //only for QP_RD
2992
2993             update_mb_info(s, 0);
2994
2995             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2996                 int next_block=0;
2997                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2998
2999                 copy_context_before_encode(&backup_s, s, -1);
3000                 backup_s.pb= s->pb;
3001                 best_s.data_partitioning= s->data_partitioning;
3002                 best_s.partitioned_frame= s->partitioned_frame;
3003                 if(s->data_partitioning){
3004                     backup_s.pb2= s->pb2;
3005                     backup_s.tex_pb= s->tex_pb;
3006                 }
3007
3008                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3009                     s->mv_dir = MV_DIR_FORWARD;
3010                     s->mv_type = MV_TYPE_16X16;
3011                     s->mb_intra= 0;
3012                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3013                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3014                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3015                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3016                 }
3017                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3018                     s->mv_dir = MV_DIR_FORWARD;
3019                     s->mv_type = MV_TYPE_FIELD;
3020                     s->mb_intra= 0;
3021                     for(i=0; i<2; i++){
3022                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3023                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3024                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3025                     }
3026                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3027                                  &dmin, &next_block, 0, 0);
3028                 }
3029                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3030                     s->mv_dir = MV_DIR_FORWARD;
3031                     s->mv_type = MV_TYPE_16X16;
3032                     s->mb_intra= 0;
3033                     s->mv[0][0][0] = 0;
3034                     s->mv[0][0][1] = 0;
3035                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3036                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3037                 }
3038                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3039                     s->mv_dir = MV_DIR_FORWARD;
3040                     s->mv_type = MV_TYPE_8X8;
3041                     s->mb_intra= 0;
3042                     for(i=0; i<4; i++){
3043                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3044                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3045                     }
3046                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3047                                  &dmin, &next_block, 0, 0);
3048                 }
3049                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3050                     s->mv_dir = MV_DIR_FORWARD;
3051                     s->mv_type = MV_TYPE_16X16;
3052                     s->mb_intra= 0;
3053                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3054                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3055                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3056                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3057                 }
3058                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3059                     s->mv_dir = MV_DIR_BACKWARD;
3060                     s->mv_type = MV_TYPE_16X16;
3061                     s->mb_intra= 0;
3062                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3063                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3064                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3065                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3066                 }
3067                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3068                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3069                     s->mv_type = MV_TYPE_16X16;
3070                     s->mb_intra= 0;
3071                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3072                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3073                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3074                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3075                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3076                                  &dmin, &next_block, 0, 0);
3077                 }
3078                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3079                     s->mv_dir = MV_DIR_FORWARD;
3080                     s->mv_type = MV_TYPE_FIELD;
3081                     s->mb_intra= 0;
3082                     for(i=0; i<2; i++){
3083                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3084                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3085                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3086                     }
3087                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3088                                  &dmin, &next_block, 0, 0);
3089                 }
3090                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3091                     s->mv_dir = MV_DIR_BACKWARD;
3092                     s->mv_type = MV_TYPE_FIELD;
3093                     s->mb_intra= 0;
3094                     for(i=0; i<2; i++){
3095                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3096                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3097                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3098                     }
3099                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3100                                  &dmin, &next_block, 0, 0);
3101                 }
3102                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3103                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3104                     s->mv_type = MV_TYPE_FIELD;
3105                     s->mb_intra= 0;
3106                     for(dir=0; dir<2; dir++){
3107                         for(i=0; i<2; i++){
3108                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3109                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3110                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3111                         }
3112                     }
3113                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3114                                  &dmin, &next_block, 0, 0);
3115                 }
3116                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3117                     s->mv_dir = 0;
3118                     s->mv_type = MV_TYPE_16X16;
3119                     s->mb_intra= 1;
3120                     s->mv[0][0][0] = 0;
3121                     s->mv[0][0][1] = 0;
3122                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3123                                  &dmin, &next_block, 0, 0);
3124                     if(s->h263_pred || s->h263_aic){
3125                         if(best_s.mb_intra)
3126                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3127                         else
3128                             ff_clean_intra_table_entries(s); //old mode?
3129                     }
3130                 }
3131
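                /* QP_RD: re-encode the best 16x16 mode with the quantizer
                 * nudged by the offsets in dquant_tab (only +-2 for B-frames)
                 * and keep whichever qscale yields the lowest RD score.
                 * DC/AC prediction state is saved and restored so rejected
                 * trials leave no trace. */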
3132                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3133                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3134                         const int last_qp= backup_s.qscale;
3135                         int qpi, qp, dc[6];
3136                         int16_t ac[6][16];
3137                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3138                         static const int dquant_tab[4]={-1,1,-2,2};
3139                         int storecoefs = s->mb_intra && s->dc_val[0];
3140
3141                         av_assert2(backup_s.dquant == 0);
3142
3143                         //FIXME intra
3144                         s->mv_dir= best_s.mv_dir;
3145                         s->mv_type = MV_TYPE_16X16;
3146                         s->mb_intra= best_s.mb_intra;
3147                         s->mv[0][0][0] = best_s.mv[0][0][0];
3148                         s->mv[0][0][1] = best_s.mv[0][0][1];
3149                         s->mv[1][0][0] = best_s.mv[1][0][0];
3150                         s->mv[1][0][1] = best_s.mv[1][0][1];
3151
3152                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3153                         for(; qpi<4; qpi++){
3154                             int dquant= dquant_tab[qpi];
3155                             qp= last_qp + dquant;
3156                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3157                                 continue;
3158                             backup_s.dquant= dquant;
3159                             if(storecoefs){
3160                                 for(i=0; i<6; i++){
3161                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3162                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3163                                 }
3164                             }
3165
3166                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3167                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3168                             if(best_s.qscale != qp){
3169                                 if(storecoefs){
3170                                     for(i=0; i<6; i++){
3171                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3172                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3173                                     }
3174                                 }
3175                             }
3176                         }
3177                     }
3178                 }
3179                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3180                     int mx= s->b_direct_mv_table[xy][0];
3181                     int my= s->b_direct_mv_table[xy][1];
3182
3183                     backup_s.dquant = 0;
3184                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3185                     s->mb_intra= 0;
3186                     ff_mpeg4_set_direct_mv(s, mx, my);
3187                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3188                                  &dmin, &next_block, mx, my);
3189                 }
3190                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3191                     backup_s.dquant = 0;
3192                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3193                     s->mb_intra= 0;
3194                     ff_mpeg4_set_direct_mv(s, 0, 0);
3195                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3196                                  &dmin, &next_block, 0, 0);
3197                 }
3198                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3199                     int coded=0;
3200                     for(i=0; i<6; i++)
3201                         coded |= s->block_last_index[i];
3202                     if(coded){
3203                         int mx,my;
3204                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3205                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3206                             mx=my=0; //FIXME find the one we actually used
3207                             ff_mpeg4_set_direct_mv(s, mx, my);
3208                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3209                             mx= s->mv[1][0][0];
3210                             my= s->mv[1][0][1];
3211                         }else{
3212                             mx= s->mv[0][0][0];
3213                             my= s->mv[0][0][1];
3214                         }
3215
3216                         s->mv_dir= best_s.mv_dir;
3217                         s->mv_type = best_s.mv_type;
3218                         s->mb_intra= 0;
3219 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3220                         s->mv[0][0][1] = best_s.mv[0][0][1];
3221                         s->mv[1][0][0] = best_s.mv[1][0][0];
3222                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3223                         backup_s.dquant= 0;
3224                         s->skipdct=1;
3225                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3226                                         &dmin, &next_block, mx, my);
3227                         s->skipdct=0;
3228                     }
3229                 }
3230
3231                 s->current_picture.qscale_table[xy] = best_s.qscale;
3232
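                /* Copy the winning candidate into the real bitstream: best_s
                 * holds the chosen state, and its bits sit in the scratch
                 * buffer that is *not* the current one (encode_mb_hq() flips
                 * next_block whenever a trial becomes the new best), hence
                 * the bit_buf[next_block^1] below. */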
3233                 copy_context_after_encode(s, &best_s, -1);
3234
3235                 pb_bits_count= put_bits_count(&s->pb);
3236                 flush_put_bits(&s->pb);
3237                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3238                 s->pb= backup_s.pb;
3239
3240                 if(s->data_partitioning){
3241                     pb2_bits_count= put_bits_count(&s->pb2);
3242                     flush_put_bits(&s->pb2);
3243                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3244                     s->pb2= backup_s.pb2;
3245
3246                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3247                     flush_put_bits(&s->tex_pb);
3248                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3249                     s->tex_pb= backup_s.tex_pb;
3250                 }
3251                 s->last_bits= put_bits_count(&s->pb);
3252
3253                 if (CONFIG_H263_ENCODER &&
3254                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3255                     ff_h263_update_motion_val(s);
3256
3257                 if(next_block==0){ //FIXME 16 vs linesize16
3258                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3259                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3260                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3261                 }
3262
3263                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3264                     ff_mpv_reconstruct_mb(s, s->block);
3265             } else {
3266                 int motion_x = 0, motion_y = 0;
3267                 s->mv_type=MV_TYPE_16X16;
3268                 // only one MB-Type possible
3269
3270                 switch(mb_type){
3271                 case CANDIDATE_MB_TYPE_INTRA:
3272                     s->mv_dir = 0;
3273                     s->mb_intra= 1;
3274                     motion_x= s->mv[0][0][0] = 0;
3275                     motion_y= s->mv[0][0][1] = 0;
3276                     break;
3277                 case CANDIDATE_MB_TYPE_INTER:
3278                     s->mv_dir = MV_DIR_FORWARD;
3279                     s->mb_intra= 0;
3280                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3281                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3282                     break;
3283                 case CANDIDATE_MB_TYPE_INTER_I:
3284                     s->mv_dir = MV_DIR_FORWARD;
3285                     s->mv_type = MV_TYPE_FIELD;
3286                     s->mb_intra= 0;
3287                     for(i=0; i<2; i++){
3288                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3289                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3290                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3291                     }
3292                     break;
3293                 case CANDIDATE_MB_TYPE_INTER4V:
3294                     s->mv_dir = MV_DIR_FORWARD;
3295                     s->mv_type = MV_TYPE_8X8;
3296                     s->mb_intra= 0;
3297                     for(i=0; i<4; i++){
3298                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3299                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3300                     }
3301                     break;
3302                 case CANDIDATE_MB_TYPE_DIRECT:
3303                     if (CONFIG_MPEG4_ENCODER) {
3304                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3305                         s->mb_intra= 0;
3306                         motion_x=s->b_direct_mv_table[xy][0];
3307                         motion_y=s->b_direct_mv_table[xy][1];
3308                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3309                     }
3310                     break;
3311                 case CANDIDATE_MB_TYPE_DIRECT0:
3312                     if (CONFIG_MPEG4_ENCODER) {
3313                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3314                         s->mb_intra= 0;
3315                         ff_mpeg4_set_direct_mv(s, 0, 0);
3316                     }
3317                     break;
3318                 case CANDIDATE_MB_TYPE_BIDIR:
3319                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3320                     s->mb_intra= 0;
3321                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3322                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3323                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3324                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3325                     break;
3326                 case CANDIDATE_MB_TYPE_BACKWARD:
3327                     s->mv_dir = MV_DIR_BACKWARD;
3328                     s->mb_intra= 0;
3329                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3330                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3331                     break;
3332                 case CANDIDATE_MB_TYPE_FORWARD:
3333                     s->mv_dir = MV_DIR_FORWARD;
3334                     s->mb_intra= 0;
3335                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3336                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3337                     break;
3338                 case CANDIDATE_MB_TYPE_FORWARD_I:
3339                     s->mv_dir = MV_DIR_FORWARD;
3340                     s->mv_type = MV_TYPE_FIELD;
3341                     s->mb_intra= 0;
3342                     for(i=0; i<2; i++){
3343                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3344                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3345                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3346                     }
3347                     break;
3348                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3349                     s->mv_dir = MV_DIR_BACKWARD;
3350                     s->mv_type = MV_TYPE_FIELD;
3351                     s->mb_intra= 0;
3352                     for(i=0; i<2; i++){
3353                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3354                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3355                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3356                     }
3357                     break;
3358                 case CANDIDATE_MB_TYPE_BIDIR_I:
3359                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3360                     s->mv_type = MV_TYPE_FIELD;
3361                     s->mb_intra= 0;
3362                     for(dir=0; dir<2; dir++){
3363                         for(i=0; i<2; i++){
3364                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3365                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3366                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3367                         }
3368                     }
3369                     break;
3370                 default:
3371                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3372                 }
3373
3374                 encode_mb(s, motion_x, motion_y);
3375
3376                 // RAL: Update last macroblock type
3377                 s->last_mv_dir = s->mv_dir;
3378
3379                 if (CONFIG_H263_ENCODER &&
3380                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3381                     ff_h263_update_motion_val(s);
3382
3383                 ff_mpv_reconstruct_mb(s, s->block);
3384             }
3385
3386             /* clean the MV table in I/P/S frames; direct mode in B-frames needs it */
3387             if(s->mb_intra /* && I,P,S_TYPE */){
3388                 s->p_mv_table[xy][0]=0;
3389                 s->p_mv_table[xy][1]=0;
3390             }
3391
3392             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3393                 int w= 16;
3394                 int h= 16;
3395
3396                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3397                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3398
3399                 s->current_picture.encoding_error[0] += sse(
3400                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3401                     s->dest[0], w, h, s->linesize);
3402                 s->current_picture.encoding_error[1] += sse(
3403                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3404                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3405                 s->current_picture.encoding_error[2] += sse(
3406                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3407                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3408             }
3409             if(s->loop_filter){
3410                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3411                     ff_h263_loop_filter(s);
3412             }
3413             ff_dlog(s->avctx, "MB %d %d bits\n",
3414                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3415         }
3416     }
3417
3418     // not pretty, but the extension header must be written before flushing, so it has to go here
3419     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3420         ff_msmpeg4_encode_ext_header(s);
3421
3422     write_slice_end(s);
3423
3424     return 0;
3425 }
3426
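/*
 * Thread merging: MERGE(field) adds the field of 'src' into 'dst' and clears
 * it in 'src'. merge_context_after_me() folds back the motion-estimation
 * statistics of a slice thread, merge_context_after_encode() additionally
 * appends the slice's bitstream onto the main PutBitContext.
 */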
3427 #define MERGE(field) dst->field += src->field; src->field=0
3428 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3429     MERGE(me.scene_change_score);
3430     MERGE(me.mc_mb_var_sum_temp);
3431     MERGE(me.mb_var_sum_temp);
3432 }
3433
3434 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3435     int i;
3436
3437     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3438     MERGE(dct_count[1]);
3439     MERGE(mv_bits);
3440     MERGE(i_tex_bits);
3441     MERGE(p_tex_bits);
3442     MERGE(i_count);
3443     MERGE(f_count);
3444     MERGE(b_count);
3445     MERGE(skip_count);
3446     MERGE(misc_bits);
3447     MERGE(er.error_count);
3448     MERGE(padding_bug_score);
3449     MERGE(current_picture.encoding_error[0]);
3450     MERGE(current_picture.encoding_error[1]);
3451     MERGE(current_picture.encoding_error[2]);
3452
3453     if (dst->noise_reduction){
3454         for(i=0; i<64; i++){
3455             MERGE(dct_error_sum[0][i]);
3456             MERGE(dct_error_sum[1][i]);
3457         }
3458     }
3459
3460     av_assert1(put_bits_count(&src->pb) % 8 ==0);
3461     av_assert1(put_bits_count(&dst->pb) % 8 ==0);
3462     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3463     flush_put_bits(&dst->pb);
3464 }
3465
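/*
 * Pick the frame-level quantizer: a previously scheduled lambda
 * (next_lambda), the rate-control estimate, or the fixed qscale. With
 * adaptive quantization the per-MB qscale table is smoothed afterwards,
 * since H.263/MPEG-4 only allow small per-MB quantizer changes.
 */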
3466 static int estimate_qp(MpegEncContext *s, int dry_run){
3467     if (s->next_lambda){
3468         s->current_picture_ptr->f->quality =
3469         s->current_picture.f->quality = s->next_lambda;
3470         if(!dry_run) s->next_lambda= 0;
3471     } else if (!s->fixed_qscale) {
3472         int quality = ff_rate_estimate_qscale(s, dry_run);
3473         s->current_picture_ptr->f->quality =
3474         s->current_picture.f->quality = quality;
3475         if (s->current_picture.f->quality < 0)
3476             return -1;
3477     }
3478
3479     if(s->adaptive_quant){
3480         switch(s->codec_id){
3481         case AV_CODEC_ID_MPEG4:
3482             if (CONFIG_MPEG4_ENCODER)
3483                 ff_clean_mpeg4_qscales(s);
3484             break;
3485         case AV_CODEC_ID_H263:
3486         case AV_CODEC_ID_H263P:
3487         case AV_CODEC_ID_FLV1:
3488             if (CONFIG_H263_ENCODER)
3489                 ff_clean_h263_qscales(s);
3490             break;
3491         default:
3492             ff_init_qscale_tab(s);
3493         }
3494
3495         s->lambda= s->lambda_table[0];
3496         //FIXME broken
3497     }else
3498         s->lambda = s->current_picture.f->quality;
3499     update_qscale(s);
3500     return 0;
3501 }
3502
3503 /* must be called before writing the header */
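/* pp_time is the distance between the two reference (non-B) frames
 * surrounding the current B-frame, pb_time the distance from the previous
 * reference frame to the B-frame itself; both are needed e.g. for
 * direct-mode MV scaling. */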
3504 static void set_frame_distances(MpegEncContext * s){
3505     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3506     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3507
3508     if(s->pict_type==AV_PICTURE_TYPE_B){
3509         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3510         av_assert1(s->pb_time > 0 && s->pb_time < s->pp_time);
3511     }else{
3512         s->pp_time= s->time - s->last_non_b_time;
3513         s->last_non_b_time= s->time;
3514         av_assert1(s->picture_number==0 || s->pp_time > 0);
3515     }
3516 }
3517
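/*
 * Encode one picture: run (pre-)motion estimation on the slice threads,
 * possibly promote a P-frame to an I-frame on scene changes, pick
 * f_code/b_code and fix out-of-range MVs, estimate the quantizer, set up the
 * quantization matrices, write the picture header, then run encode_thread()
 * on every slice context and merge the results.
 */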
3518 static int encode_picture(MpegEncContext *s, int picture_number)
3519 {
3520     int i, ret;
3521     int bits;
3522     int context_count = s->slice_context_count;
3523
3524     s->picture_number = picture_number;
3525
3526     /* Reset the average MB variance */
3527     s->me.mb_var_sum_temp    =
3528     s->me.mc_mb_var_sum_temp = 0;
3529
3530     /* we need to initialize some time vars before we can encode B-frames */
3531     // RAL: Condition added for MPEG1VIDEO
3532     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3533         set_frame_distances(s);
3534     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3535         ff_set_mpeg4_time(s);
3536
3537     s->me.scene_change_score=0;
3538
3539 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3540
3541     if(s->pict_type==AV_PICTURE_TYPE_I){
3542         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3543         else                        s->no_rounding=0;
3544     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3545         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3546             s->no_rounding ^= 1;
3547     }
3548
3549     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3550         if (estimate_qp(s,1) < 0)
3551             return -1;
3552         ff_get_2pass_fcode(s);
3553     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3554         if(s->pict_type==AV_PICTURE_TYPE_B)
3555             s->lambda= s->last_lambda_for[s->pict_type];
3556         else
3557             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3558         update_qscale(s);
3559     }
3560
3561     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3562         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3563         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3564         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3565         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3566     }
3567
3568     s->mb_intra=0; //for the rate distortion & bit compare functions
3569     for(i=1; i<context_count; i++){
3570         ret = ff_update_duplicate_context(s->thread_context[i], s);
3571         if (ret < 0)
3572             return ret;
3573     }
3574
3575     if(ff_init_me(s)<0)
3576         return -1;
3577
3578     /* Estimate motion for every MB */
3579     if(s->pict_type != AV_PICTURE_TYPE_I){
3580         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3581         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3582         if (s->pict_type != AV_PICTURE_TYPE_B) {
3583             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3584                 s->me_pre == 2) {
3585                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3586             }
3587         }
3588
3589         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3590     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3591         /* I-Frame */
3592         for(i=0; i<s->mb_stride*s->mb_height; i++)
3593             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3594
3595         if(!s->fixed_qscale){
3596             /* finding spatial complexity for I-frame rate control */
3597             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3598         }
3599     }
3600     for(i=1; i<context_count; i++){
3601         merge_context_after_me(s, s->thread_context[i]);
3602     }
3603     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3604     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3605     emms_c();
3606
3607     if (s->me.scene_change_score > s->scenechange_threshold &&
3608         s->pict_type == AV_PICTURE_TYPE_P) {
3609         s->pict_type= AV_PICTURE_TYPE_I;
3610         for(i=0; i<s->mb_stride*s->mb_height; i++)
3611             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3612         if(s->msmpeg4_version >= 3)
3613             s->no_rounding=1;
3614         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3615                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3616     }
3617
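    /* Choose the smallest f_code/b_code able to represent the estimated MVs,
     * then clamp or demote macroblocks whose vectors still fall outside the
     * codable range (ff_fix_long_mvs / ff_fix_long_p_mvs). Skipped for
     * H.263+ unrestricted MVs (umvplus), which use their own MV coding. */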
3618     if(!s->umvplus){
3619         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3620             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3621
3622             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3623                 int a,b;
3624                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3625                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3626                 s->f_code= FFMAX3(s->f_code, a, b);
3627             }
3628
3629             ff_fix_long_p_mvs(s, s->intra_penalty ? CANDIDATE_MB_TYPE_INTER : CANDIDATE_MB_TYPE_INTRA);
3630             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, !!s->intra_penalty);
3631             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3632                 int j;
3633                 for(i=0; i<2; i++){
3634                     for(j=0; j<2; j++)
3635                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3636                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, !!s->intra_penalty);
3637                 }
3638             }
3639         }
3640
3641         if(s->pict_type==AV_PICTURE_TYPE_B){
3642             int a, b;
3643
3644             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3645             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3646             s->f_code = FFMAX(a, b);
3647
3648             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3649             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3650             s->b_code = FFMAX(a, b);
3651
3652             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3653             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3654             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3655             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3656             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3657                 int dir, j;
3658                 for(dir=0; dir<2; dir++){
3659                     for(i=0; i<2; i++){
3660                         for(j=0; j<2; j++){
3661                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3662                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3663                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3664                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3665                         }
3666                     }
3667                 }
3668             }
3669         }
3670     }
3671
3672     if (estimate_qp(s, 0) < 0)
3673         return -1;
3674
3675     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3676         s->pict_type == AV_PICTURE_TYPE_I &&
3677         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3678         s->qscale= 3; //reduce clipping problems
3679
3680     if (s->out_format == FMT_MJPEG) {
3681         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3682         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3683
3684         if (s->avctx->intra_matrix) {
3685             chroma_matrix =
3686             luma_matrix = s->avctx->intra_matrix;
3687         }
3688         if (s->avctx->chroma_intra_matrix)
3689             chroma_matrix = s->avctx->chroma_intra_matrix;
3690
3691         /* for mjpeg, we do include qscale in the matrix */
3692         for(i=1;i<64;i++){
3693             int j = s->idsp.idct_permutation[i];
3694
3695             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3696             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3697         }
3698         s->y_dc_scale_table=
3699         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3700         s->chroma_intra_matrix[0] =
3701         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3702         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3703                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3704         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3705                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3706         s->qscale= 8;
3707     }
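    /* AMV always uses the fixed SP5X "quality five" quantization tables; the
     * DC scale values of 13/14 below match the last_dc reset in
     * encode_thread(). */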
3708     if(s->codec_id == AV_CODEC_ID_AMV){
3709         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3710         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3711         for(i=1;i<64;i++){
3712             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3713
3714             s->intra_matrix[j]        = sp5x_qscale_five_quant_table[0][i];
3715             s->chroma_intra_matrix[j] = sp5x_qscale_five_quant_table[1][i];
3716         }
3717         s->y_dc_scale_table= y;
3718         s->c_dc_scale_table= c;
3719         s->intra_matrix[0] = 13;
3720         s->chroma_intra_matrix[0] = 14;
3721         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3722                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3723         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3724                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3725         s->qscale= 8;
3726     }
3727
3728     if (s->out_format == FMT_SPEEDHQ) {
3729         s->y_dc_scale_table=
3730         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[3];
3731     }
3732
3733     //FIXME var duplication
3734     s->current_picture_ptr->f->key_frame =
3735     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3736     s->current_picture_ptr->f->pict_type =
3737     s->current_picture.f->pict_type = s->pict_type;
3738
3739     if (s->current_picture.f->key_frame)
3740         s->picture_in_gop_number=0;
3741
3742     s->mb_x = s->mb_y = 0;
3743     s->last_bits= put_bits_count(&s->pb);
3744     switch(s->out_format) {
3745 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
3746     case FMT_MJPEG:
3747         /* s->huffman == HUFFMAN_TABLE_OPTIMAL can only be true for MJPEG. */
3748         if (!CONFIG_MJPEG_ENCODER || s->huffman != HUFFMAN_TABLE_OPTIMAL)
3749             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3750                                            s->pred, s->intra_matrix, s->chroma_intra_matrix);
3751         break;
3752 #endif
3753     case FMT_SPEEDHQ:
3754         if (CONFIG_SPEEDHQ_ENCODER)
3755             ff_speedhq_encode_picture_header(s);
3756         break;
3757     case FMT_H261:
3758         if (CONFIG_H261_ENCODER)
3759             ff_h261_encode_picture_header(s, picture_number);
3760         break;
3761     case FMT_H263:
3762         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3763             ff_wmv2_encode_picture_header(s, picture_number);
3764         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3765             ff_msmpeg4_encode_picture_header(s, picture_number);
3766         else if (CONFIG_MPEG4_ENCODER && s->h263_pred) {
3767             ret = ff_mpeg4_encode_picture_header(s, picture_number);
3768             if (ret < 0)
3769                 return ret;
3770         } else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3771             ret = ff_rv10_encode_picture_header(s, picture_number);
3772             if (ret < 0)
3773                 return ret;
3774         }
3775         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3776             ff_rv20_encode_picture_header(s, picture_number);
3777         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3778             ff_flv_encode_picture_header(s, picture_number);
3779         else if (CONFIG_H263_ENCODER)
3780             ff_h263_encode_picture_header(s, picture_number);
3781         break;
3782     case FMT_MPEG1:
3783         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3784             ff_mpeg1_encode_picture_header(s, picture_number);
3785         break;
3786     default:
3787         av_assert0(0);
3788     }
3789     bits= put_bits_count(&s->pb);
3790     s->header_bits= bits - s->last_bits;
3791
3792     for(i=1; i<context_count; i++){
3793         update_duplicate_context_after_me(s->thread_context[i], s);
3794     }
3795     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3796     for(i=1; i<context_count; i++){
3797         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3798             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-BUF_BITS));
3799         merge_context_after_encode(s, s->thread_context[i]);
3800     }
3801     emms_c();
3802     return 0;
3803 }
3804
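/*
 * Noise reduction: accumulate the magnitude of every DCT coefficient in
 * dct_error_sum and pull each coefficient towards zero by the per-position
 * offset derived from those sums (dct_offset), clamping at zero so the sign
 * never flips.
 */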
3805 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3806     const int intra= s->mb_intra;
3807     int i;
3808
3809     s->dct_count[intra]++;
3810
3811     for(i=0; i<64; i++){
3812         int level= block[i];
3813
3814         if(level){
3815             if(level>0){
3816                 s->dct_error_sum[intra][i] += level;
3817                 level -= s->dct_offset[intra][i];
3818                 if(level<0) level=0;
3819             }else{
3820                 s->dct_error_sum[intra][i] -= level;
3821                 level += s->dct_offset[intra][i];
3822                 if(level>0) level=0;
3823             }
3824             block[i]= level;
3825         }
3826     }
3827 }
3828
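/*
 * Trellis quantization: for every scan position the quantized level and
 * level-1 are considered (plus the escape case), and a dynamic program over
 * "survivor" end positions selects the run/level sequence that minimizes
 * distortion + lambda * estimated VLC bits; in effect a Viterbi-style search
 * over the coefficients.
 */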
3829 static int dct_quantize_trellis_c(MpegEncContext *s,
3830                                   int16_t *block, int n,
3831                                   int qscale, int *overflow){
3832     const int *qmat;
3833     const uint16_t *matrix;
3834     const uint8_t *scantable;
3835     const uint8_t *perm_scantable;
3836     int max=0;
3837     unsigned int threshold1, threshold2;
3838     int bias=0;
3839     int run_tab[65];
3840     int level_tab[65];
3841     int score_tab[65];
3842     int survivor[65];
3843     int survivor_count;
3844     int last_run=0;
3845     int last_level=0;
3846     int last_score= 0;
3847     int last_i;
3848     int coeff[2][64];
3849     int coeff_count[64];
3850     int qmul, qadd, start_i, last_non_zero, i, dc;
3851     const int esc_length= s->ac_esc_length;
3852     uint8_t * length;
3853     uint8_t * last_length;
3854     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3855     int mpeg2_qscale;
3856
3857     s->fdsp.fdct(block);
3858
3859     if(s->dct_error_sum)
3860         s->denoise_dct(s, block);
3861     qmul= qscale*16;
3862     qadd= ((qscale-1)|1)*8;
3863
3864     if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3865     else                 mpeg2_qscale = qscale << 1;
3866
3867     if (s->mb_intra) {
3868         int q;
3869         scantable= s->intra_scantable.scantable;
3870         perm_scantable= s->intra_scantable.permutated;
3871         if (!s->h263_aic) {
3872             if (n < 4)
3873                 q = s->y_dc_scale;
3874             else
3875                 q = s->c_dc_scale;
3876             q = q << 3;
3877         } else{
3878             /* For AIC we skip quant/dequant of INTRADC */
3879             q = 1 << 3;
3880             qadd=0;
3881         }
3882
3883         /* note: block[0] is assumed to be positive */
3884         block[0] = (block[0] + (q >> 1)) / q;
3885         start_i = 1;
3886         last_non_zero = 0;
3887         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3888         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3889         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3890             bias= 1<<(QMAT_SHIFT-1);
3891
3892         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3893             length     = s->intra_chroma_ac_vlc_length;
3894             last_length= s->intra_chroma_ac_vlc_last_length;
3895         } else {
3896             length     = s->intra_ac_vlc_length;
3897             last_length= s->intra_ac_vlc_last_length;
3898         }
3899     } else {
3900         scantable= s->inter_scantable.scantable;
3901         perm_scantable= s->inter_scantable.permutated;
3902         start_i = 0;
3903         last_non_zero = -1;
3904         qmat = s->q_inter_matrix[qscale];
3905         matrix = s->inter_matrix;
3906         length     = s->inter_ac_vlc_length;
3907         last_length= s->inter_ac_vlc_last_length;
3908     }
3909     last_i= start_i;
3910
3911     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3912     threshold2= (threshold1<<1);
3913
3914     for(i=63; i>=start_i; i--) {
3915         const int j = scantable[i];
3916         int level = block[j] * qmat[j];
3917
3918         if(((unsigned)(level+threshold1))>threshold2){
3919             last_non_zero = i;
3920             break;
3921         }
3922     }
3923
3924     for(i=start_i; i<=last_non_zero; i++) {
3925         const int j = scantable[i];
3926         int level = block[j] * qmat[j];
3927
3928 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3929 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3930         if(((unsigned)(level+threshold1))>threshold2){
3931             if(level>0){
3932                 level= (bias + level)>>QMAT_SHIFT;
3933                 coeff[0][i]= level;
3934                 coeff[1][i]= level-1;
3935 //                coeff[2][k]= level-2;
3936             }else{
3937                 level= (bias - level)>>QMAT_SHIFT;
3938                 coeff[0][i]= -level;
3939                 coeff[1][i]= -level+1;
3940 //                coeff[2][k]= -level+2;
3941             }
3942             coeff_count[i]= FFMIN(level, 2);
3943             av_assert2(coeff_count[i]);
3944             max |=level;
3945         }else{
3946             coeff[0][i]= (level>>31)|1;
3947             coeff_count[i]= 1;
3948         }
3949     }
3950
3951     *overflow= s->max_qcoeff < max; //overflow might have happened
3952
3953     if(last_non_zero < start_i){
3954         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3955         return last_non_zero;
3956     }
3957
3958     score_tab[start_i]= 0;
3959     survivor[0]= start_i;
3960     survivor_count= 1;
3961
3962     for(i=start_i; i<=last_non_zero; i++){
3963         int level_index, j, zero_distortion;
3964         int dct_coeff= FFABS(block[ scantable[i] ]);
3965         int best_score=256*256*256*120;
3966
3967         if (s->fdsp.fdct == ff_fdct_ifast)
3968             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3969         zero_distortion= dct_coeff*dct_coeff;
3970
3971         for(level_index=0; level_index < coeff_count[i]; level_index++){
3972             int distortion;
3973             int level= coeff[level_index][i];
3974             const int alevel= FFABS(level);
3975             int unquant_coeff;
3976
3977             av_assert2(level);
3978
3979             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3980                 unquant_coeff= alevel*qmul + qadd;
3981             } else if(s->out_format == FMT_MJPEG) {
3982                 j = s->idsp.idct_permutation[scantable[i]];
3983                 unquant_coeff = alevel * matrix[j] * 8;
3984             }else{ // MPEG-1
3985                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3986                 if(s->mb_intra){
3987                         unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
3988                         unquant_coeff =   (unquant_coeff - 1) | 1;
3989                 }else{
3990                         unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
3991                         unquant_coeff =   (unquant_coeff - 1) | 1;
3992                 }
3993                 unquant_coeff<<= 3;
3994             }
3995
3996             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3997             level+=64;
3998             if((level&(~127)) == 0){
3999                 for(j=survivor_count-1; j>=0; j--){
4000                     int run= i - survivor[j];
4001                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4002                     score += score_tab[i-run];
4003
4004                     if(score < best_score){
4005                         best_score= score;
4006                         run_tab[i+1]= run;
4007                         level_tab[i+1]= level-64;
4008                     }
4009                 }
4010
4011                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4012                     for(j=survivor_count-1; j>=0; j--){
4013                         int run= i - survivor[j];
4014                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4015                         score += score_tab[i-run];
4016                         if(score < last_score){
4017                             last_score= score;
4018                             last_run= run;
4019                             last_level= level-64;
4020                             last_i= i+1;
4021                         }
4022                     }
4023                 }
4024             }else{
4025                 distortion += esc_length*lambda;
4026                 for(j=survivor_count-1; j>=0; j--){
4027                     int run= i - survivor[j];
4028                     int score= distortion + score_tab[i-run];
4029
4030                     if(score < best_score){
4031                         best_score= score;
4032                         run_tab[i+1]= run;
4033                         level_tab[i+1]= level-64;
4034                     }
4035                 }
4036
4037                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4038                     for(j=survivor_count-1; j>=0; j--){
4039                         int run= i - survivor[j];
4040                         int score= distortion + score_tab[i-run];
4041                         if(score < last_score){
4042                             last_score= score;
4043                             last_run= run;
4044                             last_level= level-64;
4045                             last_i= i+1;
4046                         }
4047                     }
4048                 }
4049             }
4050         }
4051
4052         score_tab[i+1]= best_score;
4053
4054         // Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
4055         if(last_non_zero <= 27){
4056             for(; survivor_count; survivor_count--){
4057                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4058                     break;
4059             }
4060         }else{
4061             for(; survivor_count; survivor_count--){
4062                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4063                     break;
4064             }
4065         }
4066
4067         survivor[ survivor_count++ ]= i+1;
4068     }
4069
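         /* H.261/H.263 track the end of block through the "last" VLC tables
          * in the loop above; for the other formats choose the truncation
          * point here, approximating the end-of-block cost as 2 bits (hence
          * the FIXME). */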
4070     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4071         last_score= 256*256*256*120;
4072         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4073             int score= score_tab[i];
4074             if (i)
4075                 score += lambda * 2; // FIXME more exact?
4076
4077             if(score < last_score){
4078                 last_score= score;
4079                 last_i= i;
4080                 last_level= level_tab[i];
4081                 last_run= run_tab[i];
4082             }
4083         }
4084     }
4085
4086     s->coded_score[n] = last_score;
4087
4088     dc= FFABS(block[0]);
4089     last_non_zero= last_i - 1;
4090     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4091
4092     if(last_non_zero < start_i)
4093         return last_non_zero;
4094
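         /* Inter block whose only surviving coefficient sits at position 0:
          * re-check whether coding that single level beats dropping the block
          * entirely (returning -1 marks the block as empty). */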
4095     if(last_non_zero == 0 && start_i == 0){
4096         int best_level= 0;
4097         int best_score= dc * dc;
4098
4099         for(i=0; i<coeff_count[0]; i++){
4100             int level= coeff[i][0];
4101             int alevel= FFABS(level);
4102             int unquant_coeff, score, distortion;
4103
4104             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4105                     unquant_coeff= (alevel*qmul + qadd)>>3;
4106             } else{ // MPEG-1
4107                     unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4108                     unquant_coeff =   (unquant_coeff - 1) | 1;
4109             }
4110             unquant_coeff = (unquant_coeff + 4) >> 3;
4111             unquant_coeff<<= 3 + 3;
4112
4113             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4114             level+=64;
4115             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4116             else                    score= distortion + esc_length*lambda;
4117
4118             if(score < best_score){
4119                 best_score= score;
4120                 best_level= level - 64;
4121             }
4122         }
4123         block[0]= best_level;
4124         s->coded_score[n] = best_score - dc*dc;
4125         if(best_level == 0) return -1;
4126         else                return last_non_zero;
4127     }
4128
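         /* Walk the chosen trellis path backwards from the selected end point
          * and write the run/level decisions into the block in permuted order. */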
4129     i= last_i;
4130     av_assert2(last_level);
4131
4132     block[ perm_scantable[last_non_zero] ]= last_level;
4133     i -= last_run + 1;
4134
4135     for(; i>start_i; i -= run_tab[i] + 1){
4136         block[ perm_scantable[i-1] ]= level_tab[i];
4137     }
4138
4139     return last_non_zero;
4140 }
4141
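     /* 8x8 DCT basis functions, scaled by 1 << BASIS_SHIFT and stored in the
      * encoder's IDCT permutation order.  build_basis() fills the table lazily;
      * dct_quantize_refine() uses it to evaluate, in the spatial domain, how
      * changing a single transform coefficient changes the reconstruction. */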
4142 static int16_t basis[64][64];
4143
4144 static void build_basis(uint8_t *perm){
4145     int i, j, x, y;
4146     emms_c();
4147     for(i=0; i<8; i++){
4148         for(j=0; j<8; j++){
4149             for(y=0; y<8; y++){
4150                 for(x=0; x<8; x++){
4151                     double s= 0.25*(1<<BASIS_SHIFT);
4152                     int index= 8*i + j;
4153                     int perm_index= perm[index];
4154                     if(i==0) s*= sqrt(0.5);
4155                     if(j==0) s*= sqrt(0.5);
4156                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4157                 }
4158             }
4159         }
4160     }
4161 }
4162
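     /* Refine an already-quantized block by greedy coordinate descent: try
      * +/-1 changes to individual coefficients, score each candidate as VLC
      * bit cost (times lambda) plus the change in weighted spatial-domain
      * error, and keep applying the best single change until nothing improves.
      * Used with quantizer_noise_shaping; returns the new last non-zero index. */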
4163 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4164                         int16_t *block, int16_t *weight, int16_t *orig,
4165                         int n, int qscale){
4166     int16_t rem[64];
4167     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4168     const uint8_t *scantable;
4169     const uint8_t *perm_scantable;
4170 //    unsigned int threshold1, threshold2;
4171 //    int bias=0;
4172     int run_tab[65];
4173     int prev_run=0;
4174     int prev_level=0;
4175     int qmul, qadd, start_i, last_non_zero, i, dc;
4176     uint8_t * length;
4177     uint8_t * last_length;
4178     int lambda;
4179     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4180
4181     if(basis[0][0] == 0)
4182         build_basis(s->idsp.idct_permutation);
4183
4184     qmul= qscale*2;
4185     qadd= (qscale-1)|1;
4186     if (s->mb_intra) {
4187         scantable= s->intra_scantable.scantable;
4188         perm_scantable= s->intra_scantable.permutated;
4189         if (!s->h263_aic) {
4190             if (n < 4)
4191                 q = s->y_dc_scale;
4192             else
4193                 q = s->c_dc_scale;
4194         } else{
4195             /* For AIC we skip quant/dequant of INTRADC */
4196             q = 1;
4197             qadd=0;
4198         }
4199         q <<= RECON_SHIFT-3;
4200         /* note: block[0] is assumed to be positive */
4201         dc= block[0]*q;
4202 //        block[0] = (block[0] + (q >> 1)) / q;
4203         start_i = 1;
4204 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4205 //            bias= 1<<(QMAT_SHIFT-1);
4206         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4207             length     = s->intra_chroma_ac_vlc_length;
4208             last_length= s->intra_chroma_ac_vlc_last_length;
4209         } else {
4210             length     = s->intra_ac_vlc_length;
4211             last_length= s->intra_ac_vlc_last_length;
4212         }
4213     } else {
4214         scantable= s->inter_scantable.scantable;
4215         perm_scantable= s->inter_scantable.permutated;
4216         dc= 0;
4217         start_i = 0;
4218         length     = s->inter_ac_vlc_length;
4219         last_length= s->inter_ac_vlc_last_length;
4220     }
4221     last_non_zero = s->block_last_index[n];
4222
4223     dc += (1<<(RECON_SHIFT-1));
4224     for(i=0; i<64; i++){
4225         rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4226     }
4227
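         /* Remap the caller-supplied weights into the 16..63 range and scale
          * lambda by the sum of squared weights, keeping the bit cost
          * comparable to the distortion that try_8x8basis() weights by w*w. */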
4228     sum=0;
4229     for(i=0; i<64; i++){
4230         int one= 36;
4231         int qns=4;
4232         int w;
4233
4234         w= FFABS(weight[i]) + qns*one;
4235         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4236
4237         weight[i] = w;
4238 //        w=weight[i] = (63*qns + (w/2)) / w;
4239
4240         av_assert2(w>0);
4241         av_assert2(w<(1<<6));
4242         sum += w*w;
4243     }
4244     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4245
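         /* Record the run lengths of the current coefficients and add each
          * dequantized coefficient's basis function to rem[], so that rem[]
          * approximates (reconstruction - original) << RECON_SHIFT. */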
4246     run=0;
4247     rle_index=0;
4248     for(i=start_i; i<=last_non_zero; i++){
4249         int j= perm_scantable[i];
4250         const int level= block[j];
4251         int coeff;
4252
4253         if(level){
4254             if(level<0) coeff= qmul*level - qadd;
4255             else        coeff= qmul*level + qadd;
4256             run_tab[rle_index++]=run;
4257             run=0;
4258
4259             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4260         }else{
4261             run++;
4262         }
4263     }
4264
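         /* Main refinement loop: each pass scores every +/-1 candidate change
          * (including the intra DC term when start_i != 0), applies the single
          * best one and repeats until no change beats leaving the block as is. */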
4265     for(;;){
4266         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4267         int best_coeff=0;
4268         int best_change=0;
4269         int run2, best_unquant_change=0, analyze_gradient;
4270         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4271
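             /* With enough coefficients (or aggressive noise shaping) take the
              * DCT of the weighted error; its per-frequency sign is used below
              * to reject new +/-1 levels that point the same way as the error. */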
4272         if(analyze_gradient){
4273             for(i=0; i<64; i++){
4274                 int w= weight[i];
4275
4276                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4277             }
4278             s->fdsp.fdct(d1);
4279         }
4280
4281         if(start_i){
4282             const int level= block[0];
4283             int change, old_coeff;
4284
4285             av_assert2(s->mb_intra);
4286
4287             old_coeff= q*level;
4288
4289             for(change=-1; change<=1; change+=2){
4290                 int new_level= level + change;
4291                 int score, new_coeff;
4292
4293                 new_coeff= q*new_level;
4294                 if(new_coeff >= 2048 || new_coeff < 0)
4295                     continue;
4296
4297                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4298                                                   new_coeff - old_coeff);
4299                 if(score<best_score){
4300                     best_score= score;
4301                     best_coeff= 0;
4302                     best_change= change;
4303                     best_unquant_change= new_coeff - old_coeff;
4304                 }
4305             }
4306         }
4307
4308         run=0;
4309         rle_index=0;
4310         run2= run_tab[rle_index++];
4311         prev_level=0;
4312         prev_run=0;
4313
4314         for(i=start_i; i<64; i++){
4315             int j= perm_scantable[i];
4316             const int level= block[j];
4317             int change, old_coeff;
4318
4319             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4320                 break;
4321
4322             if(level){
4323                 if(level<0) old_coeff= qmul*level - qadd;
4324                 else        old_coeff= qmul*level + qadd;
4325                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4326             }else{
4327                 old_coeff=0;
4328                 run2--;
4329                 av_assert2(run2>=0 || i >= last_non_zero );
4330             }
4331
4332             for(change=-1; change<=1; change+=2){
4333                 int new_level= level + change;
4334                 int score, new_coeff, unquant_change;
4335
4336                 score=0;
4337                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4338                    continue;
4339
4340                 if(new_level){
4341                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4342                     else            new_coeff= qmul*new_level + qadd;
4343                     if(new_coeff >= 2048 || new_coeff <= -2048)
4344                         continue;
4345                     //FIXME check for overflow
4346
4347                     if(level){
4348                         if(level < 63 && level > -63){
4349                             if(i < last_non_zero)
4350                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4351                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4352                             else
4353                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4354                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4355                         }
4356                     }else{
4357                         av_assert2(FFABS(new_level)==1);
4358
4359                         if(analyze_gradient){
4360                             int g= d1[ scantable[i] ];
4361                             if(g && (g^new_level) >= 0)
4362                                 continue;
4363                         }
4364
4365                         if(i < last_non_zero){
4366                             int next_i= i + run2 + 1;
4367                             int next_level= block[ perm_scantable[next_i] ] + 64;
4368
4369                             if(next_level&(~127))
4370                                 next_level= 0;
4371
4372                             if(next_i < last_non_zero)
4373                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4374                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4375                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4376                             else
4377                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4378                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4379                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4380                         }else{
4381                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4382                             if(prev_level){
4383                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4384                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4385                             }
4386                         }
4387                     }
4388                 }else{
4389                     new_coeff=0;
4390                     av_assert2(FFABS(level)==1);
4391
4392                     if(i < last_non_zero){
4393                         int next_i= i + run2 + 1;
4394                         int next_level= block[ perm_scantable[next_i] ] + 64;
4395
4396                         if(next_level&(~127))
4397                             next_level= 0;
4398
4399                         if(next_i < last_non_zero)
4400                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4401                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4402                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4403                         else
4404                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4405                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4406                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4407                     }else{
4408                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4409                         if(prev_level){
4410                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4411                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4412                         }
4413                     }
4414                 }
4415
4416                 score *= lambda;
4417
4418                 unquant_change= new_coeff - old_coeff;
4419                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4420
4421                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4422                                                    unquant_change);
4423                 if(score<best_score){
4424                     best_score= score;
4425                     best_coeff= i;
4426                     best_change= change;
4427                     best_unquant_change= unquant_change;
4428                 }
4429             }
4430             if(level){
4431                 prev_level= level + 64;
4432                 if(prev_level&(~127))
4433                     prev_level= 0;
4434                 prev_run= run;
4435                 run=0;
4436             }else{
4437                 run++;
4438             }
4439         }
4440
4441         if(best_change){
4442             int j= perm_scantable[ best_coeff ];
4443
4444             block[j] += best_change;
4445
4446             if(best_coeff > last_non_zero){
4447                 last_non_zero= best_coeff;
4448                 av_assert2(block[j]);
4449             }else{
4450                 for(; last_non_zero>=start_i; last_non_zero--){
4451                     if(block[perm_scantable[last_non_zero]])
4452                         break;
4453                 }
4454             }
4455
4456             run=0;
4457             rle_index=0;
4458             for(i=start_i; i<=last_non_zero; i++){
4459                 int j= perm_scantable[i];
4460                 const int level= block[j];
4461
4462                 if(level){
4463                     run_tab[rle_index++]=run;
4464                     run=0;
4465                 }else{
4466                     run++;
4467                 }
4468             }
4469
4470             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4471         }else{
4472             break;
4473         }
4474     }
4475
4476     return last_non_zero;
4477 }
4478
4479 /**
4480  * Permute an 8x8 block according to permutation.
4481  * @param block the block which will be permuted according to
4482  *              the given permutation vector
4483  * @param permutation the permutation vector
4484  * @param last the last non-zero coefficient in scantable order, used to
4485  *             speed the permutation up
4486  * @param scantable the scantable in use; this is only used to speed the
4487  *                  permutation up, the block is not (inverse) permuted
4488  *                  to scantable order!
4489  */
4490 void ff_block_permute(int16_t *block, uint8_t *permutation,
4491                       const uint8_t *scantable, int last)
4492 {
4493     int i;
4494     int16_t temp[64];
4495
4496     if (last <= 0)
4497         return;
4498     //FIXME it is ok but not clean and might fail for some permutations
4499     // if (permutation[1] == 1)
4500     // return;
4501
4502     for (i = 0; i <= last; i++) {
4503         const int j = scantable[i];
4504         temp[j] = block[j];
4505         block[j] = 0;
4506     }
4507
4508     for (i = 0; i <= last; i++) {
4509         const int j = scantable[i];
4510         const int perm_j = permutation[j];
4511         block[perm_j] = temp[j];
4512     }
4513 }
4514
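     /**
      * Default (non-trellis) quantizer: forward DCT, optional DCT-domain
      * denoising, then each coefficient becomes
      *     level = sign(b) * ((|b| * qmat[j] + bias) >> QMAT_SHIFT)
      * with everything at or below the bias-derived threshold forced to zero.
      * Returns the last non-zero index in scan order and sets *overflow when
      * a level exceeds s->max_qcoeff.
      */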
4515 int ff_dct_quantize_c(MpegEncContext *s,
4516                         int16_t *block, int n,
4517                         int qscale, int *overflow)
4518 {
4519     int i, j, level, last_non_zero, q, start_i;
4520     const int *qmat;
4521     const uint8_t *scantable;
4522     int bias;
4523     int max=0;
4524     unsigned int threshold1, threshold2;
4525
4526     s->fdsp.fdct(block);
4527
4528     if(s->dct_error_sum)
4529         s->denoise_dct(s, block);
4530
4531     if (s->mb_intra) {
4532         scantable= s->intra_scantable.scantable;
4533         if (!s->h263_aic) {
4534             if (n < 4)
4535                 q = s->y_dc_scale;
4536             else
4537                 q = s->c_dc_scale;
4538             q = q << 3;
4539         } else
4540             /* For AIC we skip quant/dequant of INTRADC */
4541             q = 1 << 3;
4542
4543         /* note: block[0] is assumed to be positive */
4544         block[0] = (block[0] + (q >> 1)) / q;
4545         start_i = 1;
4546         last_non_zero = 0;
4547         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4548         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4549     } else {
4550         scantable= s->inter_scantable.scantable;
4551         start_i = 0;
4552         last_non_zero = -1;
4553         qmat = s->q_inter_matrix[qscale];
4554         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4555     }
4556     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4557     threshold2= (threshold1<<1);
4558     for(i=63;i>=start_i;i--) {
4559         j = scantable[i];
4560         level = block[j] * qmat[j];
4561
4562         if(((unsigned)(level+threshold1))>threshold2){
4563             last_non_zero = i;
4564             break;
4565         }else{
4566             block[j]=0;
4567         }
4568     }
4569     for(i=start_i; i<=last_non_zero; i++) {
4570         j = scantable[i];
4571         level = block[j] * qmat[j];
4572
4573 //        if(   bias+level >= (1<<QMAT_SHIFT)
4574 //           || bias-level >= (1<<QMAT_SHIFT)){
4575         if(((unsigned)(level+threshold1))>threshold2){
4576             if(level>0){
4577                 level= (bias + level)>>QMAT_SHIFT;
4578                 block[j]= level;
4579             }else{
4580                 level= (bias - level)>>QMAT_SHIFT;
4581                 block[j]= -level;
4582             }
4583             max |=level;
4584         }else{
4585             block[j]=0;
4586         }
4587     }
4588     *overflow= s->max_qcoeff < max; //overflow might have happened
4589
4590     /* We need this permutation so that the IDCT is correct; only the non-zero elements are permuted. */
4591     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4592         ff_block_permute(block, s->idsp.idct_permutation,
4593                       scantable, last_non_zero);
4594
4595     return last_non_zero;
4596 }
4597
4598 #define OFFSET(x) offsetof(MpegEncContext, x)
4599 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4600 static const AVOption h263_options[] = {
4601     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4602     { "mb_info",      "emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4603     FF_MPV_COMMON_OPTS
4604 #if FF_API_MPEGVIDEO_OPTS
4605     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4606     FF_MPV_DEPRECATED_A53_CC_OPT
4607     FF_MPV_DEPRECATED_MATRIX_OPT
4608     FF_MPV_DEPRECATED_BFRAME_OPTS
4609 #endif
4610     { NULL },
4611 };
4612
4613 static const AVClass h263_class = {
4614     .class_name = "H.263 encoder",
4615     .item_name  = av_default_item_name,
4616     .option     = h263_options,
4617     .version    = LIBAVUTIL_VERSION_INT,
4618 };
4619
4620 AVCodec ff_h263_encoder = {
4621     .name           = "h263",
4622     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4623     .type           = AVMEDIA_TYPE_VIDEO,
4624     .id             = AV_CODEC_ID_H263,
4625     .priv_data_size = sizeof(MpegEncContext),
4626     .init           = ff_mpv_encode_init,
4627     .encode2        = ff_mpv_encode_picture,
4628     .close          = ff_mpv_encode_end,
4629     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4630     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4631     .priv_class     = &h263_class,
4632 };
4633
4634 static const AVOption h263p_options[] = {
4635     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus),       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4636     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4637     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4638     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE},
4639     FF_MPV_COMMON_OPTS
4640 #if FF_API_MPEGVIDEO_OPTS
4641     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4642     FF_MPV_DEPRECATED_A53_CC_OPT
4643     FF_MPV_DEPRECATED_MATRIX_OPT
4644     FF_MPV_DEPRECATED_BFRAME_OPTS
4645 #endif
4646     { NULL },
4647 };
4648 static const AVClass h263p_class = {
4649     .class_name = "H.263p encoder",
4650     .item_name  = av_default_item_name,
4651     .option     = h263p_options,
4652     .version    = LIBAVUTIL_VERSION_INT,
4653 };
4654
4655 AVCodec ff_h263p_encoder = {
4656     .name           = "h263p",
4657     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4658     .type           = AVMEDIA_TYPE_VIDEO,
4659     .id             = AV_CODEC_ID_H263P,
4660     .priv_data_size = sizeof(MpegEncContext),
4661     .init           = ff_mpv_encode_init,
4662     .encode2        = ff_mpv_encode_picture,
4663     .close          = ff_mpv_encode_end,
4664     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4665     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4666     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4667     .priv_class     = &h263p_class,
4668 };
4669
4670 static const AVClass msmpeg4v2_class = {
4671     .class_name = "msmpeg4v2 encoder",
4672     .item_name  = av_default_item_name,
4673     .option     = ff_mpv_generic_options,
4674     .version    = LIBAVUTIL_VERSION_INT,
4675 };
4676
4677 AVCodec ff_msmpeg4v2_encoder = {
4678     .name           = "msmpeg4v2",
4679     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4680     .type           = AVMEDIA_TYPE_VIDEO,
4681     .id             = AV_CODEC_ID_MSMPEG4V2,
4682     .priv_data_size = sizeof(MpegEncContext),
4683     .init           = ff_mpv_encode_init,
4684     .encode2        = ff_mpv_encode_picture,
4685     .close          = ff_mpv_encode_end,
4686     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4687     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4688     .priv_class     = &msmpeg4v2_class,
4689 };
4690
4691 static const AVClass msmpeg4v3_class = {
4692     .class_name = "msmpeg4v3 encoder",
4693     .item_name  = av_default_item_name,
4694     .option     = ff_mpv_generic_options,
4695     .version    = LIBAVUTIL_VERSION_INT,
4696 };
4697
4698 AVCodec ff_msmpeg4v3_encoder = {
4699     .name           = "msmpeg4",
4700     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4701     .type           = AVMEDIA_TYPE_VIDEO,
4702     .id             = AV_CODEC_ID_MSMPEG4V3,
4703     .priv_data_size = sizeof(MpegEncContext),
4704     .init           = ff_mpv_encode_init,
4705     .encode2        = ff_mpv_encode_picture,
4706     .close          = ff_mpv_encode_end,
4707     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4708     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4709     .priv_class     = &msmpeg4v3_class,
4710 };
4711
4712 static const AVClass wmv1_class = {
4713     .class_name = "wmv1 encoder",
4714     .item_name  = av_default_item_name,
4715     .option     = ff_mpv_generic_options,
4716     .version    = LIBAVUTIL_VERSION_INT,
4717 };
4718
4719 AVCodec ff_wmv1_encoder = {
4720     .name           = "wmv1",
4721     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4722     .type           = AVMEDIA_TYPE_VIDEO,
4723     .id             = AV_CODEC_ID_WMV1,
4724     .priv_data_size = sizeof(MpegEncContext),
4725     .init           = ff_mpv_encode_init,
4726     .encode2        = ff_mpv_encode_picture,
4727     .close          = ff_mpv_encode_end,
4728     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4729     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4730     .priv_class     = &wmv1_class,
4731 };