/*
 * The simplest mpeg encoder (well, it was the simplest!)
 * Copyright (c) 2000,2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * non linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
 */

/**
 * @file
 * The simplest mpeg encoder (well, it was the simplest!).
 */

#include <stdint.h>

#include "libavutil/internal.h"
#include "libavutil/intmath.h"
#include "libavutil/mathematics.h"
#include "libavutil/mem_internal.h"
#include "libavutil/pixdesc.h"
#include "libavutil/opt.h"
#include "libavutil/thread.h"
#include "avcodec.h"
#include "dct.h"
#include "idctdsp.h"
#include "mpeg12.h"
#include "mpegvideo.h"
#include "mpegvideodata.h"
#include "h261.h"
#include "h263.h"
#include "h263data.h"
#include "mjpegenc_common.h"
#include "mathops.h"
#include "mpegutils.h"
#include "mjpegenc.h"
#include "speedhqenc.h"
#include "msmpeg4.h"
#include "pixblockdsp.h"
#include "qpeldsp.h"
#include "faandct.h"
#include "thread.h"
#include "aandcttab.h"
#include "flv.h"
#include "mpeg4video.h"
#include "internal.h"
#include "bytestream.h"
#include "wmv2.h"
#include "rv10.h"
#include "packet_internal.h"
#include <limits.h>
#include "sp5x.h"

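/* Fixed-point precision of the quantizer tables built in this file: bias
 * values carry QUANT_BIAS_SHIFT fractional bits, the C reciprocal matrices
 * (qmat) use QMAT_SHIFT bits and the 16-bit matrices (qmat16) use
 * QMAT_SHIFT_MMX bits. */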
#define QUANT_BIAS_SHIFT 8

#define QMAT_SHIFT_MMX 16
#define QMAT_SHIFT 21

static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_DMV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
#if FF_API_MPEGVIDEO_OPTS
    FF_MPV_DEPRECATED_MPEG_QUANT_OPT
    FF_MPV_DEPRECATED_A53_CC_OPT
    FF_MPV_DEPRECATED_MATRIX_OPT
    FF_MPV_DEPRECATED_BFRAME_OPTS
#endif
    { NULL },
};

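/**
 * Build the reciprocal quantization tables for each qscale in [qmin, qmax]:
 * qmat (high precision) and, on the generic FDCT path, qmat16 (16-bit
 * value/bias pairs), compensating for the scaling of the selected forward
 * DCT. A warning is printed if the chosen precision could overflow for
 * large coefficients.
 */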
void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
                       uint16_t (*qmat16)[2][64],
                       const uint16_t *quant_matrix,
                       int bias, int qmin, int qmax, int intra)
{
    FDCTDSPContext *fdsp = &s->fdsp;
    int qscale;
    int shift = 0;

    for (qscale = qmin; qscale <= qmax; qscale++) {
        int i;
        int qscale2;

        if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
        else                 qscale2 = qscale << 1;

        if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
#if CONFIG_FAANDCT
            fdsp->fdct == ff_faandct            ||
#endif /* CONFIG_FAANDCT */
            fdsp->fdct == ff_jpeg_fdct_islow_10) {
            for (i = 0; i < 64; i++) {
                const int j = s->idsp.idct_permutation[i];
                int64_t den = (int64_t) qscale2 * quant_matrix[j];
                /* 16 <= qscale * quant_matrix[i] <= 7905
                 * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
                 *             19952 <=              x  <= 249205026
                 * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
                 *           3444240 >= (1 << 36) / (x) >= 275 */

                qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
            }
        } else if (fdsp->fdct == ff_fdct_ifast) {
            for (i = 0; i < 64; i++) {
                const int j = s->idsp.idct_permutation[i];
                int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
                /* 16 <= qscale * quant_matrix[i] <= 7905
                 * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
                 *             19952 <=              x  <= 249205026
                 * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
                 *           3444240 >= (1 << 36) / (x) >= 275 */

                qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
            }
        } else {
            for (i = 0; i < 64; i++) {
                const int j = s->idsp.idct_permutation[i];
                int64_t den = (int64_t) qscale2 * quant_matrix[j];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                 * Assume x = qscale * quant_matrix[i]
                 * So             16 <=              x  <= 7905
                 * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
                 * so          32768 >= (1 << 19) / (x) >= 67 */
                qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
                //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
                //                    (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;

                if (qmat16[qscale][0][i] == 0 ||
                    qmat16[qscale][0][i] == 128 * 256)
                    qmat16[qscale][0][i] = 128 * 256 - 1;
                qmat16[qscale][1][i] =
                    ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
                                qmat16[qscale][0][i]);
            }
        }

        for (i = intra; i < 64; i++) {
            int64_t max = 8191;
            if (fdsp->fdct == ff_fdct_ifast) {
                max = (8191LL * ff_aanscales[i]) >> 14;
            }
            while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
                shift++;
            }
        }
    }
    if (shift) {
        av_log(s->avctx, AV_LOG_INFO,
               "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
               QMAT_SHIFT - shift);
    }
}

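/* Derive s->qscale from the current lambda (rate-distortion multiplier),
 * clamp it to the configured qmin/qmax range and refresh lambda2. */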
static inline void update_qscale(MpegEncContext *s)
{
    if (s->q_scale_type == 1 && 0) {
        int i;
        int bestdiff=INT_MAX;
        int best = 1;

        for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
            int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
            if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
                (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
                continue;
            if (diff < bestdiff) {
                bestdiff = diff;
                best = i;
            }
        }
        s->qscale = best;
    } else {
        s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
                    (FF_LAMBDA_SHIFT + 7);
        s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
    }

    s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
                 FF_LAMBDA_SHIFT;
}

void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
{
    int i;

    if (matrix) {
        put_bits(pb, 1, 1);
        for (i = 0; i < 64; i++) {
            put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
        }
    } else
        put_bits(pb, 1, 0);
}

/**
 * init s->current_picture.qscale_table from s->lambda_table
 */
void ff_init_qscale_tab(MpegEncContext *s)
{
    int8_t * const qscale_table = s->current_picture.qscale_table;
    int i;

    for (i = 0; i < s->mb_num; i++) {
        unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
        int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
        qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
                                                  s->avctx->qmax);
    }
}

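/* Copy the picture-level fields from src into dst; used to keep duplicate
 * (per-slice) contexts in sync after motion estimation has run. */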
static void update_duplicate_context_after_me(MpegEncContext *dst,
                                              MpegEncContext *src)
{
#define COPY(a) dst->a= src->a
    COPY(pict_type);
    COPY(current_picture);
    COPY(f_code);
    COPY(b_code);
    COPY(qscale);
    COPY(lambda);
    COPY(lambda2);
    COPY(picture_in_gop_number);
    COPY(gop_picture_number);
    COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
    COPY(progressive_frame);    // FIXME don't set in encode_header
    COPY(partitioned_frame);    // FIXME don't set in encode_header
#undef COPY
}

static void mpv_encode_init_static(void)
{
    for (int i = -16; i < 16; i++)
        default_fcode_tab[i + MAX_MV] = 1;
}

/**
 * Set the given MpegEncContext to defaults for encoding.
 * The changed fields will not depend upon the prior state of the MpegEncContext.
 */
static void mpv_encode_defaults(MpegEncContext *s)
{
    static AVOnce init_static_once = AV_ONCE_INIT;

    ff_mpv_common_defaults(s);

    ff_thread_once(&init_static_once, mpv_encode_init_static);

    s->me.mv_penalty = default_mv_penalty;
    s->fcode_tab     = default_fcode_tab;

    s->input_picture_number  = 0;
    s->picture_in_gop_number = 0;
}

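/* Select the DCT quantization routines: arch-specific versions where
 * available, the trellis quantizer when requested, and the C fallbacks
 * otherwise. */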
av_cold int ff_dct_encode_init(MpegEncContext *s)
{
    if (ARCH_X86)
        ff_dct_encode_init_x86(s);

    if (CONFIG_H263_ENCODER)
        ff_h263dsp_init(&s->h263dsp);
    if (!s->dct_quantize)
        s->dct_quantize = ff_dct_quantize_c;
    if (!s->denoise_dct)
        s->denoise_dct  = denoise_dct_c;
    s->fast_dct_quantize = s->dct_quantize;
    if (s->avctx->trellis)
        s->dct_quantize  = dct_quantize_trellis_c;

    return 0;
}

/* init video encoder */
av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;
    AVCPBProperties *cpb_props;
    int i, ret;

    mpv_encode_defaults(s);

    switch (avctx->pix_fmt) {
    case AV_PIX_FMT_YUVJ444P:
    case AV_PIX_FMT_YUV444P:
        s->chroma_format = CHROMA_444;
        break;
    case AV_PIX_FMT_YUVJ422P:
    case AV_PIX_FMT_YUV422P:
        s->chroma_format = CHROMA_422;
        break;
    case AV_PIX_FMT_YUVJ420P:
    case AV_PIX_FMT_YUV420P:
    default:
        s->chroma_format = CHROMA_420;
        break;
    }

    avctx->bits_per_raw_sample = av_clip(avctx->bits_per_raw_sample, 0, 8);

    s->bit_rate = avctx->bit_rate;
    s->width    = avctx->width;
    s->height   = avctx->height;
    if (avctx->gop_size > 600 &&
        avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
        av_log(avctx, AV_LOG_WARNING,
               "keyframe interval too large, reducing it from %d to %d\n",
               avctx->gop_size, 600);
        avctx->gop_size = 600;
    }
    s->gop_size     = avctx->gop_size;
    s->avctx        = avctx;
    if (avctx->max_b_frames > MAX_B_FRAMES) {
        av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
               "is %d.\n", MAX_B_FRAMES);
        avctx->max_b_frames = MAX_B_FRAMES;
    }
    s->max_b_frames = avctx->max_b_frames;
    s->codec_id     = avctx->codec->id;
    s->strict_std_compliance = avctx->strict_std_compliance;
    s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
    s->rtp_mode           = !!s->rtp_payload_size;
    s->intra_dc_precision = avctx->intra_dc_precision;

    // workaround some differences between how applications specify dc precision
    if (s->intra_dc_precision < 0) {
        s->intra_dc_precision += 8;
    } else if (s->intra_dc_precision >= 8)
        s->intra_dc_precision -= 8;

    if (s->intra_dc_precision < 0) {
        av_log(avctx, AV_LOG_ERROR,
                "intra dc precision must be positive, note some applications use"
                " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
        return AVERROR(EINVAL);
    }

    if (avctx->codec_id == AV_CODEC_ID_AMV || (avctx->active_thread_type & FF_THREAD_SLICE))
        s->huffman = 0;

    if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
        av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
        return AVERROR(EINVAL);
    }
    s->user_specified_pts = AV_NOPTS_VALUE;

    if (s->gop_size <= 1) {
        s->intra_only = 1;
        s->gop_size   = 12;
    } else {
        s->intra_only = 0;
    }

    /* Fixed QSCALE */
    s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);

    s->adaptive_quant = (avctx->lumi_masking ||
                         avctx->dark_masking ||
                         avctx->temporal_cplx_masking ||
                         avctx->spatial_cplx_masking  ||
                         avctx->p_masking      ||
                         s->border_masking ||
                         (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
                        !s->fixed_qscale;

    s->loop_filter = !!(avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);

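    /* A maximum rate was given without a VBV buffer size: derive a default
     * buffer size from the maximum rate using codec-specific heuristics. */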
    if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
        switch(avctx->codec_id) {
        case AV_CODEC_ID_MPEG1VIDEO:
        case AV_CODEC_ID_MPEG2VIDEO:
            avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
            break;
        case AV_CODEC_ID_MPEG4:
        case AV_CODEC_ID_MSMPEG4V1:
        case AV_CODEC_ID_MSMPEG4V2:
        case AV_CODEC_ID_MSMPEG4V3:
            if       (avctx->rc_max_rate >= 15000000) {
                avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
            } else if(avctx->rc_max_rate >=  2000000) {
                avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
            } else if(avctx->rc_max_rate >=   384000) {
                avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
            } else
                avctx->rc_buffer_size = 40;
            avctx->rc_buffer_size *= 16384;
            break;
        }
        if (avctx->rc_buffer_size) {
            av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
        }
    }

    if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
        av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
        return AVERROR(EINVAL);
    }

    if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
        av_log(avctx, AV_LOG_INFO,
               "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
    }

    if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
        av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
        return AVERROR(EINVAL);
    }

    if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
        av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
        return AVERROR(EINVAL);
    }

    if (avctx->rc_max_rate &&
        avctx->rc_max_rate == avctx->bit_rate &&
        avctx->rc_max_rate != avctx->rc_min_rate) {
        av_log(avctx, AV_LOG_INFO,
               "impossible bitrate constraints, this will fail\n");
    }

    if (avctx->rc_buffer_size &&
        avctx->bit_rate * (int64_t)avctx->time_base.num >
            avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
        av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
        return AVERROR(EINVAL);
    }

    if (!s->fixed_qscale &&
        avctx->bit_rate * av_q2d(avctx->time_base) >
            avctx->bit_rate_tolerance) {
        av_log(avctx, AV_LOG_WARNING,
               "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
        avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
    }

    if (avctx->rc_max_rate &&
        avctx->rc_min_rate == avctx->rc_max_rate &&
        (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
         s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
        90000LL * (avctx->rc_buffer_size - 1) >
            avctx->rc_max_rate * 0xFFFFLL) {
        av_log(avctx, AV_LOG_INFO,
               "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
               "specified vbv buffer is too large for the given bitrate!\n");
    }

    if ((avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
        s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
        s->codec_id != AV_CODEC_ID_FLV1) {
        av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
        return AVERROR(EINVAL);
    }

    if (s->obmc && avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
        av_log(avctx, AV_LOG_ERROR,
               "OBMC is only supported with simple mb decision\n");
        return AVERROR(EINVAL);
    }

    if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
        av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
        return AVERROR(EINVAL);
    }

    if (s->max_b_frames                    &&
        s->codec_id != AV_CODEC_ID_MPEG4      &&
        s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
        s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
        av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
        return AVERROR(EINVAL);
    }
    if (s->max_b_frames < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "max b frames must be 0 or positive for mpegvideo based encoders\n");
        return AVERROR(EINVAL);
    }

    if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
         s->codec_id == AV_CODEC_ID_H263  ||
         s->codec_id == AV_CODEC_ID_H263P) &&
        (avctx->sample_aspect_ratio.num > 255 ||
         avctx->sample_aspect_ratio.den > 255)) {
        av_log(avctx, AV_LOG_WARNING,
               "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
               avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
        av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
                   avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
    }

    if ((s->codec_id == AV_CODEC_ID_H263  ||
         s->codec_id == AV_CODEC_ID_H263P) &&
        (avctx->width  > 2048 ||
         avctx->height > 1152 )) {
        av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
        return AVERROR(EINVAL);
    }
    if ((s->codec_id == AV_CODEC_ID_H263  ||
         s->codec_id == AV_CODEC_ID_H263P ||
         s->codec_id == AV_CODEC_ID_RV20) &&
        ((avctx->width &3) ||
         (avctx->height&3) )) {
        av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
        return AVERROR(EINVAL);
    }

    if (s->codec_id == AV_CODEC_ID_RV10 &&
        (avctx->width &15 ||
         avctx->height&15 )) {
        av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
        return AVERROR(EINVAL);
    }

    if ((s->codec_id == AV_CODEC_ID_WMV1 ||
         s->codec_id == AV_CODEC_ID_WMV2) &&
         avctx->width & 1) {
        av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
        return AVERROR(EINVAL);
    }

    if ((avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
        s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
        av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
        return AVERROR(EINVAL);
    }

    if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
        av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
        return AVERROR(EINVAL);
    }

    if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
        avctx->mb_decision != FF_MB_DECISION_RD) {
        av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
        return AVERROR(EINVAL);
    }

    if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
            (s->codec_id == AV_CODEC_ID_AMV ||
             s->codec_id == AV_CODEC_ID_MJPEG)) {
        // Used to produce garbage with MJPEG.
        av_log(avctx, AV_LOG_ERROR,
               "QP RD is no longer compatible with MJPEG or AMV\n");
        return AVERROR(EINVAL);
    }

    if (s->scenechange_threshold < 1000000000 &&
        (avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
        av_log(avctx, AV_LOG_ERROR,
               "closed gop with scene change detection are not supported yet, "
               "set threshold to 1000000000\n");
        return AVERROR_PATCHWELCOME;
    }

    if (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
        if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
            s->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
            av_log(avctx, AV_LOG_ERROR,
                   "low delay forcing is only available for mpeg2, "
                   "set strict_std_compliance to 'unofficial' or lower in order to allow it\n");
            return AVERROR(EINVAL);
        }
        if (s->max_b_frames != 0) {
            av_log(avctx, AV_LOG_ERROR,
                   "B-frames cannot be used with low delay\n");
            return AVERROR(EINVAL);
        }
    }

    if (s->q_scale_type == 1) {
        if (avctx->qmax > 28) {
            av_log(avctx, AV_LOG_ERROR,
                   "non linear quant only supports qmax <= 28 currently\n");
            return AVERROR_PATCHWELCOME;
        }
    }

    if (avctx->slices > 1 &&
        (avctx->codec_id == AV_CODEC_ID_FLV1 || avctx->codec_id == AV_CODEC_ID_H261)) {
        av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
        return AVERROR(EINVAL);
    }

    if (avctx->thread_count > 1         &&
        s->codec_id != AV_CODEC_ID_MPEG4      &&
        s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
        s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
        s->codec_id != AV_CODEC_ID_MJPEG      &&
        (s->codec_id != AV_CODEC_ID_H263P)) {
        av_log(avctx, AV_LOG_ERROR,
               "multi threaded encoding not supported by codec\n");
        return AVERROR_PATCHWELCOME;
    }

    if (avctx->thread_count < 1) {
        av_log(avctx, AV_LOG_ERROR,
               "automatic thread number detection not supported by codec, "
               "patch welcome\n");
        return AVERROR_PATCHWELCOME;
    }

    if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
        av_log(avctx, AV_LOG_INFO,
               "notice: b_frame_strategy only affects the first pass\n");
        s->b_frame_strategy = 0;
    }

    i = av_gcd(avctx->time_base.den, avctx->time_base.num);
    if (i > 1) {
        av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
        avctx->time_base.den /= i;
        avctx->time_base.num /= i;
        //return -1;
    }

    if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id == AV_CODEC_ID_AMV || s->codec_id == AV_CODEC_ID_SPEEDHQ) {
        // (a + x * 3 / 8) / x
        s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
        s->inter_quant_bias = 0;
    } else {
        s->intra_quant_bias = 0;
        // (a - x / 4) / x
        s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
    }

    if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
        av_log(avctx, AV_LOG_ERROR, "qmin and/or qmax are invalid, they must be 0 < min <= max\n");
        return AVERROR(EINVAL);
    }

    av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);

    if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
        avctx->time_base.den > (1 << 16) - 1) {
        av_log(avctx, AV_LOG_ERROR,
               "timebase %d/%d not supported by MPEG 4 standard, "
               "the maximum admitted value for the timebase denominator "
               "is %d\n", avctx->time_base.num, avctx->time_base.den,
               (1 << 16) - 1);
        return AVERROR(EINVAL);
    }
    s->time_increment_bits = av_log2(avctx->time_base.den - 1) + 1;

    switch (avctx->codec->id) {
    case AV_CODEC_ID_MPEG1VIDEO:
        s->out_format = FMT_MPEG1;
        s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
        avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
        break;
    case AV_CODEC_ID_MPEG2VIDEO:
        s->out_format = FMT_MPEG1;
        s->low_delay  = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
        avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
        s->rtp_mode   = 1;
        break;
#if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
    case AV_CODEC_ID_MJPEG:
    case AV_CODEC_ID_AMV:
        s->out_format = FMT_MJPEG;
        s->intra_only = 1; /* force intra only for jpeg */
        if ((ret = ff_mjpeg_encode_init(s)) < 0)
            return ret;
        avctx->delay = 0;
        s->low_delay = 1;
        break;
#endif
    case AV_CODEC_ID_SPEEDHQ:
        s->out_format = FMT_SPEEDHQ;
        s->intra_only = 1; /* force intra only for SHQ */
        if (!CONFIG_SPEEDHQ_ENCODER)
            return AVERROR_ENCODER_NOT_FOUND;
        if ((ret = ff_speedhq_encode_init(s)) < 0)
            return ret;
        avctx->delay = 0;
        s->low_delay = 1;
        break;
    case AV_CODEC_ID_H261:
        if (!CONFIG_H261_ENCODER)
            return AVERROR_ENCODER_NOT_FOUND;
        if (ff_h261_get_picture_format(s->width, s->height) < 0) {
            av_log(avctx, AV_LOG_ERROR,
                   "The specified picture size of %dx%d is not valid for the "
                   "H.261 codec.\nValid sizes are 176x144, 352x288\n",
                    s->width, s->height);
            return AVERROR(EINVAL);
        }
        s->out_format = FMT_H261;
        avctx->delay  = 0;
        s->low_delay  = 1;
        s->rtp_mode   = 0; /* Sliced encoding not supported */
        break;
    case AV_CODEC_ID_H263:
        if (!CONFIG_H263_ENCODER)
            return AVERROR_ENCODER_NOT_FOUND;
        if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
                             s->width, s->height) == 8) {
            av_log(avctx, AV_LOG_ERROR,
                   "The specified picture size of %dx%d is not valid for "
                   "the H.263 codec.\nValid sizes are 128x96, 176x144, "
                   "352x288, 704x576, and 1408x1152. "
                   "Try H.263+.\n", s->width, s->height);
            return AVERROR(EINVAL);
        }
        s->out_format = FMT_H263;
        avctx->delay  = 0;
        s->low_delay  = 1;
        break;
    case AV_CODEC_ID_H263P:
        s->out_format = FMT_H263;
        s->h263_plus  = 1;
        /* Fx */
        s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
        s->modified_quant  = s->h263_aic;
        s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
        s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;

        /* /Fx */
        /* These are just to be sure */
        avctx->delay = 0;
        s->low_delay = 1;
        break;
    case AV_CODEC_ID_FLV1:
        s->out_format      = FMT_H263;
        s->h263_flv        = 2; /* format = 1; 11-bit codes */
        s->unrestricted_mv = 1;
        s->rtp_mode  = 0; /* don't allow GOB */
        avctx->delay = 0;
        s->low_delay = 1;
        break;
    case AV_CODEC_ID_RV10:
        s->out_format = FMT_H263;
        avctx->delay  = 0;
        s->low_delay  = 1;
        break;
    case AV_CODEC_ID_RV20:
        s->out_format      = FMT_H263;
        avctx->delay       = 0;
        s->low_delay       = 1;
        s->modified_quant  = 1;
        s->h263_aic        = 1;
        s->h263_plus       = 1;
        s->loop_filter     = 1;
        s->unrestricted_mv = 0;
        break;
    case AV_CODEC_ID_MPEG4:
        s->out_format      = FMT_H263;
        s->h263_pred       = 1;
        s->unrestricted_mv = 1;
        s->low_delay       = s->max_b_frames ? 0 : 1;
        avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
        break;
    case AV_CODEC_ID_MSMPEG4V2:
        s->out_format      = FMT_H263;
        s->h263_pred       = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version = 2;
        avctx->delay       = 0;
        s->low_delay       = 1;
        break;
    case AV_CODEC_ID_MSMPEG4V3:
        s->out_format        = FMT_H263;
        s->h263_pred         = 1;
        s->unrestricted_mv   = 1;
        s->msmpeg4_version   = 3;
        s->flipflop_rounding = 1;
        avctx->delay         = 0;
        s->low_delay         = 1;
        break;
    case AV_CODEC_ID_WMV1:
        s->out_format        = FMT_H263;
        s->h263_pred         = 1;
        s->unrestricted_mv   = 1;
        s->msmpeg4_version   = 4;
        s->flipflop_rounding = 1;
        avctx->delay         = 0;
        s->low_delay         = 1;
        break;
    case AV_CODEC_ID_WMV2:
        s->out_format        = FMT_H263;
        s->h263_pred         = 1;
        s->unrestricted_mv   = 1;
        s->msmpeg4_version   = 5;
        s->flipflop_rounding = 1;
        avctx->delay         = 0;
        s->low_delay         = 1;
        break;
    default:
        return AVERROR(EINVAL);
    }

    avctx->has_b_frames = !s->low_delay;

    s->encoding = 1;

    s->progressive_frame    =
    s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
                                                AV_CODEC_FLAG_INTERLACED_ME) ||
                                s->alternate_scan);

    /* init */
    ff_mpv_idct_init(s);
    if ((ret = ff_mpv_common_init(s)) < 0)
        return ret;

    ff_fdctdsp_init(&s->fdsp, avctx);
    ff_me_cmp_init(&s->mecc, avctx);
    ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
    ff_pixblockdsp_init(&s->pdsp, avctx);
    ff_qpeldsp_init(&s->qdsp);

    if (s->msmpeg4_version) {
        int ac_stats_size = 2 * 2 * (MAX_LEVEL + 1) *  (MAX_RUN + 1) * 2 * sizeof(int);
        if (!(s->ac_stats = av_mallocz(ac_stats_size)))
            return AVERROR(ENOMEM);
    }

    if (!(avctx->stats_out = av_mallocz(256))               ||
        !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix,          32) ||
        !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix,   32) ||
        !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix,          32) ||
        !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix16,        32) ||
        !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix16, 32) ||
        !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix16,        32) ||
        !FF_ALLOCZ_TYPED_ARRAY(s->input_picture,           MAX_PICTURE_COUNT) ||
        !FF_ALLOCZ_TYPED_ARRAY(s->reordered_input_picture, MAX_PICTURE_COUNT))
        return AVERROR(ENOMEM);

    if (s->noise_reduction) {
        if (!FF_ALLOCZ_TYPED_ARRAY(s->dct_offset, 2))
            return AVERROR(ENOMEM);
    }

    ff_dct_encode_init(s);

    if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
        s->chroma_qscale_table = ff_h263_chroma_qscale_table;

    if (s->slice_context_count > 1) {
        s->rtp_mode = 1;

        if (avctx->codec_id == AV_CODEC_ID_H263P)
            s->h263_slice_structured = 1;
    }

    s->quant_precision = 5;

    ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      avctx->ildct_cmp);
    ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->frame_skip_cmp);

    if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
        ff_h261_encode_init(s);
    if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
        ff_h263_encode_init(s);
    if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
        ff_msmpeg4_encode_init(s);
    if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
        && s->out_format == FMT_MPEG1)
        ff_mpeg1_encode_init(s);

    /* init q matrix */
    for (i = 0; i < 64; i++) {
        int j = s->idsp.idct_permutation[i];
        if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
            s->mpeg_quant) {
            s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
            s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
        } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
            s->intra_matrix[j] =
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
        } else if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
            s->intra_matrix[j] =
            s->inter_matrix[j] = ff_mpeg1_default_intra_matrix[i];
        } else {
            /* MPEG-1/2 */
            s->chroma_intra_matrix[j] =
            s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
        }
        if (avctx->intra_matrix)
            s->intra_matrix[j] = avctx->intra_matrix[i];
        if (avctx->inter_matrix)
            s->inter_matrix[j] = avctx->inter_matrix[i];
    }

    /* precompute matrix */
    /* for mjpeg, we do include qscale in the matrix */
    if (s->out_format != FMT_MJPEG) {
        ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
                          s->intra_matrix, s->intra_quant_bias, avctx->qmin,
                          31, 1);
        ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
                          s->inter_matrix, s->inter_quant_bias, avctx->qmin,
                          31, 0);
    }

    if ((ret = ff_rate_control_init(s)) < 0)
        return ret;

    if (s->b_frame_strategy == 2) {
        for (i = 0; i < s->max_b_frames + 2; i++) {
            s->tmp_frames[i] = av_frame_alloc();
            if (!s->tmp_frames[i])
                return AVERROR(ENOMEM);

            s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
            s->tmp_frames[i]->width  = s->width  >> s->brd_scale;
            s->tmp_frames[i]->height = s->height >> s->brd_scale;

            ret = av_frame_get_buffer(s->tmp_frames[i], 0);
            if (ret < 0)
                return ret;
        }
    }

    cpb_props = ff_add_cpb_side_data(avctx);
    if (!cpb_props)
        return AVERROR(ENOMEM);
    cpb_props->max_bitrate = avctx->rc_max_rate;
    cpb_props->min_bitrate = avctx->rc_min_rate;
    cpb_props->avg_bitrate = avctx->bit_rate;
    cpb_props->buffer_size = avctx->rc_buffer_size;

    return 0;
}

av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;
    int i;

    ff_rate_control_uninit(s);

    ff_mpv_common_end(s);
    if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
        s->out_format == FMT_MJPEG)
        ff_mjpeg_encode_close(s);

    av_freep(&avctx->extradata);

    for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
        av_frame_free(&s->tmp_frames[i]);

    ff_free_picture_tables(&s->new_picture);
    ff_mpeg_unref_picture(avctx, &s->new_picture);

    av_freep(&avctx->stats_out);
    av_freep(&s->ac_stats);

    if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
    if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
    s->q_chroma_intra_matrix=   NULL;
    s->q_chroma_intra_matrix16= NULL;
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    return 0;
}

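/* Sum of absolute differences of a 16x16 block against a constant value. */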
static int get_sae(uint8_t *src, int ref, int stride)
{
    int x,y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x++) {
            acc += FFABS(src[x + y * stride] - ref);
        }
    }

    return acc;
}

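/* Count the 16x16 blocks for which coding the block around its mean (a flat
 * predictor) looks cheaper than the inter SAD; used by b_frame_strategy 1. */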
static int get_intra_count(MpegEncContext *s, uint8_t *src,
                           uint8_t *ref, int stride)
{
    int x, y, w, h;
    int acc = 0;

    w = s->width  & ~15;
    h = s->height & ~15;

    for (y = 0; y < h; y += 16) {
        for (x = 0; x < w; x += 16) {
            int offset = x + y * stride;
            int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
                                      stride, 16);
            int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
            int sae  = get_sae(src + offset, mean, stride);

            acc += sae + 500 < sad;
        }
    }
    return acc;
}

static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
{
    return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
                            s->chroma_x_shift, s->chroma_y_shift, s->out_format,
                            s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
                            &s->linesize, &s->uvlinesize);
}

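/* Queue one user-supplied frame for encoding: validate or derive its pts,
 * decide whether its buffers can be referenced directly or have to be copied
 * (and edge-padded) into an internal picture, and append it to
 * input_picture[]. A NULL pic_arg signals flushing. */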
static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
{
    Picture *pic = NULL;
    int64_t pts;
    int i, display_picture_number = 0, ret;
    int encoding_delay = s->max_b_frames ? s->max_b_frames
                                         : (s->low_delay ? 0 : 1);
    int flush_offset = 1;
    int direct = 1;

    if (pic_arg) {
        pts = pic_arg->pts;
        display_picture_number = s->input_picture_number++;

        if (pts != AV_NOPTS_VALUE) {
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                int64_t last = s->user_specified_pts;

                if (pts <= last) {
                    av_log(s->avctx, AV_LOG_ERROR,
                           "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
                           pts, last);
                    return AVERROR(EINVAL);
                }

                if (!s->low_delay && display_picture_number == 1)
                    s->dts_delta = pts - last;
            }
            s->user_specified_pts = pts;
        } else {
            if (s->user_specified_pts != AV_NOPTS_VALUE) {
                s->user_specified_pts =
                pts = s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO,
                       "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
                       pts);
            } else {
                pts = display_picture_number;
            }
        }

        if (!pic_arg->buf[0] ||
            pic_arg->linesize[0] != s->linesize ||
            pic_arg->linesize[1] != s->uvlinesize ||
            pic_arg->linesize[2] != s->uvlinesize)
            direct = 0;
        if ((s->width & 15) || (s->height & 15))
            direct = 0;
        if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
            direct = 0;
        if (s->linesize & (STRIDE_ALIGN-1))
            direct = 0;

        ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
                pic_arg->linesize[1], s->linesize, s->uvlinesize);

        i = ff_find_unused_picture(s->avctx, s->picture, direct);
        if (i < 0)
            return i;

        pic = &s->picture[i];
        pic->reference = 3;

        if (direct) {
            if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
                return ret;
        }
        ret = alloc_picture(s, pic, direct);
        if (ret < 0)
            return ret;

        if (!direct) {
            if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
                pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
                pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
                // empty
            } else {
                int h_chroma_shift, v_chroma_shift;
                av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
                                                 &h_chroma_shift,
                                                 &v_chroma_shift);

                for (i = 0; i < 3; i++) {
                    int src_stride = pic_arg->linesize[i];
                    int dst_stride = i ? s->uvlinesize : s->linesize;
                    int h_shift = i ? h_chroma_shift : 0;
                    int v_shift = i ? v_chroma_shift : 0;
                    int w = s->width  >> h_shift;
                    int h = s->height >> v_shift;
                    uint8_t *src = pic_arg->data[i];
                    uint8_t *dst = pic->f->data[i];
                    int vpad = 16;

                    if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
                        && !s->progressive_sequence
                        && FFALIGN(s->height, 32) - s->height > 16)
                        vpad = 32;

                    if (!s->avctx->rc_buffer_size)
                        dst += INPLACE_OFFSET;

                    if (src_stride == dst_stride)
                        memcpy(dst, src, src_stride * h);
                    else {
                        int h2 = h;
                        uint8_t *dst2 = dst;
                        while (h2--) {
                            memcpy(dst2, src, w);
                            dst2 += dst_stride;
                            src += src_stride;
                        }
                    }
                    if ((s->width & 15) || (s->height & (vpad-1))) {
                        s->mpvencdsp.draw_edges(dst, dst_stride,
                                                w, h,
                                                16 >> h_shift,
                                                vpad >> v_shift,
                                                EDGE_BOTTOM);
                    }
                }
                emms_c();
            }
        }
        ret = av_frame_copy_props(pic->f, pic_arg);
        if (ret < 0)
            return ret;

        pic->f->display_picture_number = display_picture_number;
        pic->f->pts = pts; // we set this here to avoid modifying pic_arg
    } else {
        /* Flushing: When we have not received enough input frames,
         * ensure s->input_picture[0] contains the first picture */
        for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
            if (s->input_picture[flush_offset])
                break;

        if (flush_offset <= 1)
            flush_offset = 1;
        else
            encoding_delay = encoding_delay - flush_offset + 1;
    }

    /* shift buffer entries */
    for (i = flush_offset; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
        s->input_picture[i - flush_offset] = s->input_picture[i];

    s->input_picture[encoding_delay] = (Picture*) pic;

    return 0;
}

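/* Compare a candidate frame against the last coded one with frame_skip_cmp
 * and return 1 if it is similar enough to be dropped. */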
static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
{
    int x, y, plane;
    int score = 0;
    int64_t score64 = 0;

    for (plane = 0; plane < 3; plane++) {
        const int stride = p->f->linesize[plane];
        const int bw = plane ? 1 : 2;
        for (y = 0; y < s->mb_height * bw; y++) {
            for (x = 0; x < s->mb_width * bw; x++) {
                int off = p->shared ? 0 : 16;
                uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
                uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
                int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);

                switch (FFABS(s->frame_skip_exp)) {
                case 0: score    =  FFMAX(score, v);          break;
                case 1: score   += FFABS(v);                  break;
                case 2: score64 += v * (int64_t)v;                       break;
                case 3: score64 += FFABS(v * (int64_t)v * v);            break;
                case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
                }
            }
        }
    }
    emms_c();

    if (score)
        score64 = score;
    if (s->frame_skip_exp < 0)
        score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
                      -1.0/s->frame_skip_exp);

    if (score64 < s->frame_skip_threshold)
        return 1;
    if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
        return 1;
    return 0;
}

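/* Helper for estimate_best_b_count(): feed one frame (or NULL to flush) to an
 * auxiliary encoder and return the total size of the packets it produces. */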
static int encode_frame(AVCodecContext *c, AVFrame *frame, AVPacket *pkt)
{
    int ret;
    int size = 0;

    ret = avcodec_send_frame(c, frame);
    if (ret < 0)
        return ret;

    do {
        ret = avcodec_receive_packet(c, pkt);
        if (ret >= 0) {
            size += pkt->size;
            av_packet_unref(pkt);
        } else if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
            return ret;
    } while (ret >= 0);

    return size;
}

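/* b_frame_strategy 2: encode downscaled copies of the queued input frames
 * with every possible number of consecutive B-frames and return the count
 * that gives the lowest rate-distortion score. */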
static int estimate_best_b_count(MpegEncContext *s)
{
    const AVCodec *codec = avcodec_find_encoder(s->avctx->codec_id);
    AVPacket *pkt;
    const int scale = s->brd_scale;
    int width  = s->width  >> scale;
    int height = s->height >> scale;
    int i, j, out_size, p_lambda, b_lambda, lambda2;
    int64_t best_rd  = INT64_MAX;
    int best_b_count = -1;
    int ret = 0;

    av_assert0(scale >= 0 && scale <= 3);

    pkt = av_packet_alloc();
    if (!pkt)
        return AVERROR(ENOMEM);

    //emms_c();
    //s->next_picture_ptr->quality;
    p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
    //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
    b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
    if (!b_lambda) // FIXME we should do this somewhere else
        b_lambda = p_lambda;
    lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
               FF_LAMBDA_SHIFT;

    for (i = 0; i < s->max_b_frames + 2; i++) {
        Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
                                                s->next_picture_ptr;
        uint8_t *data[4];

        if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
            pre_input = *pre_input_ptr;
            memcpy(data, pre_input_ptr->f->data, sizeof(data));

            if (!pre_input.shared && i) {
                data[0] += INPLACE_OFFSET;
                data[1] += INPLACE_OFFSET;
                data[2] += INPLACE_OFFSET;
            }

            s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
                                       s->tmp_frames[i]->linesize[0],
                                       data[0],
                                       pre_input.f->linesize[0],
                                       width, height);
            s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
                                       s->tmp_frames[i]->linesize[1],
                                       data[1],
                                       pre_input.f->linesize[1],
                                       width >> 1, height >> 1);
            s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
                                       s->tmp_frames[i]->linesize[2],
                                       data[2],
                                       pre_input.f->linesize[2],
                                       width >> 1, height >> 1);
        }
    }

    for (j = 0; j < s->max_b_frames + 1; j++) {
        AVCodecContext *c;
        int64_t rd = 0;

        if (!s->input_picture[j])
            break;

        c = avcodec_alloc_context3(NULL);
        if (!c) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }

        c->width        = width;
        c->height       = height;
        c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
        c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
        c->mb_decision  = s->avctx->mb_decision;
        c->me_cmp       = s->avctx->me_cmp;
        c->mb_cmp       = s->avctx->mb_cmp;
        c->me_sub_cmp   = s->avctx->me_sub_cmp;
        c->pix_fmt      = AV_PIX_FMT_YUV420P;
        c->time_base    = s->avctx->time_base;
        c->max_b_frames = s->max_b_frames;

        ret = avcodec_open2(c, codec, NULL);
        if (ret < 0)
            goto fail;


        s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
        s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;

        out_size = encode_frame(c, s->tmp_frames[0], pkt);
        if (out_size < 0) {
            ret = out_size;
            goto fail;
        }

        //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;

        for (i = 0; i < s->max_b_frames + 1; i++) {
            int is_p = i % (j + 1) == j || i == s->max_b_frames;

            s->tmp_frames[i + 1]->pict_type = is_p ?
                                     AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
            s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;

            out_size = encode_frame(c, s->tmp_frames[i + 1], pkt);
            if (out_size < 0) {
                ret = out_size;
                goto fail;
            }

            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
        }

        /* get the delayed frames */
        out_size = encode_frame(c, NULL, pkt);
        if (out_size < 0) {
            ret = out_size;
            goto fail;
        }
        rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);

        rd += c->error[0] + c->error[1] + c->error[2];

        if (rd < best_rd) {
            best_rd = rd;
            best_b_count = j;
        }

fail:
        avcodec_free_context(&c);
        av_packet_unref(pkt);
        if (ret < 0) {
            best_b_count = ret;
            break;
        }
    }

    av_packet_free(&pkt);

    return best_b_count;
}

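/* Pick the next picture to code from the input queue: apply frame skipping,
 * decide how many B-frames to insert according to b_frame_strategy, and set
 * up reordered_input_picture[] with the chosen coding types. */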
static int select_input_picture(MpegEncContext *s)
{
    int i, ret;

    for (i = 1; i < MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;

    /* set next picture type & ordering */
    if (!s->reordered_input_picture[0] && s->input_picture[0]) {
        if (s->frame_skip_threshold || s->frame_skip_factor) {
            if (s->picture_in_gop_number < s->gop_size &&
                s->next_picture_ptr &&
                skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
                // FIXME check that the gop check above is +-1 correct
                av_frame_unref(s->input_picture[0]->f);

                ff_vbv_update(s, 0);

                goto no_output_pic;
            }
        }

        if (/*s->picture_in_gop_number >= s->gop_size ||*/
            !s->next_picture_ptr || s->intra_only) {
            s->reordered_input_picture[0] = s->input_picture[0];
            s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
            s->reordered_input_picture[0]->f->coded_picture_number =
                s->coded_picture_number++;
        } else {
            int b_frames = 0;

            if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
                for (i = 0; i < s->max_b_frames + 1; i++) {
                    int pict_num = s->input_picture[0]->f->display_picture_number + i;

                    if (pict_num >= s->rc_context.num_entries)
                        break;
                    if (!s->input_picture[i]) {
                        s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
                        break;
                    }

                    s->input_picture[i]->f->pict_type =
                        s->rc_context.entry[pict_num].new_pict_type;
                }
            }

            if (s->b_frame_strategy == 0) {
                b_frames = s->max_b_frames;
                while (b_frames && !s->input_picture[b_frames])
                    b_frames--;
            } else if (s->b_frame_strategy == 1) {
                for (i = 1; i < s->max_b_frames + 1; i++) {
                    if (s->input_picture[i] &&
                        s->input_picture[i]->b_frame_score == 0) {
                        s->input_picture[i]->b_frame_score =
                            get_intra_count(s,
                                            s->input_picture[i    ]->f->data[0],
                                            s->input_picture[i - 1]->f->data[0],
                                            s->linesize) + 1;
                    }
                }
                for (i = 0; i < s->max_b_frames + 1; i++) {
                    if (!s->input_picture[i] ||
                        s->input_picture[i]->b_frame_score - 1 >
                            s->mb_num / s->b_sensitivity)
                        break;
                }

                b_frames = FFMAX(0, i - 1);

                /* reset scores */
                for (i = 0; i < b_frames + 1; i++) {
                    s->input_picture[i]->b_frame_score = 0;
                }
            } else if (s->b_frame_strategy == 2) {
                b_frames = estimate_best_b_count(s);
                if (b_frames < 0)
                    return b_frames;
            }

            emms_c();

            for (i = b_frames - 1; i >= 0; i--) {
                int type = s->input_picture[i]->f->pict_type;
                if (type && type != AV_PICTURE_TYPE_B)
                    b_frames = i;
            }
            if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
                b_frames == s->max_b_frames) {
                av_log(s->avctx, AV_LOG_ERROR,
                       "warning, too many B-frames in a row\n");
            }

            if (s->picture_in_gop_number + b_frames >= s->gop_size) {
                if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
                    s->gop_size > s->picture_in_gop_number) {
                    b_frames = s->gop_size - s->picture_in_gop_number - 1;
                } else {
                    if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
                        b_frames = 0;
                    s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
                }
            }

            if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
                s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
                b_frames--;

            s->reordered_input_picture[0] = s->input_picture[b_frames];
            if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
                s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1508             s->reordered_input_picture[0]->f->coded_picture_number =
1509                 s->coded_picture_number++;
1510             for (i = 0; i < b_frames; i++) {
1511                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1512                 s->reordered_input_picture[i + 1]->f->pict_type =
1513                     AV_PICTURE_TYPE_B;
1514                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1515                     s->coded_picture_number++;
1516             }
1517         }
1518     }
1519 no_output_pic:
1520     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1521
1522     if (s->reordered_input_picture[0]) {
1523         s->reordered_input_picture[0]->reference =
1524            s->reordered_input_picture[0]->f->pict_type !=
1525                AV_PICTURE_TYPE_B ? 3 : 0;
1526
1527         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1528             return ret;
1529
1530         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1531             // input is a shared picture, so we can't modify it -> allocate a new
1532             // one & ensure that the shared one is reusable
1533
1534             Picture *pic;
1535             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1536             if (i < 0)
1537                 return i;
1538             pic = &s->picture[i];
1539
1540             pic->reference = s->reordered_input_picture[0]->reference;
1541             if (alloc_picture(s, pic, 0) < 0) {
1542                 return -1;
1543             }
1544
1545             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1546             if (ret < 0)
1547                 return ret;
1548
1549             /* mark us unused / free shared pic */
1550             av_frame_unref(s->reordered_input_picture[0]->f);
1551             s->reordered_input_picture[0]->shared = 0;
1552
1553             s->current_picture_ptr = pic;
1554         } else {
1555             // input is not a shared pix -> reuse buffer for current_pix
1556             s->current_picture_ptr = s->reordered_input_picture[0];
1557             for (i = 0; i < 4; i++) {
1558                 if (s->new_picture.f->data[i])
1559                     s->new_picture.f->data[i] += INPLACE_OFFSET;
1560             }
1561         }
1562         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1563         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1564                                        s->current_picture_ptr)) < 0)
1565             return ret;
1566
1567         s->picture_number = s->new_picture.f->display_picture_number;
1568     }
1569     return 0;
1570 }
1571
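/* Post-encode bookkeeping: pad the borders of the reconstructed reference
 * picture (needed for unrestricted motion vectors) and remember the picture
 * type and lambda for rate control of the following frames. */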
1572 static void frame_end(MpegEncContext *s)
1573 {
1574     if (s->unrestricted_mv &&
1575         s->current_picture.reference &&
1576         !s->intra_only) {
1577         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1578         int hshift = desc->log2_chroma_w;
1579         int vshift = desc->log2_chroma_h;
1580         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1581                                 s->current_picture.f->linesize[0],
1582                                 s->h_edge_pos, s->v_edge_pos,
1583                                 EDGE_WIDTH, EDGE_WIDTH,
1584                                 EDGE_TOP | EDGE_BOTTOM);
1585         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1586                                 s->current_picture.f->linesize[1],
1587                                 s->h_edge_pos >> hshift,
1588                                 s->v_edge_pos >> vshift,
1589                                 EDGE_WIDTH >> hshift,
1590                                 EDGE_WIDTH >> vshift,
1591                                 EDGE_TOP | EDGE_BOTTOM);
1592         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1593                                 s->current_picture.f->linesize[2],
1594                                 s->h_edge_pos >> hshift,
1595                                 s->v_edge_pos >> vshift,
1596                                 EDGE_WIDTH >> hshift,
1597                                 EDGE_WIDTH >> vshift,
1598                                 EDGE_TOP | EDGE_BOTTOM);
1599     }
1600
1601     emms_c();
1602
1603     s->last_pict_type                 = s->pict_type;
1604     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1605     if (s->pict_type!= AV_PICTURE_TYPE_B)
1606         s->last_non_b_pict_type = s->pict_type;
1607
1608 #if FF_API_CODED_FRAME
1609 FF_DISABLE_DEPRECATION_WARNINGS
1610     av_frame_unref(s->avctx->coded_frame);
1611     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1612 FF_ENABLE_DEPRECATION_WARNINGS
1613 #endif
1614 #if FF_API_ERROR_FRAME
1615 FF_DISABLE_DEPRECATION_WARNINGS
1616     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1617            sizeof(s->current_picture.encoding_error));
1618 FF_ENABLE_DEPRECATION_WARNINGS
1619 #endif
1620 }
1621
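/* Recompute the per-coefficient DCT offsets used for noise reduction: each
 * offset is approximately noise_reduction * dct_count / dct_error_sum[i]
 * (computed with rounding), and the counters are halved once dct_count
 * exceeds 2^16 so the statistics keep tracking recent frames. */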
1622 static void update_noise_reduction(MpegEncContext *s)
1623 {
1624     int intra, i;
1625
1626     for (intra = 0; intra < 2; intra++) {
1627         if (s->dct_count[intra] > (1 << 16)) {
1628             for (i = 0; i < 64; i++) {
1629                 s->dct_error_sum[intra][i] >>= 1;
1630             }
1631             s->dct_count[intra] >>= 1;
1632         }
1633
1634         for (i = 0; i < 64; i++) {
1635             s->dct_offset[intra][i] = (s->noise_reduction *
1636                                        s->dct_count[intra] +
1637                                        s->dct_error_sum[intra][i] / 2) /
1638                                       (s->dct_error_sum[intra][i] + 1);
1639         }
1640     }
1641 }
1642
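/* Per-frame setup: rotate the last/next/current picture references, double
 * the line sizes for field pictures, select the matching dct_unquantize
 * functions and, if noise reduction is enabled, refresh its DCT offsets. */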
1643 static int frame_start(MpegEncContext *s)
1644 {
1645     int ret;
1646
1647     /* mark & release old frames */
1648     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1649         s->last_picture_ptr != s->next_picture_ptr &&
1650         s->last_picture_ptr->f->buf[0]) {
1651         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1652     }
1653
1654     s->current_picture_ptr->f->pict_type = s->pict_type;
1655     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1656
1657     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1658     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1659                                    s->current_picture_ptr)) < 0)
1660         return ret;
1661
1662     if (s->pict_type != AV_PICTURE_TYPE_B) {
1663         s->last_picture_ptr = s->next_picture_ptr;
1664         if (!s->droppable)
1665             s->next_picture_ptr = s->current_picture_ptr;
1666     }
1667
1668     if (s->last_picture_ptr) {
1669         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1670         if (s->last_picture_ptr->f->buf[0] &&
1671             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1672                                        s->last_picture_ptr)) < 0)
1673             return ret;
1674     }
1675     if (s->next_picture_ptr) {
1676         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1677         if (s->next_picture_ptr->f->buf[0] &&
1678             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1679                                        s->next_picture_ptr)) < 0)
1680             return ret;
1681     }
1682
1683     if (s->picture_structure!= PICT_FRAME) {
1684         int i;
1685         for (i = 0; i < 4; i++) {
1686             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1687                 s->current_picture.f->data[i] +=
1688                     s->current_picture.f->linesize[i];
1689             }
1690             s->current_picture.f->linesize[i] *= 2;
1691             s->last_picture.f->linesize[i]    *= 2;
1692             s->next_picture.f->linesize[i]    *= 2;
1693         }
1694     }
1695
1696     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1697         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1698         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1699     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1700         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1701         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1702     } else {
1703         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1704         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1705     }
1706
1707     if (s->dct_error_sum) {
1708         av_assert2(s->noise_reduction && s->encoding);
1709         update_noise_reduction(s);
1710     }
1711
1712     return 0;
1713 }
1714
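/* Main encode entry point shared by the mpegvideo-based encoders (MPEG-1/2/4,
 * H.261/H.263, MS-MPEG4, WMV, MJPEG, SpeedHQ, ...): it buffers and reorders
 * input pictures, encodes one coded picture per call once the reorder delay
 * is filled, handles VBV-driven re-encoding and stuffing, and fills the
 * output packet.  Illustrative sketch of how an encoder wires it up -- an
 * assumption for illustration only, not a definition from this file; the real
 * codec definitions live in the individual *enc.c files and carry more fields:
 *
 *     AVCodec ff_example_mpv_encoder = {
 *         .name           = "example",           // hypothetical encoder name
 *         .type           = AVMEDIA_TYPE_VIDEO,
 *         .id             = AV_CODEC_ID_MPEG1VIDEO,
 *         .priv_data_size = sizeof(MpegEncContext),
 *         .init           = ff_mpv_encode_init,
 *         .encode2        = ff_mpv_encode_picture,
 *         .close          = ff_mpv_encode_end,
 *     };
 */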
1715 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1716                           const AVFrame *pic_arg, int *got_packet)
1717 {
1718     MpegEncContext *s = avctx->priv_data;
1719     int i, stuffing_count, ret;
1720     int context_count = s->slice_context_count;
1721
1722     s->vbv_ignore_qmax = 0;
1723
1724     s->picture_in_gop_number++;
1725
1726     if (load_input_picture(s, pic_arg) < 0)
1727         return -1;
1728
1729     if (select_input_picture(s) < 0) {
1730         return -1;
1731     }
1732
1733     /* is there a picture to output? */
1734     if (s->new_picture.f->data[0]) {
1735         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1736         int pkt_size = growing_buffer
1737                        ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1738                        : s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1739         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1740             return ret;
1741         if (s->mb_info) {
1742             s->mb_info_ptr = av_packet_new_side_data(pkt,
1743                                  AV_PKT_DATA_H263_MB_INFO,
1744                                  s->mb_width*s->mb_height*12);
1745             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1746         }
1747
1748         for (i = 0; i < context_count; i++) {
1749             int start_y = s->thread_context[i]->start_mb_y;
1750             int   end_y = s->thread_context[i]->  end_mb_y;
1751             int h       = s->mb_height;
1752             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1753             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1754
1755             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1756         }
1757
1758         s->pict_type = s->new_picture.f->pict_type;
1759         //emms_c();
1760         ret = frame_start(s);
1761         if (ret < 0)
1762             return ret;
1763 vbv_retry:
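        /* Re-entry point: if the coded frame would overflow the VBV buffer,
         * the rc_buffer_size block below raises lambda, rewinds the bitstream
         * writers and jumps back here to encode the picture again. */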
1764         ret = encode_picture(s, s->picture_number);
1765         if (growing_buffer) {
1766             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1767             pkt->data = s->pb.buf;
1768             pkt->size = avctx->internal->byte_buffer_size;
1769         }
1770         if (ret < 0)
1771             return -1;
1772
1773         frame_end(s);
1774
1775         if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) && s->out_format == FMT_MJPEG)
1776             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1777
1778         if (avctx->rc_buffer_size) {
1779             RateControlContext *rcc = &s->rc_context;
1780             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1781             int hq = (avctx->mb_decision == FF_MB_DECISION_RD || avctx->trellis);
1782             int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1783
1784             if (put_bits_count(&s->pb) > max_size &&
1785                 s->lambda < s->lmax) {
1786                 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1787                                        (s->qscale + 1) / s->qscale);
1788                 if (s->adaptive_quant) {
1789                     int i;
1790                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1791                         s->lambda_table[i] =
1792                             FFMAX(s->lambda_table[i] + min_step,
1793                                   s->lambda_table[i] * (s->qscale + 1) /
1794                                   s->qscale);
1795                 }
1796                 s->mb_skipped = 0;        // done in frame_start()
1797                 // the following was done in encode_picture(), so it must be undone before retrying
1798                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1799                     if (s->flipflop_rounding          ||
1800                         s->codec_id == AV_CODEC_ID_H263P ||
1801                         s->codec_id == AV_CODEC_ID_MPEG4)
1802                         s->no_rounding ^= 1;
1803                 }
1804                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1805                     s->time_base       = s->last_time_base;
1806                     s->last_non_b_time = s->time - s->pp_time;
1807                 }
1808                 for (i = 0; i < context_count; i++) {
1809                     PutBitContext *pb = &s->thread_context[i]->pb;
1810                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1811                 }
1812                 s->vbv_ignore_qmax = 1;
1813                 av_log(avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1814                 goto vbv_retry;
1815             }
1816
1817             av_assert0(avctx->rc_max_rate);
1818         }
1819
1820         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1821             ff_write_pass1_stats(s);
1822
1823         for (i = 0; i < 4; i++) {
1824             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1825             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1826         }
1827         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1828                                        s->current_picture_ptr->encoding_error,
1829                                        (avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1830                                        s->pict_type);
1831
1832         if (avctx->flags & AV_CODEC_FLAG_PASS1)
1833             assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1834                                              s->misc_bits + s->i_tex_bits +
1835                                              s->p_tex_bits);
1836         flush_put_bits(&s->pb);
1837         s->frame_bits  = put_bits_count(&s->pb);
1838
1839         stuffing_count = ff_vbv_update(s, s->frame_bits);
1840         s->stuffing_bits = 8*stuffing_count;
1841         if (stuffing_count) {
1842             if (put_bytes_left(&s->pb, 0) < stuffing_count + 50) {
1843                 av_log(avctx, AV_LOG_ERROR, "stuffing too large\n");
1844                 return -1;
1845             }
1846
1847             switch (s->codec_id) {
1848             case AV_CODEC_ID_MPEG1VIDEO:
1849             case AV_CODEC_ID_MPEG2VIDEO:
1850                 while (stuffing_count--) {
1851                     put_bits(&s->pb, 8, 0);
1852                 }
1853             break;
1854             case AV_CODEC_ID_MPEG4:
1855                 put_bits(&s->pb, 16, 0);
1856                 put_bits(&s->pb, 16, 0x1C3);
1857                 stuffing_count -= 4;
1858                 while (stuffing_count--) {
1859                     put_bits(&s->pb, 8, 0xFF);
1860                 }
1861             break;
1862             default:
1863                 av_log(avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1864             }
1865             flush_put_bits(&s->pb);
1866             s->frame_bits  = put_bits_count(&s->pb);
1867         }
1868
1869         /* update MPEG-1/2 vbv_delay for CBR */
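        /* The actual delay is only known once the frame has been fully coded,
         * so the 16-bit vbv_delay field (which straddles three bytes of the
         * picture header) is patched in place through s->vbv_delay_ptr.
         * vbv_delay is in 90 kHz ticks; the value exported via AVCPBProperties
         * is multiplied by 300 to match its 27 MHz clock. */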
1870         if (avctx->rc_max_rate                          &&
1871             avctx->rc_min_rate == avctx->rc_max_rate &&
1872             s->out_format == FMT_MPEG1                     &&
1873             90000LL * (avctx->rc_buffer_size - 1) <=
1874                 avctx->rc_max_rate * 0xFFFFLL) {
1875             AVCPBProperties *props;
1876             size_t props_size;
1877
1878             int vbv_delay, min_delay;
1879             double inbits  = avctx->rc_max_rate *
1880                              av_q2d(avctx->time_base);
1881             int    minbits = s->frame_bits - 8 *
1882                              (s->vbv_delay_ptr - s->pb.buf - 1);
1883             double bits    = s->rc_context.buffer_index + minbits - inbits;
1884
1885             if (bits < 0)
1886                 av_log(avctx, AV_LOG_ERROR,
1887                        "Internal error, negative bits\n");
1888
1889             av_assert1(s->repeat_first_field == 0);
1890
1891             vbv_delay = bits * 90000 / avctx->rc_max_rate;
1892             min_delay = (minbits * 90000LL + avctx->rc_max_rate - 1) /
1893                         avctx->rc_max_rate;
1894
1895             vbv_delay = FFMAX(vbv_delay, min_delay);
1896
1897             av_assert0(vbv_delay < 0xFFFF);
1898
1899             s->vbv_delay_ptr[0] &= 0xF8;
1900             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1901             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1902             s->vbv_delay_ptr[2] &= 0x07;
1903             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1904
1905             props = av_cpb_properties_alloc(&props_size);
1906             if (!props)
1907                 return AVERROR(ENOMEM);
1908             props->vbv_delay = vbv_delay * 300;
1909
1910             ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1911                                           (uint8_t*)props, props_size);
1912             if (ret < 0) {
1913                 av_freep(&props);
1914                 return ret;
1915             }
1916         }
1917         s->total_bits     += s->frame_bits;
1918
1919         pkt->pts = s->current_picture.f->pts;
1920         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1921             if (!s->current_picture.f->coded_picture_number)
1922                 pkt->dts = pkt->pts - s->dts_delta;
1923             else
1924                 pkt->dts = s->reordered_pts;
1925             s->reordered_pts = pkt->pts;
1926         } else
1927             pkt->dts = pkt->pts;
1928         if (s->current_picture.f->key_frame)
1929             pkt->flags |= AV_PKT_FLAG_KEY;
1930         if (s->mb_info)
1931             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1932     } else {
1933         s->frame_bits = 0;
1934     }
1935
1936     /* release non-reference frames */
1937     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1938         if (!s->picture[i].reference)
1939             ff_mpeg_unref_picture(avctx, &s->picture[i]);
1940     }
1941
1942     av_assert1((s->frame_bits & 7) == 0);
1943
1944     pkt->size = s->frame_bits / 8;
1945     *got_packet = !!pkt->size;
1946     return 0;
1947 }
1948
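/* Heuristic coefficient elimination: if a block contains only a few isolated
 * +-1 coefficients (weighted by their position via tab[]) and their score is
 * below the threshold, zero the whole block (optionally keeping the DC
 * coefficient), since coding it would cost more bits than the quality it adds. */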
1949 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1950                                                 int n, int threshold)
1951 {
1952     static const char tab[64] = {
1953         3, 2, 2, 1, 1, 1, 1, 1,
1954         1, 1, 1, 1, 1, 1, 1, 1,
1955         1, 1, 1, 1, 1, 1, 1, 1,
1956         0, 0, 0, 0, 0, 0, 0, 0,
1957         0, 0, 0, 0, 0, 0, 0, 0,
1958         0, 0, 0, 0, 0, 0, 0, 0,
1959         0, 0, 0, 0, 0, 0, 0, 0,
1960         0, 0, 0, 0, 0, 0, 0, 0
1961     };
1962     int score = 0;
1963     int run = 0;
1964     int i;
1965     int16_t *block = s->block[n];
1966     const int last_index = s->block_last_index[n];
1967     int skip_dc;
1968
1969     if (threshold < 0) {
1970         skip_dc = 0;
1971         threshold = -threshold;
1972     } else
1973         skip_dc = 1;
1974
1975     /* Are all the coefficients we could zero already zero? */
1976     if (last_index <= skip_dc - 1)
1977         return;
1978
1979     for (i = 0; i <= last_index; i++) {
1980         const int j = s->intra_scantable.permutated[i];
1981         const int level = FFABS(block[j]);
1982         if (level == 1) {
1983             if (skip_dc && i == 0)
1984                 continue;
1985             score += tab[run];
1986             run = 0;
1987         } else if (level > 1) {
1988             return;
1989         } else {
1990             run++;
1991         }
1992     }
1993     if (score >= threshold)
1994         return;
1995     for (i = skip_dc; i <= last_index; i++) {
1996         const int j = s->intra_scantable.permutated[i];
1997         block[j] = 0;
1998     }
1999     if (block[0])
2000         s->block_last_index[n] = 0;
2001     else
2002         s->block_last_index[n] = -1;
2003 }
2004
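/* Clamp quantized coefficients to the range the entropy coder can represent,
 * [min_qcoeff, max_qcoeff]; the intra DC coefficient is left untouched.  The
 * warning is only printed when mb_decision is FF_MB_DECISION_SIMPLE. */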
2005 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2006                                int last_index)
2007 {
2008     int i;
2009     const int maxlevel = s->max_qcoeff;
2010     const int minlevel = s->min_qcoeff;
2011     int overflow = 0;
2012
2013     if (s->mb_intra) {
2014         i = 1; // skip clipping of intra dc
2015     } else
2016         i = 0;
2017
2018     for (; i <= last_index; i++) {
2019         const int j = s->intra_scantable.permutated[i];
2020         int level = block[j];
2021
2022         if (level > maxlevel) {
2023             level = maxlevel;
2024             overflow++;
2025         } else if (level < minlevel) {
2026             level = minlevel;
2027             overflow++;
2028         }
2029
2030         block[j] = level;
2031     }
2032
2033     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2034         av_log(s->avctx, AV_LOG_INFO,
2035                "warning, clipping %d dct coefficients to %d..%d\n",
2036                overflow, minlevel, maxlevel);
2037 }
2038
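/* Per-pixel visual weights for quantizer noise shaping: each weight is 36
 * times the standard deviation of the pixel's 3x3 neighbourhood, computed as
 * 36*sqrt(count*sqr - sum*sum)/count, so textured areas get larger weights
 * than flat ones. */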
2039 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
2040 {
2041     int x, y;
2042     // FIXME optimize
2043     for (y = 0; y < 8; y++) {
2044         for (x = 0; x < 8; x++) {
2045             int x2, y2;
2046             int sum = 0;
2047             int sqr = 0;
2048             int count = 0;
2049
2050             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2051                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2052                     int v = ptr[x2 + y2 * stride];
2053                     sum += v;
2054                     sqr += v * v;
2055                     count++;
2056                 }
2057             }
2058             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2059         }
2060     }
2061 }
2062
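/* Encode one macroblock: fetch the source blocks (with edge emulation at the
 * picture border), optionally select interlaced DCT, compute the intra blocks
 * or motion-compensated residual, quantize (with optional noise shaping and
 * coefficient elimination) and finally entropy-code the blocks with the
 * codec-specific routine.  mb_block_count is 6, 8 or 12 for 4:2:0, 4:2:2 and
 * 4:4:4 respectively; see encode_mb() below. */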
2063 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2064                                                 int motion_x, int motion_y,
2065                                                 int mb_block_height,
2066                                                 int mb_block_width,
2067                                                 int mb_block_count)
2068 {
2069     int16_t weight[12][64];
2070     int16_t orig[12][64];
2071     const int mb_x = s->mb_x;
2072     const int mb_y = s->mb_y;
2073     int i;
2074     int skip_dct[12];
2075     int dct_offset = s->linesize * 8; // default for progressive frames
2076     int uv_dct_offset = s->uvlinesize * 8;
2077     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2078     ptrdiff_t wrap_y, wrap_c;
2079
2080     for (i = 0; i < mb_block_count; i++)
2081         skip_dct[i] = s->skipdct;
2082
2083     if (s->adaptive_quant) {
2084         const int last_qp = s->qscale;
2085         const int mb_xy = mb_x + mb_y * s->mb_stride;
2086
2087         s->lambda = s->lambda_table[mb_xy];
2088         update_qscale(s);
2089
2090         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2091             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2092             s->dquant = s->qscale - last_qp;
2093
2094             if (s->out_format == FMT_H263) {
2095                 s->dquant = av_clip(s->dquant, -2, 2);
2096
2097                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2098                     if (!s->mb_intra) {
2099                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2100                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2101                                 s->dquant = 0;
2102                         }
2103                         if (s->mv_type == MV_TYPE_8X8)
2104                             s->dquant = 0;
2105                     }
2106                 }
2107             }
2108         }
2109         ff_set_qscale(s, last_qp + s->dquant);
2110     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2111         ff_set_qscale(s, s->qscale + s->dquant);
2112
2113     wrap_y = s->linesize;
2114     wrap_c = s->uvlinesize;
2115     ptr_y  = s->new_picture.f->data[0] +
2116              (mb_y * 16 * wrap_y)              + mb_x * 16;
2117     ptr_cb = s->new_picture.f->data[1] +
2118              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2119     ptr_cr = s->new_picture.f->data[2] +
2120              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2121
2122     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2123         uint8_t *ebuf = s->sc.edge_emu_buffer + 38 * wrap_y;
2124         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2125         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2126         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2127                                  wrap_y, wrap_y,
2128                                  16, 16, mb_x * 16, mb_y * 16,
2129                                  s->width, s->height);
2130         ptr_y = ebuf;
2131         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2132                                  wrap_c, wrap_c,
2133                                  mb_block_width, mb_block_height,
2134                                  mb_x * mb_block_width, mb_y * mb_block_height,
2135                                  cw, ch);
2136         ptr_cb = ebuf + 16 * wrap_y;
2137         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2138                                  wrap_c, wrap_c,
2139                                  mb_block_width, mb_block_height,
2140                                  mb_x * mb_block_width, mb_y * mb_block_height,
2141                                  cw, ch);
2142         ptr_cr = ebuf + 16 * wrap_y + 16;
2143     }
2144
2145     if (s->mb_intra) {
2146         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2147             int progressive_score, interlaced_score;
2148
2149             s->interlaced_dct = 0;
2150             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2151                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2152                                                      NULL, wrap_y, 8) - 400;
2153
2154             if (progressive_score > 0) {
2155                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2156                                                         NULL, wrap_y * 2, 8) +
2157                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2158                                                         NULL, wrap_y * 2, 8);
2159                 if (progressive_score > interlaced_score) {
2160                     s->interlaced_dct = 1;
2161
2162                     dct_offset = wrap_y;
2163                     uv_dct_offset = wrap_c;
2164                     wrap_y <<= 1;
2165                     if (s->chroma_format == CHROMA_422 ||
2166                         s->chroma_format == CHROMA_444)
2167                         wrap_c <<= 1;
2168                 }
2169             }
2170         }
2171
2172         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2173         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2174         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2175         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2176
2177         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2178             skip_dct[4] = 1;
2179             skip_dct[5] = 1;
2180         } else {
2181             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2182             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2183             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2184                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2185                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2186             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2187                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2188                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2189                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2190                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2191                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2192                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2193             }
2194         }
2195     } else {
2196         op_pixels_func (*op_pix)[4];
2197         qpel_mc_func (*op_qpix)[16];
2198         uint8_t *dest_y, *dest_cb, *dest_cr;
2199
2200         dest_y  = s->dest[0];
2201         dest_cb = s->dest[1];
2202         dest_cr = s->dest[2];
2203
2204         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2205             op_pix  = s->hdsp.put_pixels_tab;
2206             op_qpix = s->qdsp.put_qpel_pixels_tab;
2207         } else {
2208             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2209             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2210         }
2211
2212         if (s->mv_dir & MV_DIR_FORWARD) {
2213             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2214                           s->last_picture.f->data,
2215                           op_pix, op_qpix);
2216             op_pix  = s->hdsp.avg_pixels_tab;
2217             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2218         }
2219         if (s->mv_dir & MV_DIR_BACKWARD) {
2220             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2221                           s->next_picture.f->data,
2222                           op_pix, op_qpix);
2223         }
2224
2225         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2226             int progressive_score, interlaced_score;
2227
2228             s->interlaced_dct = 0;
2229             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2230                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2231                                                      ptr_y + wrap_y * 8,
2232                                                      wrap_y, 8) - 400;
2233
2234             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2235                 progressive_score -= 400;
2236
2237             if (progressive_score > 0) {
2238                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2239                                                         wrap_y * 2, 8) +
2240                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2241                                                         ptr_y + wrap_y,
2242                                                         wrap_y * 2, 8);
2243
2244                 if (progressive_score > interlaced_score) {
2245                     s->interlaced_dct = 1;
2246
2247                     dct_offset = wrap_y;
2248                     uv_dct_offset = wrap_c;
2249                     wrap_y <<= 1;
2250                     if (s->chroma_format == CHROMA_422)
2251                         wrap_c <<= 1;
2252                 }
2253             }
2254         }
2255
2256         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2257         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2258         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2259                             dest_y + dct_offset, wrap_y);
2260         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2261                             dest_y + dct_offset + 8, wrap_y);
2262
2263         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2264             skip_dct[4] = 1;
2265             skip_dct[5] = 1;
2266         } else {
2267             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2268             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2269             if (!s->chroma_y_shift) { /* 422 */
2270                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2271                                     dest_cb + uv_dct_offset, wrap_c);
2272                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2273                                     dest_cr + uv_dct_offset, wrap_c);
2274             }
2275         }
2276         /* pre quantization */
2277         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2278                 2 * s->qscale * s->qscale) {
2279             // FIXME optimize
2280             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2281                 skip_dct[0] = 1;
2282             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2283                 skip_dct[1] = 1;
2284             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2285                                wrap_y, 8) < 20 * s->qscale)
2286                 skip_dct[2] = 1;
2287             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2288                                wrap_y, 8) < 20 * s->qscale)
2289                 skip_dct[3] = 1;
2290             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2291                 skip_dct[4] = 1;
2292             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2293                 skip_dct[5] = 1;
2294             if (!s->chroma_y_shift) { /* 422 */
2295                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2296                                    dest_cb + uv_dct_offset,
2297                                    wrap_c, 8) < 20 * s->qscale)
2298                     skip_dct[6] = 1;
2299                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2300                                    dest_cr + uv_dct_offset,
2301                                    wrap_c, 8) < 20 * s->qscale)
2302                     skip_dct[7] = 1;
2303             }
2304         }
2305     }
2306
2307     if (s->quantizer_noise_shaping) {
2308         if (!skip_dct[0])
2309             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2310         if (!skip_dct[1])
2311             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2312         if (!skip_dct[2])
2313             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2314         if (!skip_dct[3])
2315             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2316         if (!skip_dct[4])
2317             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2318         if (!skip_dct[5])
2319             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2320         if (!s->chroma_y_shift) { /* 422 */
2321             if (!skip_dct[6])
2322                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2323                                   wrap_c);
2324             if (!skip_dct[7])
2325                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2326                                   wrap_c);
2327         }
2328         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2329     }
2330
2331     /* DCT & quantize */
2332     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2333     {
2334         for (i = 0; i < mb_block_count; i++) {
2335             if (!skip_dct[i]) {
2336                 int overflow;
2337                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2338                 // FIXME we could decide to change the quantizer instead of
2339                 // clipping
2340                 // JS: I don't think that would be a good idea, it could lower
2341                 //     quality instead of improving it. Only intra-DC clipping
2342                 //     would justify changing the quantizer.
2343                 if (overflow)
2344                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2345             } else
2346                 s->block_last_index[i] = -1;
2347         }
2348         if (s->quantizer_noise_shaping) {
2349             for (i = 0; i < mb_block_count; i++) {
2350                 if (!skip_dct[i]) {
2351                     s->block_last_index[i] =
2352                         dct_quantize_refine(s, s->block[i], weight[i],
2353                                             orig[i], i, s->qscale);
2354                 }
2355             }
2356         }
2357
2358         if (s->luma_elim_threshold && !s->mb_intra)
2359             for (i = 0; i < 4; i++)
2360                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2361         if (s->chroma_elim_threshold && !s->mb_intra)
2362             for (i = 4; i < mb_block_count; i++)
2363                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2364
2365         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2366             for (i = 0; i < mb_block_count; i++) {
2367                 if (s->block_last_index[i] == -1)
2368                     s->coded_score[i] = INT_MAX / 256;
2369             }
2370         }
2371     }
2372
2373     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2374         s->block_last_index[4] =
2375         s->block_last_index[5] = 0;
2376         s->block[4][0] =
2377         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2378         if (!s->chroma_y_shift) { /* 422 / 444 */
2379             for (i=6; i<12; i++) {
2380                 s->block_last_index[i] = 0;
2381                 s->block[i][0] = s->block[4][0];
2382             }
2383         }
2384     }
2385
2386     // FIXME: the non-C quantize code returns an incorrect block_last_index
2387     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2388         for (i = 0; i < mb_block_count; i++) {
2389             int j;
2390             if (s->block_last_index[i] > 0) {
2391                 for (j = 63; j > 0; j--) {
2392                     if (s->block[i][s->intra_scantable.permutated[j]])
2393                         break;
2394                 }
2395                 s->block_last_index[i] = j;
2396             }
2397         }
2398     }
2399
2400     /* huffman encode */
2401     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2402     case AV_CODEC_ID_MPEG1VIDEO:
2403     case AV_CODEC_ID_MPEG2VIDEO:
2404         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2405             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2406         break;
2407     case AV_CODEC_ID_MPEG4:
2408         if (CONFIG_MPEG4_ENCODER)
2409             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2410         break;
2411     case AV_CODEC_ID_MSMPEG4V2:
2412     case AV_CODEC_ID_MSMPEG4V3:
2413     case AV_CODEC_ID_WMV1:
2414         if (CONFIG_MSMPEG4_ENCODER)
2415             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2416         break;
2417     case AV_CODEC_ID_WMV2:
2418         if (CONFIG_WMV2_ENCODER)
2419             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2420         break;
2421     case AV_CODEC_ID_H261:
2422         if (CONFIG_H261_ENCODER)
2423             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2424         break;
2425     case AV_CODEC_ID_H263:
2426     case AV_CODEC_ID_H263P:
2427     case AV_CODEC_ID_FLV1:
2428     case AV_CODEC_ID_RV10:
2429     case AV_CODEC_ID_RV20:
2430         if (CONFIG_H263_ENCODER)
2431             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2432         break;
2433 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
2434     case AV_CODEC_ID_MJPEG:
2435     case AV_CODEC_ID_AMV:
2436         ff_mjpeg_encode_mb(s, s->block);
2437         break;
2438 #endif
2439     case AV_CODEC_ID_SPEEDHQ:
2440         if (CONFIG_SPEEDHQ_ENCODER)
2441             ff_speedhq_encode_mb(s, s->block);
2442         break;
2443     default:
2444         av_assert1(0);
2445     }
2446 }
2447
2448 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2449 {
2450     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2451     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2452     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2453 }
2454
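/* Save/restore the small amount of encoder state that encoding one macroblock
 * modifies, so that encode_mb_hq() can try several coding modes for the same
 * MB and keep only the best one. */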
2455 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2456     int i;
2457
2458     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2459
2460     /* MPEG-1 */
2461     d->mb_skip_run= s->mb_skip_run;
2462     for(i=0; i<3; i++)
2463         d->last_dc[i] = s->last_dc[i];
2464
2465     /* statistics */
2466     d->mv_bits= s->mv_bits;
2467     d->i_tex_bits= s->i_tex_bits;
2468     d->p_tex_bits= s->p_tex_bits;
2469     d->i_count= s->i_count;
2470     d->f_count= s->f_count;
2471     d->b_count= s->b_count;
2472     d->skip_count= s->skip_count;
2473     d->misc_bits= s->misc_bits;
2474     d->last_bits= 0;
2475
2476     d->mb_skipped= 0;
2477     d->qscale= s->qscale;
2478     d->dquant= s->dquant;
2479
2480     d->esc3_level_length= s->esc3_level_length;
2481 }
2482
2483 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2484     int i;
2485
2486     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2487     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2488
2489     /* MPEG-1 */
2490     d->mb_skip_run= s->mb_skip_run;
2491     for(i=0; i<3; i++)
2492         d->last_dc[i] = s->last_dc[i];
2493
2494     /* statistics */
2495     d->mv_bits= s->mv_bits;
2496     d->i_tex_bits= s->i_tex_bits;
2497     d->p_tex_bits= s->p_tex_bits;
2498     d->i_count= s->i_count;
2499     d->f_count= s->f_count;
2500     d->b_count= s->b_count;
2501     d->skip_count= s->skip_count;
2502     d->misc_bits= s->misc_bits;
2503
2504     d->mb_intra= s->mb_intra;
2505     d->mb_skipped= s->mb_skipped;
2506     d->mv_type= s->mv_type;
2507     d->mv_dir= s->mv_dir;
2508     d->pb= s->pb;
2509     if(s->data_partitioning){
2510         d->pb2= s->pb2;
2511         d->tex_pb= s->tex_pb;
2512     }
2513     d->block= s->block;
2514     for(i=0; i<8; i++)
2515         d->block_last_index[i]= s->block_last_index[i];
2516     d->interlaced_dct= s->interlaced_dct;
2517     d->qscale= s->qscale;
2518
2519     d->esc3_level_length= s->esc3_level_length;
2520 }
2521
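/* Try one candidate coding mode for the current MB: encode it into a scratch
 * bitstream, score it by bit count (scaled by lambda2 and augmented with the
 * reconstruction SSE when full RD macroblock decision is enabled), and keep
 * it as the new best mode if the score beats *dmin. */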
2522 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2523                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2524                            int *dmin, int *next_block, int motion_x, int motion_y)
2525 {
2526     int score;
2527     uint8_t *dest_backup[3];
2528
2529     copy_context_before_encode(s, backup, type);
2530
2531     s->block= s->blocks[*next_block];
2532     s->pb= pb[*next_block];
2533     if(s->data_partitioning){
2534         s->pb2   = pb2   [*next_block];
2535         s->tex_pb= tex_pb[*next_block];
2536     }
2537
2538     if(*next_block){
2539         memcpy(dest_backup, s->dest, sizeof(s->dest));
2540         s->dest[0] = s->sc.rd_scratchpad;
2541         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2542         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2543         av_assert0(s->linesize >= 32); //FIXME
2544     }
2545
2546     encode_mb(s, motion_x, motion_y);
2547
2548     score= put_bits_count(&s->pb);
2549     if(s->data_partitioning){
2550         score+= put_bits_count(&s->pb2);
2551         score+= put_bits_count(&s->tex_pb);
2552     }
2553
2554     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2555         ff_mpv_reconstruct_mb(s, s->block);
2556
2557         score *= s->lambda2;
2558         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2559     }
2560
2561     if(*next_block){
2562         memcpy(s->dest, dest_backup, sizeof(s->dest));
2563     }
2564
2565     if(score<*dmin){
2566         *dmin= score;
2567         *next_block^=1;
2568
2569         copy_context_after_encode(best, s, type);
2570     }
2571 }
2572
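/* Sum of squared errors between source and reconstruction; sse_mb() below
 * evaluates it (or NSSE if selected as mb_cmp) over the current macroblock,
 * handling partial macroblocks at the right/bottom picture border. */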
2573 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2574     const uint32_t *sq = ff_square_tab + 256;
2575     int acc=0;
2576     int x,y;
2577
2578     if(w==16 && h==16)
2579         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2580     else if(w==8 && h==8)
2581         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2582
2583     for(y=0; y<h; y++){
2584         for(x=0; x<w; x++){
2585             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2586         }
2587     }
2588
2589     av_assert2(acc>=0);
2590
2591     return acc;
2592 }
2593
2594 static int sse_mb(MpegEncContext *s){
2595     int w= 16;
2596     int h= 16;
2597
2598     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2599     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2600
2601     if(w==16 && h==16)
2602       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2603         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2604                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2605                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2606       }else{
2607         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2608                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2609                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2610       }
2611     else
2612         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2613                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2614                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2615 }
2616
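/* Slice-threaded motion estimation passes: the pre-pass scans macroblocks in
 * reverse order to seed predictors, while the main pass computes the motion
 * vectors and macroblock types for P- and B-frames. */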
2617 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2618     MpegEncContext *s= *(void**)arg;
2619
2620
2621     s->me.pre_pass=1;
2622     s->me.dia_size= s->avctx->pre_dia_size;
2623     s->first_slice_line=1;
2624     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2625         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2626             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2627         }
2628         s->first_slice_line=0;
2629     }
2630
2631     s->me.pre_pass=0;
2632
2633     return 0;
2634 }
2635
2636 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2637     MpegEncContext *s= *(void**)arg;
2638
2639     s->me.dia_size= s->avctx->dia_size;
2640     s->first_slice_line=1;
2641     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2642         s->mb_x=0; //for block init below
2643         ff_init_block_index(s);
2644         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2645             s->block_index[0]+=2;
2646             s->block_index[1]+=2;
2647             s->block_index[2]+=2;
2648             s->block_index[3]+=2;
2649
2650             /* compute motion vector & mb_type and store in context */
2651             if(s->pict_type==AV_PICTURE_TYPE_B)
2652                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2653             else
2654                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2655         }
2656         s->first_slice_line=0;
2657     }
2658     return 0;
2659 }
2660
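/* Per-macroblock luma statistics used by adaptive quantization and rate
 * control: mb_mean is the average pixel value and mb_var approximates the
 * variance, (sum(x^2) - sum(x)^2/256) / 256 plus a small rounding bias,
 * computed from pix_sum/pix_norm1. */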
2661 static int mb_var_thread(AVCodecContext *c, void *arg){
2662     MpegEncContext *s= *(void**)arg;
2663     int mb_x, mb_y;
2664
2665     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2666         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2667             int xx = mb_x * 16;
2668             int yy = mb_y * 16;
2669             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2670             int varc;
2671             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2672
2673             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2674                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2675
2676             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2677             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2678             s->me.mb_var_sum_temp    += varc;
2679         }
2680     }
2681     return 0;
2682 }
2683
2684 static void write_slice_end(MpegEncContext *s){
2685     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2686         if(s->partitioned_frame){
2687             ff_mpeg4_merge_partitions(s);
2688         }
2689
2690         ff_mpeg4_stuffing(&s->pb);
2691     } else if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
2692                s->out_format == FMT_MJPEG) {
2693         ff_mjpeg_encode_stuffing(s);
2694     } else if (CONFIG_SPEEDHQ_ENCODER && s->out_format == FMT_SPEEDHQ) {
2695         ff_speedhq_end_slice(s);
2696     }
2697
2698     flush_put_bits(&s->pb);
2699
2700     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2701         s->misc_bits+= get_bits_diff(s);
2702 }
2703
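/* H.263 MB info side data: roughly every s->mb_info bytes of bitstream a
 * 12-byte record is emitted containing the bit offset, qscale, GOB number,
 * macroblock address and the motion vector predictors (4MV is not covered),
 * so that e.g. a packetizer can split the stream at macroblock boundaries. */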
2704 static void write_mb_info(MpegEncContext *s)
2705 {
2706     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2707     int offset = put_bits_count(&s->pb);
2708     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2709     int gobn = s->mb_y / s->gob_index;
2710     int pred_x, pred_y;
2711     if (CONFIG_H263_ENCODER)
2712         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2713     bytestream_put_le32(&ptr, offset);
2714     bytestream_put_byte(&ptr, s->qscale);
2715     bytestream_put_byte(&ptr, gobn);
2716     bytestream_put_le16(&ptr, mba);
2717     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2718     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2719     /* 4MV not implemented */
2720     bytestream_put_byte(&ptr, 0); /* hmv2 */
2721     bytestream_put_byte(&ptr, 0); /* vmv2 */
2722 }
2723
2724 static void update_mb_info(MpegEncContext *s, int startcode)
2725 {
2726     if (!s->mb_info)
2727         return;
2728     if (put_bytes_count(&s->pb, 0) - s->prev_mb_info >= s->mb_info) {
2729         s->mb_info_size += 12;
2730         s->prev_mb_info = s->last_mb_info;
2731     }
2732     if (startcode) {
2733         s->prev_mb_info = put_bytes_count(&s->pb, 0);
2734         /* This might have incremented mb_info_size above, and we return without
2735          * actually writing any info into that slot yet. But in that case,
2736          * this will be called again for the first MB after the start code has
2737          * been written, and the MB info will be written then. */
2738         return;
2739     }
2740
2741     s->last_mb_info = put_bytes_count(&s->pb, 0);
2742     if (!s->mb_info_size)
2743         s->mb_info_size += 12;
2744     write_mb_info(s);
2745 }
2746
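/* Grow the shared output buffer when fewer than `threshold` bytes are left
 * (only possible with a single slice context writing into the AVCodecContext's
 * internal byte_buffer): allocate a larger buffer, copy the bits written so
 * far and rebase the PutBitContext as well as the lastgob/vbv_delay pointers. */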
2747 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2748 {
2749     if (put_bytes_left(&s->pb, 0) < threshold
2750         && s->slice_context_count == 1
2751         && s->pb.buf == s->avctx->internal->byte_buffer) {
2752         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2753         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2754
2755         uint8_t *new_buffer = NULL;
2756         int new_buffer_size = 0;
2757
2758         if ((s->avctx->internal->byte_buffer_size + size_increase) >= INT_MAX/8) {
2759             av_log(s->avctx, AV_LOG_ERROR, "Cannot reallocate putbit buffer\n");
2760             return AVERROR(ENOMEM);
2761         }
2762
2763         emms_c();
2764
2765         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2766                               s->avctx->internal->byte_buffer_size + size_increase);
2767         if (!new_buffer)
2768             return AVERROR(ENOMEM);
2769
2770         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2771         av_free(s->avctx->internal->byte_buffer);
2772         s->avctx->internal->byte_buffer      = new_buffer;
2773         s->avctx->internal->byte_buffer_size = new_buffer_size;
2774         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2775         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2776         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2777     }
2778     if (put_bytes_left(&s->pb, 0) < threshold)
2779         return AVERROR(EINVAL);
2780     return 0;
2781 }
2782
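/* Per-slice-context worker: walks the macroblock rows in [start_mb_y, end_mb_y), writes
 * resync/GOB headers where needed, and encodes each macroblock either directly (single
 * candidate type) or by trial-encoding all candidate types into local scratch bit
 * buffers and keeping the rate-distortion winner (see the encode_mb_hq() calls below). */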
2783 static int encode_thread(AVCodecContext *c, void *arg){
2784     MpegEncContext *s= *(void**)arg;
2785     int mb_x, mb_y, mb_y_order;
2786     int chr_h= 16>>s->chroma_y_shift;
2787     int i, j;
2788     MpegEncContext best_s = { 0 }, backup_s;
2789     uint8_t bit_buf[2][MAX_MB_BYTES];
2790     uint8_t bit_buf2[2][MAX_MB_BYTES];
2791     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2792     PutBitContext pb[2], pb2[2], tex_pb[2];
2793
2794     for(i=0; i<2; i++){
2795         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2796         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2797         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2798     }
2799
2800     s->last_bits= put_bits_count(&s->pb);
2801     s->mv_bits=0;
2802     s->misc_bits=0;
2803     s->i_tex_bits=0;
2804     s->p_tex_bits=0;
2805     s->i_count=0;
2806     s->f_count=0;
2807     s->b_count=0;
2808     s->skip_count=0;
2809
2810     for(i=0; i<3; i++){
2811         /* init last dc values */
2812         /* note: quant matrix value (8) is implied here */
2813         s->last_dc[i] = 128 << s->intra_dc_precision;
2814
2815         s->current_picture.encoding_error[i] = 0;
2816     }
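    /* AMV uses the fixed SP5X quantizers installed in encode_picture() (DC steps 13 for
     * luma, 14 for chroma instead of the implied 8), so the neutral DC predictor is
     * pre-scaled by 8/13 and 8/14 here; this reading is inferred from the constants
     * rather than taken from a specification. */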
2817     if(s->codec_id==AV_CODEC_ID_AMV){
2818         s->last_dc[0] = 128*8/13;
2819         s->last_dc[1] = 128*8/14;
2820         s->last_dc[2] = 128*8/14;
2821     }
2822     s->mb_skip_run = 0;
2823     memset(s->last_mv, 0, sizeof(s->last_mv));
2824
2825     s->last_mv_dir = 0;
2826
2827     switch(s->codec_id){
2828     case AV_CODEC_ID_H263:
2829     case AV_CODEC_ID_H263P:
2830     case AV_CODEC_ID_FLV1:
2831         if (CONFIG_H263_ENCODER)
2832             s->gob_index = H263_GOB_HEIGHT(s->height);
2833         break;
2834     case AV_CODEC_ID_MPEG4:
2835         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2836             ff_mpeg4_init_partitions(s);
2837         break;
2838     }
2839
2840     s->resync_mb_x=0;
2841     s->resync_mb_y=0;
2842     s->first_slice_line = 1;
2843     s->ptr_lastgob = s->pb.buf;
2844     for (mb_y_order = s->start_mb_y; mb_y_order < s->end_mb_y; mb_y_order++) {
2845         if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
2846             int first_in_slice;
2847             mb_y = ff_speedhq_mb_y_order_to_mb(mb_y_order, s->mb_height, &first_in_slice);
2848             if (first_in_slice && mb_y_order != s->start_mb_y)
2849                 ff_speedhq_end_slice(s);
2850             s->last_dc[0] = s->last_dc[1] = s->last_dc[2] = 1024 << s->intra_dc_precision;
2851         } else {
2852             mb_y = mb_y_order;
2853         }
2854         s->mb_x=0;
2855         s->mb_y= mb_y;
2856
2857         ff_set_qscale(s, s->qscale);
2858         ff_init_block_index(s);
2859
2860         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2861             int xy= mb_y*s->mb_stride + mb_x; // not const: H.261 reordering below may change it
2862             int mb_type= s->mb_type[xy];
2863 //            int d;
2864             int dmin= INT_MAX;
2865             int dir;
2866             int size_increase =  s->avctx->internal->byte_buffer_size/4
2867                                + s->mb_width*MAX_MB_BYTES;
2868
2869             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2870             if (put_bytes_left(&s->pb, 0) < MAX_MB_BYTES){
2871                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2872                 return -1;
2873             }
2874             if(s->data_partitioning){
2875                 if (put_bytes_left(&s->pb2,    0) < MAX_MB_BYTES ||
2876                     put_bytes_left(&s->tex_pb, 0) < MAX_MB_BYTES) {
2877                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2878                     return -1;
2879                 }
2880             }
2881
2882             s->mb_x = mb_x;
2883             s->mb_y = mb_y;  // set inside the loop: H.261 reordering can change it
2884             ff_update_block_index(s);
2885
2886             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2887                 ff_h261_reorder_mb_index(s);
2888                 xy= s->mb_y*s->mb_stride + s->mb_x;
2889                 mb_type= s->mb_type[xy];
2890             }
2891
2892             /* write gob / video packet header  */
2893             if(s->rtp_mode){
2894                 int current_packet_size, is_gob_start;
2895
2896                 current_packet_size = put_bytes_count(&s->pb, 1)
2897                                       - (s->ptr_lastgob - s->pb.buf);
2898
2899                 is_gob_start = s->rtp_payload_size &&
2900                                current_packet_size >= s->rtp_payload_size &&
2901                                mb_y + mb_x > 0;
2902
2903                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2904
2905                 switch(s->codec_id){
2906                 case AV_CODEC_ID_H263:
2907                 case AV_CODEC_ID_H263P:
2908                     if(!s->h263_slice_structured)
2909                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2910                     break;
2911                 case AV_CODEC_ID_MPEG2VIDEO:
2912                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2913                 case AV_CODEC_ID_MPEG1VIDEO:
2914                     if(s->mb_skip_run) is_gob_start=0;
2915                     break;
2916                 case AV_CODEC_ID_MJPEG:
2917                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2918                     break;
2919                 }
2920
2921                 if(is_gob_start){
2922                     if(s->start_mb_y != mb_y || mb_x!=0){
2923                         write_slice_end(s);
2924
2925                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2926                             ff_mpeg4_init_partitions(s);
2927                         }
2928                     }
2929
2930                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2931                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2932
2933                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2934                         int r = put_bytes_count(&s->pb, 0) + s->picture_number + 16 + s->mb_x + s->mb_y;
2935                         int d = 100 / s->error_rate;
2936                         if(r % d == 0){
2937                             current_packet_size=0;
2938                             s->pb.buf_ptr= s->ptr_lastgob;
2939                             av_assert1(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2940                         }
2941                     }
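                    /* The error_rate handling above rewinds buf_ptr to ptr_lastgob at a
                     * pseudo-random subset of resync points, dropping the bytes of the
                     * previous GOB; this looks like deliberate damage used to exercise
                     * error resilience rather than a normal encoding path. */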
2942
2943                     update_mb_info(s, 1);
2944
2945                     switch(s->codec_id){
2946                     case AV_CODEC_ID_MPEG4:
2947                         if (CONFIG_MPEG4_ENCODER) {
2948                             ff_mpeg4_encode_video_packet_header(s);
2949                             ff_mpeg4_clean_buffers(s);
2950                         }
2951                     break;
2952                     case AV_CODEC_ID_MPEG1VIDEO:
2953                     case AV_CODEC_ID_MPEG2VIDEO:
2954                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2955                             ff_mpeg1_encode_slice_header(s);
2956                             ff_mpeg1_clean_buffers(s);
2957                         }
2958                     break;
2959                     case AV_CODEC_ID_H263:
2960                     case AV_CODEC_ID_H263P:
2961                         if (CONFIG_H263_ENCODER)
2962                             ff_h263_encode_gob_header(s, mb_y);
2963                     break;
2964                     }
2965
2966                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2967                         int bits= put_bits_count(&s->pb);
2968                         s->misc_bits+= bits - s->last_bits;
2969                         s->last_bits= bits;
2970                     }
2971
2972                     s->ptr_lastgob += current_packet_size;
2973                     s->first_slice_line=1;
2974                     s->resync_mb_x=mb_x;
2975                     s->resync_mb_y=mb_y;
2976                 }
2977             }
2978
2979             if(  (s->resync_mb_x   == s->mb_x)
2980                && s->resync_mb_y+1 == s->mb_y){
2981                 s->first_slice_line=0;
2982             }
2983
2984             s->mb_skipped=0;
2985             s->dquant=0; //only for QP_RD
2986
2987             update_mb_info(s, 0);
2988
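            /* Mode decision: if more than one candidate MB type survived motion
             * estimation (or QP_RD is active), each candidate is trial-encoded via
             * encode_mb_hq() into the local pb/pb2/tex_pb scratch buffers and the
             * variant with the lowest RD cost (dmin) is kept; otherwise the single
             * candidate is encoded directly in the else branch further below. */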
2989             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2990                 int next_block=0;
2991                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2992
2993                 copy_context_before_encode(&backup_s, s, -1);
2994                 backup_s.pb= s->pb;
2995                 best_s.data_partitioning= s->data_partitioning;
2996                 best_s.partitioned_frame= s->partitioned_frame;
2997                 if(s->data_partitioning){
2998                     backup_s.pb2= s->pb2;
2999                     backup_s.tex_pb= s->tex_pb;
3000                 }
3001
3002                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3003                     s->mv_dir = MV_DIR_FORWARD;
3004                     s->mv_type = MV_TYPE_16X16;
3005                     s->mb_intra= 0;
3006                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3007                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3008                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3009                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3010                 }
3011                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3012                     s->mv_dir = MV_DIR_FORWARD;
3013                     s->mv_type = MV_TYPE_FIELD;
3014                     s->mb_intra= 0;
3015                     for(i=0; i<2; i++){
3016                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3017                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3018                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3019                     }
3020                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3021                                  &dmin, &next_block, 0, 0);
3022                 }
3023                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3024                     s->mv_dir = MV_DIR_FORWARD;
3025                     s->mv_type = MV_TYPE_16X16;
3026                     s->mb_intra= 0;
3027                     s->mv[0][0][0] = 0;
3028                     s->mv[0][0][1] = 0;
3029                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3030                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3031                 }
3032                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3033                     s->mv_dir = MV_DIR_FORWARD;
3034                     s->mv_type = MV_TYPE_8X8;
3035                     s->mb_intra= 0;
3036                     for(i=0; i<4; i++){
3037                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3038                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3039                     }
3040                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3041                                  &dmin, &next_block, 0, 0);
3042                 }
3043                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3044                     s->mv_dir = MV_DIR_FORWARD;
3045                     s->mv_type = MV_TYPE_16X16;
3046                     s->mb_intra= 0;
3047                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3048                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3049                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3050                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3051                 }
3052                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3053                     s->mv_dir = MV_DIR_BACKWARD;
3054                     s->mv_type = MV_TYPE_16X16;
3055                     s->mb_intra= 0;
3056                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3057                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3058                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3059                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3060                 }
3061                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3062                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3063                     s->mv_type = MV_TYPE_16X16;
3064                     s->mb_intra= 0;
3065                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3066                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3067                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3068                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3069                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3070                                  &dmin, &next_block, 0, 0);
3071                 }
3072                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3073                     s->mv_dir = MV_DIR_FORWARD;
3074                     s->mv_type = MV_TYPE_FIELD;
3075                     s->mb_intra= 0;
3076                     for(i=0; i<2; i++){
3077                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3078                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3079                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3080                     }
3081                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3082                                  &dmin, &next_block, 0, 0);
3083                 }
3084                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3085                     s->mv_dir = MV_DIR_BACKWARD;
3086                     s->mv_type = MV_TYPE_FIELD;
3087                     s->mb_intra= 0;
3088                     for(i=0; i<2; i++){
3089                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3090                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3091                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3092                     }
3093                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3094                                  &dmin, &next_block, 0, 0);
3095                 }
3096                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3097                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3098                     s->mv_type = MV_TYPE_FIELD;
3099                     s->mb_intra= 0;
3100                     for(dir=0; dir<2; dir++){
3101                         for(i=0; i<2; i++){
3102                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3103                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3104                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3105                         }
3106                     }
3107                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3108                                  &dmin, &next_block, 0, 0);
3109                 }
3110                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3111                     s->mv_dir = 0;
3112                     s->mv_type = MV_TYPE_16X16;
3113                     s->mb_intra= 1;
3114                     s->mv[0][0][0] = 0;
3115                     s->mv[0][0][1] = 0;
3116                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3117                                  &dmin, &next_block, 0, 0);
3118                     if(s->h263_pred || s->h263_aic){
3119                         if(best_s.mb_intra)
3120                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3121                         else
3122                             ff_clean_intra_table_entries(s); //old mode?
3123                     }
3124                 }
3125
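                /* QP_RD: retry the best 16x16 mode with the quantizer deltas from
                 * dquant_tab (-1, +1, -2, +2; only the +/-2 entries for B-pictures),
                 * restoring the saved intra DC/AC prediction state whenever a trial
                 * qscale is not adopted. */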
3126                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3127                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3128                         const int last_qp= backup_s.qscale;
3129                         int qpi, qp, dc[6];
3130                         int16_t ac[6][16];
3131                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3132                         static const int dquant_tab[4]={-1,1,-2,2};
3133                         int storecoefs = s->mb_intra && s->dc_val[0];
3134
3135                         av_assert2(backup_s.dquant == 0);
3136
3137                         //FIXME intra
3138                         s->mv_dir= best_s.mv_dir;
3139                         s->mv_type = MV_TYPE_16X16;
3140                         s->mb_intra= best_s.mb_intra;
3141                         s->mv[0][0][0] = best_s.mv[0][0][0];
3142                         s->mv[0][0][1] = best_s.mv[0][0][1];
3143                         s->mv[1][0][0] = best_s.mv[1][0][0];
3144                         s->mv[1][0][1] = best_s.mv[1][0][1];
3145
3146                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3147                         for(; qpi<4; qpi++){
3148                             int dquant= dquant_tab[qpi];
3149                             qp= last_qp + dquant;
3150                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3151                                 continue;
3152                             backup_s.dquant= dquant;
3153                             if(storecoefs){
3154                                 for(i=0; i<6; i++){
3155                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3156                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3157                                 }
3158                             }
3159
3160                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3161                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3162                             if(best_s.qscale != qp){
3163                                 if(storecoefs){
3164                                     for(i=0; i<6; i++){
3165                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3166                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3167                                     }
3168                                 }
3169                             }
3170                         }
3171                     }
3172                 }
3173                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3174                     int mx= s->b_direct_mv_table[xy][0];
3175                     int my= s->b_direct_mv_table[xy][1];
3176
3177                     backup_s.dquant = 0;
3178                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3179                     s->mb_intra= 0;
3180                     ff_mpeg4_set_direct_mv(s, mx, my);
3181                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3182                                  &dmin, &next_block, mx, my);
3183                 }
3184                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3185                     backup_s.dquant = 0;
3186                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3187                     s->mb_intra= 0;
3188                     ff_mpeg4_set_direct_mv(s, 0, 0);
3189                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3190                                  &dmin, &next_block, 0, 0);
3191                 }
3192                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3193                     int coded=0;
3194                     for(i=0; i<6; i++)
3195                         coded |= s->block_last_index[i];
3196                     if(coded){
3197                         int mx,my;
3198                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3199                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3200                             mx=my=0; //FIXME find the one we actually used
3201                             ff_mpeg4_set_direct_mv(s, mx, my);
3202                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3203                             mx= s->mv[1][0][0];
3204                             my= s->mv[1][0][1];
3205                         }else{
3206                             mx= s->mv[0][0][0];
3207                             my= s->mv[0][0][1];
3208                         }
3209
3210                         s->mv_dir= best_s.mv_dir;
3211                         s->mv_type = best_s.mv_type;
3212                         s->mb_intra= 0;
3213 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3214                         s->mv[0][0][1] = best_s.mv[0][0][1];
3215                         s->mv[1][0][0] = best_s.mv[1][0][0];
3216                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3217                         backup_s.dquant= 0;
3218                         s->skipdct=1;
3219                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3220                                         &dmin, &next_block, mx, my);
3221                         s->skipdct=0;
3222                     }
3223                 }
3224
3225                 s->current_picture.qscale_table[xy] = best_s.qscale;
3226
3227                 copy_context_after_encode(s, &best_s, -1);
3228
3229                 pb_bits_count= put_bits_count(&s->pb);
3230                 flush_put_bits(&s->pb);
3231                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3232                 s->pb= backup_s.pb;
3233
3234                 if(s->data_partitioning){
3235                     pb2_bits_count= put_bits_count(&s->pb2);
3236                     flush_put_bits(&s->pb2);
3237                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3238                     s->pb2= backup_s.pb2;
3239
3240                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3241                     flush_put_bits(&s->tex_pb);
3242                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3243                     s->tex_pb= backup_s.tex_pb;
3244                 }
3245                 s->last_bits= put_bits_count(&s->pb);
3246
3247                 if (CONFIG_H263_ENCODER &&
3248                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3249                     ff_h263_update_motion_val(s);
3250
3251                 if(next_block==0){ //FIXME 16 vs linesize16
3252                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3253                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3254                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3255                 }
3256
3257                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3258                     ff_mpv_reconstruct_mb(s, s->block);
3259             } else {
3260                 int motion_x = 0, motion_y = 0;
3261                 s->mv_type=MV_TYPE_16X16;
3262                 // only one MB-Type possible
3263
3264                 switch(mb_type){
3265                 case CANDIDATE_MB_TYPE_INTRA:
3266                     s->mv_dir = 0;
3267                     s->mb_intra= 1;
3268                     motion_x= s->mv[0][0][0] = 0;
3269                     motion_y= s->mv[0][0][1] = 0;
3270                     break;
3271                 case CANDIDATE_MB_TYPE_INTER:
3272                     s->mv_dir = MV_DIR_FORWARD;
3273                     s->mb_intra= 0;
3274                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3275                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3276                     break;
3277                 case CANDIDATE_MB_TYPE_INTER_I:
3278                     s->mv_dir = MV_DIR_FORWARD;
3279                     s->mv_type = MV_TYPE_FIELD;
3280                     s->mb_intra= 0;
3281                     for(i=0; i<2; i++){
3282                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3283                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3284                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3285                     }
3286                     break;
3287                 case CANDIDATE_MB_TYPE_INTER4V:
3288                     s->mv_dir = MV_DIR_FORWARD;
3289                     s->mv_type = MV_TYPE_8X8;
3290                     s->mb_intra= 0;
3291                     for(i=0; i<4; i++){
3292                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3293                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3294                     }
3295                     break;
3296                 case CANDIDATE_MB_TYPE_DIRECT:
3297                     if (CONFIG_MPEG4_ENCODER) {
3298                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3299                         s->mb_intra= 0;
3300                         motion_x=s->b_direct_mv_table[xy][0];
3301                         motion_y=s->b_direct_mv_table[xy][1];
3302                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3303                     }
3304                     break;
3305                 case CANDIDATE_MB_TYPE_DIRECT0:
3306                     if (CONFIG_MPEG4_ENCODER) {
3307                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3308                         s->mb_intra= 0;
3309                         ff_mpeg4_set_direct_mv(s, 0, 0);
3310                     }
3311                     break;
3312                 case CANDIDATE_MB_TYPE_BIDIR:
3313                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3314                     s->mb_intra= 0;
3315                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3316                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3317                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3318                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3319                     break;
3320                 case CANDIDATE_MB_TYPE_BACKWARD:
3321                     s->mv_dir = MV_DIR_BACKWARD;
3322                     s->mb_intra= 0;
3323                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3324                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3325                     break;
3326                 case CANDIDATE_MB_TYPE_FORWARD:
3327                     s->mv_dir = MV_DIR_FORWARD;
3328                     s->mb_intra= 0;
3329                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3330                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3331                     break;
3332                 case CANDIDATE_MB_TYPE_FORWARD_I:
3333                     s->mv_dir = MV_DIR_FORWARD;
3334                     s->mv_type = MV_TYPE_FIELD;
3335                     s->mb_intra= 0;
3336                     for(i=0; i<2; i++){
3337                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3338                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3339                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3340                     }
3341                     break;
3342                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3343                     s->mv_dir = MV_DIR_BACKWARD;
3344                     s->mv_type = MV_TYPE_FIELD;
3345                     s->mb_intra= 0;
3346                     for(i=0; i<2; i++){
3347                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3348                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3349                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3350                     }
3351                     break;
3352                 case CANDIDATE_MB_TYPE_BIDIR_I:
3353                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3354                     s->mv_type = MV_TYPE_FIELD;
3355                     s->mb_intra= 0;
3356                     for(dir=0; dir<2; dir++){
3357                         for(i=0; i<2; i++){
3358                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3359                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3360                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3361                         }
3362                     }
3363                     break;
3364                 default:
3365                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3366                 }
3367
3368                 encode_mb(s, motion_x, motion_y);
3369
3370                 // RAL: Update last macroblock type
3371                 s->last_mv_dir = s->mv_dir;
3372
3373                 if (CONFIG_H263_ENCODER &&
3374                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3375                     ff_h263_update_motion_val(s);
3376
3377                 ff_mpv_reconstruct_mb(s, s->block);
3378             }
3379
3380             /* clean the MV table in I/P/S frames; direct mode in B-frames relies on it */
3381             if(s->mb_intra /* && I,P,S_TYPE */){
3382                 s->p_mv_table[xy][0]=0;
3383                 s->p_mv_table[xy][1]=0;
3384             }
3385
3386             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3387                 int w= 16;
3388                 int h= 16;
3389
3390                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3391                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3392
3393                 s->current_picture.encoding_error[0] += sse(
3394                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3395                     s->dest[0], w, h, s->linesize);
3396                 s->current_picture.encoding_error[1] += sse(
3397                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3398                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3399                 s->current_picture.encoding_error[2] += sse(
3400                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3401                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3402             }
3403             if(s->loop_filter){
3404                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3405                     ff_h263_loop_filter(s);
3406             }
3407             ff_dlog(s->avctx, "MB %d %d bits\n",
3408                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3409         }
3410     }
3411
3412     // Not pretty, but the extension header must be written before flushing, so it has to be here.
3413     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3414         ff_msmpeg4_encode_ext_header(s);
3415
3416     write_slice_end(s);
3417
3418     return 0;
3419 }
3420
3421 #define MERGE(field) dst->field += src->field; src->field=0
3422 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3423     MERGE(me.scene_change_score);
3424     MERGE(me.mc_mb_var_sum_temp);
3425     MERGE(me.mb_var_sum_temp);
3426 }
3427
3428 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3429     int i;
3430
3431     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3432     MERGE(dct_count[1]);
3433     MERGE(mv_bits);
3434     MERGE(i_tex_bits);
3435     MERGE(p_tex_bits);
3436     MERGE(i_count);
3437     MERGE(f_count);
3438     MERGE(b_count);
3439     MERGE(skip_count);
3440     MERGE(misc_bits);
3441     MERGE(er.error_count);
3442     MERGE(padding_bug_score);
3443     MERGE(current_picture.encoding_error[0]);
3444     MERGE(current_picture.encoding_error[1]);
3445     MERGE(current_picture.encoding_error[2]);
3446
3447     if (dst->noise_reduction){
3448         for(i=0; i<64; i++){
3449             MERGE(dct_error_sum[0][i]);
3450             MERGE(dct_error_sum[1][i]);
3451         }
3452     }
3453
3454     av_assert1(put_bits_count(&src->pb) % 8 ==0);
3455     av_assert1(put_bits_count(&dst->pb) % 8 ==0);
3456     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3457     flush_put_bits(&dst->pb);
3458 }
3459
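/* Pick the frame-level quantizer: use the pending next_lambda if one was set, otherwise
 * ask the rate controller via ff_rate_estimate_qscale(); with adaptive quantization the
 * per-MB qscale/lambda tables are additionally cleaned up for the codec in use. */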
3460 static int estimate_qp(MpegEncContext *s, int dry_run){
3461     if (s->next_lambda){
3462         s->current_picture_ptr->f->quality =
3463         s->current_picture.f->quality = s->next_lambda;
3464         if(!dry_run) s->next_lambda= 0;
3465     } else if (!s->fixed_qscale) {
3466         int quality = ff_rate_estimate_qscale(s, dry_run);
3467         s->current_picture_ptr->f->quality =
3468         s->current_picture.f->quality = quality;
3469         if (s->current_picture.f->quality < 0)
3470             return -1;
3471     }
3472
3473     if(s->adaptive_quant){
3474         switch(s->codec_id){
3475         case AV_CODEC_ID_MPEG4:
3476             if (CONFIG_MPEG4_ENCODER)
3477                 ff_clean_mpeg4_qscales(s);
3478             break;
3479         case AV_CODEC_ID_H263:
3480         case AV_CODEC_ID_H263P:
3481         case AV_CODEC_ID_FLV1:
3482             if (CONFIG_H263_ENCODER)
3483                 ff_clean_h263_qscales(s);
3484             break;
3485         default:
3486             ff_init_qscale_tab(s);
3487         }
3488
3489         s->lambda= s->lambda_table[0];
3490         //FIXME broken
3491     }else
3492         s->lambda = s->current_picture.f->quality;
3493     update_qscale(s);
3494     return 0;
3495 }
3496
3497 /* must be called before writing the header */
3498 static void set_frame_distances(MpegEncContext * s){
3499     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3500     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3501
3502     if(s->pict_type==AV_PICTURE_TYPE_B){
3503         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3504         av_assert1(s->pb_time > 0 && s->pb_time < s->pp_time);
3505     }else{
3506         s->pp_time= s->time - s->last_non_b_time;
3507         s->last_non_b_time= s->time;
3508         av_assert1(s->picture_number==0 || s->pp_time > 0);
3509     }
3510 }
3511
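/* Top-level per-frame encoding: motion estimation across all slice contexts, f_code/
 * b_code selection, scene-change handling, quantizer estimation, quantization-matrix
 * setup for the MJPEG/AMV/SpeedHQ special cases, picture header writing, and finally
 * the parallel encode_thread() pass whose partial contexts are merged back into *s. */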
3512 static int encode_picture(MpegEncContext *s, int picture_number)
3513 {
3514     int i, ret;
3515     int bits;
3516     int context_count = s->slice_context_count;
3517
3518     s->picture_number = picture_number;
3519
3520     /* Reset the average MB variance */
3521     s->me.mb_var_sum_temp    =
3522     s->me.mc_mb_var_sum_temp = 0;
3523
3524     /* we need to initialize some time vars before we can encode B-frames */
3525     // RAL: Condition added for MPEG1VIDEO
3526     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3527         set_frame_distances(s);
3528     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3529         ff_set_mpeg4_time(s);
3530
3531     s->me.scene_change_score=0;
3532
3533 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3534
3535     if(s->pict_type==AV_PICTURE_TYPE_I){
3536         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3537         else                        s->no_rounding=0;
3538     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3539         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3540             s->no_rounding ^= 1;
3541     }
3542
3543     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3544         if (estimate_qp(s,1) < 0)
3545             return -1;
3546         ff_get_2pass_fcode(s);
3547     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3548         if(s->pict_type==AV_PICTURE_TYPE_B)
3549             s->lambda= s->last_lambda_for[s->pict_type];
3550         else
3551             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3552         update_qscale(s);
3553     }
3554
3555     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3556         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3557         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3558         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3559         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3560     }
3561
3562     s->mb_intra=0; //for the rate distortion & bit compare functions
3563     for(i=1; i<context_count; i++){
3564         ret = ff_update_duplicate_context(s->thread_context[i], s);
3565         if (ret < 0)
3566             return ret;
3567     }
3568
3569     if(ff_init_me(s)<0)
3570         return -1;
3571
3572     /* Estimate motion for every MB */
3573     if(s->pict_type != AV_PICTURE_TYPE_I){
3574         s->lambda  = (s->lambda  * s->me_penalty_compensation + 128) >> 8;
3575         s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3576         if (s->pict_type != AV_PICTURE_TYPE_B) {
3577             if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3578                 s->me_pre == 2) {
3579                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3580             }
3581         }
3582
3583         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3584     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3585         /* I-Frame */
3586         for(i=0; i<s->mb_stride*s->mb_height; i++)
3587             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3588
3589         if(!s->fixed_qscale){
3590             /* finding spatial complexity for I-frame rate control */
3591             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3592         }
3593     }
3594     for(i=1; i<context_count; i++){
3595         merge_context_after_me(s, s->thread_context[i]);
3596     }
3597     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3598     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3599     emms_c();
3600
3601     if (s->me.scene_change_score > s->scenechange_threshold &&
3602         s->pict_type == AV_PICTURE_TYPE_P) {
3603         s->pict_type= AV_PICTURE_TYPE_I;
3604         for(i=0; i<s->mb_stride*s->mb_height; i++)
3605             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3606         if(s->msmpeg4_version >= 3)
3607             s->no_rounding=1;
3608         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3609                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3610     }
3611
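    /* Choose the smallest f_code/b_code that can represent the estimated motion vectors,
     * then let ff_fix_long_mvs() adjust candidates whose vectors still exceed the
     * representable range; this is skipped in H.263+ unrestricted-MV (umvplus) mode. */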
3612     if(!s->umvplus){
3613         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3614             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3615
3616             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3617                 int a,b;
3618                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3619                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3620                 s->f_code= FFMAX3(s->f_code, a, b);
3621             }
3622
3623             ff_fix_long_p_mvs(s, s->intra_penalty ? CANDIDATE_MB_TYPE_INTER : CANDIDATE_MB_TYPE_INTRA);
3624             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, !!s->intra_penalty);
3625             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3626                 int j;
3627                 for(i=0; i<2; i++){
3628                     for(j=0; j<2; j++)
3629                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3630                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, !!s->intra_penalty);
3631                 }
3632             }
3633         }
3634
3635         if(s->pict_type==AV_PICTURE_TYPE_B){
3636             int a, b;
3637
3638             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3639             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3640             s->f_code = FFMAX(a, b);
3641
3642             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3643             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3644             s->b_code = FFMAX(a, b);
3645
3646             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3647             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3648             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3649             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3650             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3651                 int dir, j;
3652                 for(dir=0; dir<2; dir++){
3653                     for(i=0; i<2; i++){
3654                         for(j=0; j<2; j++){
3655                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3656                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3657                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3658                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3659                         }
3660                     }
3661                 }
3662             }
3663         }
3664     }
3665
3666     if (estimate_qp(s, 0) < 0)
3667         return -1;
3668
3669     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3670         s->pict_type == AV_PICTURE_TYPE_I &&
3671         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3672         s->qscale= 3; //reduce clipping problems
3673
3674     if (s->out_format == FMT_MJPEG) {
3675         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3676         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3677
3678         if (s->avctx->intra_matrix) {
3679             chroma_matrix =
3680             luma_matrix = s->avctx->intra_matrix;
3681         }
3682         if (s->avctx->chroma_intra_matrix)
3683             chroma_matrix = s->avctx->chroma_intra_matrix;
3684
3685         /* for mjpeg, we do include qscale in the matrix */
3686         for(i=1;i<64;i++){
3687             int j = s->idsp.idct_permutation[i];
3688
3689             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3690             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3691         }
3692         s->y_dc_scale_table=
3693         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3694         s->chroma_intra_matrix[0] =
3695         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3696         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3697                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3698         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3699                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3700         s->qscale= 8;
3701     }
3702     if(s->codec_id == AV_CODEC_ID_AMV){
3703         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3704         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3705         for(i=1;i<64;i++){
3706             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3707
3708             s->intra_matrix[j]        = sp5x_qscale_five_quant_table[0][i];
3709             s->chroma_intra_matrix[j] = sp5x_qscale_five_quant_table[1][i];
3710         }
3711         s->y_dc_scale_table= y;
3712         s->c_dc_scale_table= c;
3713         s->intra_matrix[0] = 13;
3714         s->chroma_intra_matrix[0] = 14;
3715         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3716                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3717         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3718                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3719         s->qscale= 8;
3720     }
3721
3722     if (s->out_format == FMT_SPEEDHQ) {
3723         s->y_dc_scale_table=
3724         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[3];
3725     }
3726
3727     //FIXME var duplication
3728     s->current_picture_ptr->f->key_frame =
3729     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3730     s->current_picture_ptr->f->pict_type =
3731     s->current_picture.f->pict_type = s->pict_type;
3732
3733     if (s->current_picture.f->key_frame)
3734         s->picture_in_gop_number=0;
3735
3736     s->mb_x = s->mb_y = 0;
3737     s->last_bits= put_bits_count(&s->pb);
3738     switch(s->out_format) {
3739 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
3740     case FMT_MJPEG:
3741         /* s->huffman == HUFFMAN_TABLE_OPTIMAL can only be true for MJPEG. */
3742         if (!CONFIG_MJPEG_ENCODER || s->huffman != HUFFMAN_TABLE_OPTIMAL)
3743             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3744                                            s->pred, s->intra_matrix, s->chroma_intra_matrix);
3745         break;
3746 #endif
3747     case FMT_SPEEDHQ:
3748         if (CONFIG_SPEEDHQ_ENCODER)
3749             ff_speedhq_encode_picture_header(s);
3750         break;
3751     case FMT_H261:
3752         if (CONFIG_H261_ENCODER)
3753             ff_h261_encode_picture_header(s, picture_number);
3754         break;
3755     case FMT_H263:
3756         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3757             ff_wmv2_encode_picture_header(s, picture_number);
3758         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3759             ff_msmpeg4_encode_picture_header(s, picture_number);
3760         else if (CONFIG_MPEG4_ENCODER && s->h263_pred) {
3761             ret = ff_mpeg4_encode_picture_header(s, picture_number);
3762             if (ret < 0)
3763                 return ret;
3764         } else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3765             ret = ff_rv10_encode_picture_header(s, picture_number);
3766             if (ret < 0)
3767                 return ret;
3768         }
3769         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3770             ff_rv20_encode_picture_header(s, picture_number);
3771         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3772             ff_flv_encode_picture_header(s, picture_number);
3773         else if (CONFIG_H263_ENCODER)
3774             ff_h263_encode_picture_header(s, picture_number);
3775         break;
3776     case FMT_MPEG1:
3777         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3778             ff_mpeg1_encode_picture_header(s, picture_number);
3779         break;
3780     default:
3781         av_assert0(0);
3782     }
3783     bits= put_bits_count(&s->pb);
3784     s->header_bits= bits - s->last_bits;
3785
3786     for(i=1; i<context_count; i++){
3787         update_duplicate_context_after_me(s->thread_context[i], s);
3788     }
3789     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3790     for(i=1; i<context_count; i++){
3791         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3792             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-BUF_BITS));
3793         merge_context_after_encode(s, s->thread_context[i]);
3794     }
3795     emms_c();
3796     return 0;
3797 }
3798
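/* DCT-domain denoising for the noise_reduction option: accumulate the magnitude of each
 * coefficient in dct_error_sum and shrink nonzero coefficients towards zero by the
 * per-coefficient dct_offset, which is presumably derived from these statistics
 * elsewhere in this file. */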
3799 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3800     const int intra= s->mb_intra;
3801     int i;
3802
3803     s->dct_count[intra]++;
3804
3805     for(i=0; i<64; i++){
3806         int level= block[i];
3807
3808         if(level){
3809             if(level>0){
3810                 s->dct_error_sum[intra][i] += level;
3811                 level -= s->dct_offset[intra][i];
3812                 if(level<0) level=0;
3813             }else{
3814                 s->dct_error_sum[intra][i] -= level;
3815                 level += s->dct_offset[intra][i];
3816                 if(level>0) level=0;
3817             }
3818             block[i]= level;
3819         }
3820     }
3821 }
3822
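/* Trellis (rate-distortion optimal) quantization: after the forward DCT, run a
 * Viterbi-style search over the scan positions in which each kept coefficient may take
 * one of a few candidate quantized levels (the two nearest magnitudes, or +/-1 for
 * sub-threshold coefficients) preceded by a zero run back to a surviving predecessor;
 * each decision is scored as distortion + lambda * VLC length, dominated paths are
 * pruned via the survivor list, and the cheapest last-nonzero position wins. */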
3823 static int dct_quantize_trellis_c(MpegEncContext *s,
3824                                   int16_t *block, int n,
3825                                   int qscale, int *overflow){
3826     const int *qmat;
3827     const uint16_t *matrix;
3828     const uint8_t *scantable;
3829     const uint8_t *perm_scantable;
3830     int max=0;
3831     unsigned int threshold1, threshold2;
3832     int bias=0;
3833     int run_tab[65];
3834     int level_tab[65];
3835     int score_tab[65];
3836     int survivor[65];
3837     int survivor_count;
3838     int last_run=0;
3839     int last_level=0;
3840     int last_score= 0;
3841     int last_i;
3842     int coeff[2][64];
3843     int coeff_count[64];
3844     int qmul, qadd, start_i, last_non_zero, i, dc;
3845     const int esc_length= s->ac_esc_length;
3846     uint8_t * length;
3847     uint8_t * last_length;
3848     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3849     int mpeg2_qscale;
3850
3851     s->fdsp.fdct(block);
3852
3853     if(s->dct_error_sum)
3854         s->denoise_dct(s, block);
3855     qmul= qscale*16;
3856     qadd= ((qscale-1)|1)*8;
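    /* qmul/qadd reproduce the H.263-style inverse quantizer, scaled by 8 so that it is
     * comparable with the fdct output used as the distortion reference (the MPEG-1/2
     * and MJPEG branches below apply the same factor of 8):
     *   |level| * qmul + qadd == 8 * (2*qscale*|level| + ((qscale-1)|1)) */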
3857
3858     if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3859     else                 mpeg2_qscale = qscale << 1;
3860
3861     if (s->mb_intra) {
3862         int q;
3863         scantable= s->intra_scantable.scantable;
3864         perm_scantable= s->intra_scantable.permutated;
3865         if (!s->h263_aic) {
3866             if (n < 4)
3867                 q = s->y_dc_scale;
3868             else
3869                 q = s->c_dc_scale;
3870             q = q << 3;
3871         } else{
3872             /* For AIC we skip quant/dequant of INTRADC */
3873             q = 1 << 3;
3874             qadd=0;
3875         }
3876
3877         /* note: block[0] is assumed to be positive */
3878         block[0] = (block[0] + (q >> 1)) / q;
3879         start_i = 1;
3880         last_non_zero = 0;
3881         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3882         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3883         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3884             bias= 1<<(QMAT_SHIFT-1);
3885
3886         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3887             length     = s->intra_chroma_ac_vlc_length;
3888             last_length= s->intra_chroma_ac_vlc_last_length;
3889         } else {
3890             length     = s->intra_ac_vlc_length;
3891             last_length= s->intra_ac_vlc_last_length;
3892         }
3893     } else {
3894         scantable= s->inter_scantable.scantable;
3895         perm_scantable= s->inter_scantable.permutated;
3896         start_i = 0;
3897         last_non_zero = -1;
3898         qmat = s->q_inter_matrix[qscale];
3899         matrix = s->inter_matrix;
3900         length     = s->inter_ac_vlc_length;
3901         last_length= s->inter_ac_vlc_last_length;
3902     }
3903     last_i= start_i;
3904
3905     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3906     threshold2= (threshold1<<1);
3907
3908     for(i=63; i>=start_i; i--) {
3909         const int j = scantable[i];
3910         int level = block[j] * qmat[j];
3911
3912         if(((unsigned)(level+threshold1))>threshold2){
3913             last_non_zero = i;
3914             break;
3915         }
3916     }
3917
3918     for(i=start_i; i<=last_non_zero; i++) {
3919         const int j = scantable[i];
3920         int level = block[j] * qmat[j];
3921
3922 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3923 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3924         if(((unsigned)(level+threshold1))>threshold2){
3925             if(level>0){
3926                 level= (bias + level)>>QMAT_SHIFT;
3927                 coeff[0][i]= level;
3928                 coeff[1][i]= level-1;
3929 //                coeff[2][k]= level-2;
3930             }else{
3931                 level= (bias - level)>>QMAT_SHIFT;
3932                 coeff[0][i]= -level;
3933                 coeff[1][i]= -level+1;
3934 //                coeff[2][k]= -level+2;
3935             }
3936             coeff_count[i]= FFMIN(level, 2);
3937             av_assert2(coeff_count[i]);
3938             max |=level;
3939         }else{
3940             coeff[0][i]= (level>>31)|1;
3941             coeff_count[i]= 1;
3942         }
3943     }
3944
3945     *overflow= s->max_qcoeff < max; //overflow might have happened
3946
3947     if(last_non_zero < start_i){
3948         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3949         return last_non_zero;
3950     }
3951
3952     score_tab[start_i]= 0;
3953     survivor[0]= start_i;
3954     survivor_count= 1;
3955
3956     for(i=start_i; i<=last_non_zero; i++){
3957         int level_index, j, zero_distortion;
3958         int dct_coeff= FFABS(block[ scantable[i] ]);
3959         int best_score=256*256*256*120;
3960
3961         if (s->fdsp.fdct == ff_fdct_ifast)
3962             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3963         zero_distortion= dct_coeff*dct_coeff;
3964
3965         for(level_index=0; level_index < coeff_count[i]; level_index++){
3966             int distortion;
3967             int level= coeff[level_index][i];
3968             const int alevel= FFABS(level);
3969             int unquant_coeff;
3970
3971             av_assert2(level);
3972
3973             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3974                 unquant_coeff= alevel*qmul + qadd;
3975             } else if(s->out_format == FMT_MJPEG) {
3976                 j = s->idsp.idct_permutation[scantable[i]];
3977                 unquant_coeff = alevel * matrix[j] * 8;
3978             }else{ // MPEG-1
3979                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3980                 if(s->mb_intra){
3981                         unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
3982                         unquant_coeff =   (unquant_coeff - 1) | 1;
3983                 }else{
3984                         unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
3985                         unquant_coeff =   (unquant_coeff - 1) | 1;
3986                 }
3987                 unquant_coeff<<= 3;
3988             }
3989
3990             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3991             level+=64;
3992             if((level&(~127)) == 0){
3993                 for(j=survivor_count-1; j>=0; j--){
3994                     int run= i - survivor[j];
3995                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3996                     score += score_tab[i-run];
3997
3998                     if(score < best_score){
3999                         best_score= score;
4000                         run_tab[i+1]= run;
4001                         level_tab[i+1]= level-64;
4002                     }
4003                 }
4004
4005                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4006                     for(j=survivor_count-1; j>=0; j--){
4007                         int run= i - survivor[j];
4008                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4009                         score += score_tab[i-run];
4010                         if(score < last_score){
4011                             last_score= score;
4012                             last_run= run;
4013                             last_level= level-64;
4014                             last_i= i+1;
4015                         }
4016                     }
4017                 }
4018             }else{
4019                 distortion += esc_length*lambda;
4020                 for(j=survivor_count-1; j>=0; j--){
4021                     int run= i - survivor[j];
4022                     int score= distortion + score_tab[i-run];
4023
4024                     if(score < best_score){
4025                         best_score= score;
4026                         run_tab[i+1]= run;
4027                         level_tab[i+1]= level-64;
4028                     }
4029                 }
4030
4031                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4032                     for(j=survivor_count-1; j>=0; j--){
4033                         int run= i - survivor[j];
4034                         int score= distortion + score_tab[i-run];
4035                         if(score < last_score){
4036                             last_score= score;
4037                             last_run= run;
4038                             last_level= level-64;
4039                             last_i= i+1;
4040                         }
4041                     }
4042                 }
4043             }
4044         }
4045
4046         score_tab[i+1]= best_score;
4047
4048         // Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
4049         if(last_non_zero <= 27){
4050             for(; survivor_count; survivor_count--){
4051                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4052                     break;
4053             }
4054         }else{
4055             for(; survivor_count; survivor_count--){
4056                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4057                     break;
4058             }
4059         }
4060
4061         survivor[ survivor_count++ ]= i+1;
4062     }
4063
4064     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4065         last_score= 256*256*256*120;
4066         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4067             int score= score_tab[i];
4068             if (i)
4069                 score += lambda * 2; // FIXME more exact?
4070
4071             if(score < last_score){
4072                 last_score= score;
4073                 last_i= i;
4074                 last_level= level_tab[i];
4075                 last_run= run_tab[i];
4076             }
4077         }
4078     }
4079
4080     s->coded_score[n] = last_score;
4081
4082     dc= FFABS(block[0]);
4083     last_non_zero= last_i - 1;
4084     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4085
4086     if(last_non_zero < start_i)
4087         return last_non_zero;
4088
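         /* Special case: only the very first coefficient of an inter block is
          * coded. Pick its level directly, comparing distortion plus rate
          * against dropping the block altogether. */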
4089     if(last_non_zero == 0 && start_i == 0){
4090         int best_level= 0;
4091         int best_score= dc * dc;
4092
4093         for(i=0; i<coeff_count[0]; i++){
4094             int level= coeff[i][0];
4095             int alevel= FFABS(level);
4096             int unquant_coeff, score, distortion;
4097
4098             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4099                     unquant_coeff= (alevel*qmul + qadd)>>3;
4100             } else{ // MPEG-1
4101                     unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4102                     unquant_coeff =   (unquant_coeff - 1) | 1;
4103             }
4104             unquant_coeff = (unquant_coeff + 4) >> 3;
4105             unquant_coeff<<= 3 + 3;
4106
4107             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4108             level+=64;
4109             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4110             else                    score= distortion + esc_length*lambda;
4111
4112             if(score < best_score){
4113                 best_score= score;
4114                 best_level= level - 64;
4115             }
4116         }
4117         block[0]= best_level;
4118         s->coded_score[n] = best_score - dc*dc;
4119         if(best_level == 0) return -1;
4120         else                return last_non_zero;
4121     }
4122
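         /* Trace the chosen path back through the trellis and write the
          * selected levels into block[] at their permuted positions. */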
4123     i= last_i;
4124     av_assert2(last_level);
4125
4126     block[ perm_scantable[last_non_zero] ]= last_level;
4127     i -= last_run + 1;
4128
4129     for(; i>start_i; i -= run_tab[i] + 1){
4130         block[ perm_scantable[i-1] ]= level_tab[i];
4131     }
4132
4133     return last_non_zero;
4134 }
4135
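     /* basis[] holds the (IDCT-permuted) 8x8 DCT basis functions in fixed point.
      * It is filled lazily by build_basis() on first use and allows
      * dct_quantize_refine() to apply single-coefficient changes directly in the
      * spatial domain. */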
4136 static int16_t basis[64][64];
4137
4138 static void build_basis(uint8_t *perm){
4139     int i, j, x, y;
4140     emms_c();
4141     for(i=0; i<8; i++){
4142         for(j=0; j<8; j++){
4143             for(y=0; y<8; y++){
4144                 for(x=0; x<8; x++){
4145                     double s= 0.25*(1<<BASIS_SHIFT);
4146                     int index= 8*i + j;
4147                     int perm_index= perm[index];
4148                     if(i==0) s*= sqrt(0.5);
4149                     if(j==0) s*= sqrt(0.5);
4150                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4151                 }
4152             }
4153         }
4154     }
4155 }
4156
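     /* Noise-shaping refinement pass: starting from an already quantized block,
      * repeatedly try changing individual coefficients by +/-1 and keep any
      * change that lowers the weighted spatial-domain error plus the estimated
      * bit cost. */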
4157 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4158                         int16_t *block, int16_t *weight, int16_t *orig,
4159                         int n, int qscale){
4160     int16_t rem[64];
4161     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4162     const uint8_t *scantable;
4163     const uint8_t *perm_scantable;
4164 //    unsigned int threshold1, threshold2;
4165 //    int bias=0;
4166     int run_tab[65];
4167     int prev_run=0;
4168     int prev_level=0;
4169     int qmul, qadd, start_i, last_non_zero, i, dc;
4170     uint8_t * length;
4171     uint8_t * last_length;
4172     int lambda;
4173     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4174
4175     if(basis[0][0] == 0)
4176         build_basis(s->idsp.idct_permutation);
4177
4178     qmul= qscale*2;
4179     qadd= (qscale-1)|1;
4180     if (s->mb_intra) {
4181         scantable= s->intra_scantable.scantable;
4182         perm_scantable= s->intra_scantable.permutated;
4183         if (!s->h263_aic) {
4184             if (n < 4)
4185                 q = s->y_dc_scale;
4186             else
4187                 q = s->c_dc_scale;
4188         } else{
4189             /* For AIC we skip quant/dequant of INTRADC */
4190             q = 1;
4191             qadd=0;
4192         }
4193         q <<= RECON_SHIFT-3;
4194         /* note: block[0] is assumed to be positive */
4195         dc= block[0]*q;
4196 //        block[0] = (block[0] + (q >> 1)) / q;
4197         start_i = 1;
4198 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4199 //            bias= 1<<(QMAT_SHIFT-1);
4200         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4201             length     = s->intra_chroma_ac_vlc_length;
4202             last_length= s->intra_chroma_ac_vlc_last_length;
4203         } else {
4204             length     = s->intra_ac_vlc_length;
4205             last_length= s->intra_ac_vlc_last_length;
4206         }
4207     } else {
4208         scantable= s->inter_scantable.scantable;
4209         perm_scantable= s->inter_scantable.permutated;
4210         dc= 0;
4211         start_i = 0;
4212         length     = s->inter_ac_vlc_length;
4213         last_length= s->inter_ac_vlc_last_length;
4214     }
4215     last_non_zero = s->block_last_index[n];
4216
4217     dc += (1<<(RECON_SHIFT-1));
4218     for(i=0; i<64; i++){
4219         rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4220     }
4221
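         /* Turn the caller-supplied weights into per-coefficient noise-shaping
          * weights in the 16..63 range and fold their total energy into lambda. */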
4222     sum=0;
4223     for(i=0; i<64; i++){
4224         int one= 36;
4225         int qns=4;
4226         int w;
4227
4228         w= FFABS(weight[i]) + qns*one;
4229         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4230
4231         weight[i] = w;
4232 //        w=weight[i] = (63*qns + (w/2)) / w;
4233
4234         av_assert2(w>0);
4235         av_assert2(w<(1<<6));
4236         sum += w*w;
4237     }
4238     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4239
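         /* Record the zero-run lengths of the current block in run_tab[] and add
          * each dequantized coefficient's spatial contribution to rem[], so that
          * rem[] ends up holding the reconstruction error. */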
4240     run=0;
4241     rle_index=0;
4242     for(i=start_i; i<=last_non_zero; i++){
4243         int j= perm_scantable[i];
4244         const int level= block[j];
4245         int coeff;
4246
4247         if(level){
4248             if(level<0) coeff= qmul*level - qadd;
4249             else        coeff= qmul*level + qadd;
4250             run_tab[rle_index++]=run;
4251             run=0;
4252
4253             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4254         }else{
4255             run++;
4256         }
4257     }
4258
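         /* Greedy refinement: in each iteration evaluate all +/-1 coefficient
          * changes, apply the single best one and stop as soon as no change
          * improves the score. */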
4259     for(;;){
4260         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4261         int best_coeff=0;
4262         int best_change=0;
4263         int run2, best_unquant_change=0, analyze_gradient;
4264         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4265
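             /* With gradient analysis enabled, take the DCT of the weighted
              * residual so that new coefficients are only introduced where
              * their sign opposes the remaining error. */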
4266         if(analyze_gradient){
4267             for(i=0; i<64; i++){
4268                 int w= weight[i];
4269
4270                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4271             }
4272             s->fdsp.fdct(d1);
4273         }
4274
4275         if(start_i){
4276             const int level= block[0];
4277             int change, old_coeff;
4278
4279             av_assert2(s->mb_intra);
4280
4281             old_coeff= q*level;
4282
4283             for(change=-1; change<=1; change+=2){
4284                 int new_level= level + change;
4285                 int score, new_coeff;
4286
4287                 new_coeff= q*new_level;
4288                 if(new_coeff >= 2048 || new_coeff < 0)
4289                     continue;
4290
4291                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4292                                                   new_coeff - old_coeff);
4293                 if(score<best_score){
4294                     best_score= score;
4295                     best_coeff= 0;
4296                     best_change= change;
4297                     best_unquant_change= new_coeff - old_coeff;
4298                 }
4299             }
4300         }
4301
4302         run=0;
4303         rle_index=0;
4304         run2= run_tab[rle_index++];
4305         prev_level=0;
4306         prev_run=0;
4307
4308         for(i=start_i; i<64; i++){
4309             int j= perm_scantable[i];
4310             const int level= block[j];
4311             int change, old_coeff;
4312
4313             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4314                 break;
4315
4316             if(level){
4317                 if(level<0) old_coeff= qmul*level - qadd;
4318                 else        old_coeff= qmul*level + qadd;
4319                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4320             }else{
4321                 old_coeff=0;
4322                 run2--;
4323                 av_assert2(run2>=0 || i >= last_non_zero );
4324             }
4325
4326             for(change=-1; change<=1; change+=2){
4327                 int new_level= level + change;
4328                 int score, new_coeff, unquant_change;
4329
4330                 score=0;
4331                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4332                    continue;
4333
4334                 if(new_level){
4335                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4336                     else            new_coeff= qmul*new_level + qadd;
4337                     if(new_coeff >= 2048 || new_coeff <= -2048)
4338                         continue;
4339                     //FIXME check for overflow
4340
4341                     if(level){
4342                         if(level < 63 && level > -63){
4343                             if(i < last_non_zero)
4344                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4345                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4346                             else
4347                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4348                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4349                         }
4350                     }else{
4351                         av_assert2(FFABS(new_level)==1);
4352
4353                         if(analyze_gradient){
4354                             int g= d1[ scantable[i] ];
4355                             if(g && (g^new_level) >= 0)
4356                                 continue;
4357                         }
4358
4359                         if(i < last_non_zero){
4360                             int next_i= i + run2 + 1;
4361                             int next_level= block[ perm_scantable[next_i] ] + 64;
4362
4363                             if(next_level&(~127))
4364                                 next_level= 0;
4365
4366                             if(next_i < last_non_zero)
4367                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4368                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4369                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4370                             else
4371                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4372                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4373                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4374                         }else{
4375                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4376                             if(prev_level){
4377                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4378                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4379                             }
4380                         }
4381                     }
4382                 }else{
4383                     new_coeff=0;
4384                     av_assert2(FFABS(level)==1);
4385
4386                     if(i < last_non_zero){
4387                         int next_i= i + run2 + 1;
4388                         int next_level= block[ perm_scantable[next_i] ] + 64;
4389
4390                         if(next_level&(~127))
4391                             next_level= 0;
4392
4393                         if(next_i < last_non_zero)
4394                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4395                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4396                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4397                         else
4398                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4399                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4400                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4401                     }else{
4402                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4403                         if(prev_level){
4404                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4405                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4406                         }
4407                     }
4408                 }
4409
4410                 score *= lambda;
4411
4412                 unquant_change= new_coeff - old_coeff;
4413                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4414
4415                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4416                                                    unquant_change);
4417                 if(score<best_score){
4418                     best_score= score;
4419                     best_coeff= i;
4420                     best_change= change;
4421                     best_unquant_change= unquant_change;
4422                 }
4423             }
4424             if(level){
4425                 prev_level= level + 64;
4426                 if(prev_level&(~127))
4427                     prev_level= 0;
4428                 prev_run= run;
4429                 run=0;
4430             }else{
4431                 run++;
4432             }
4433         }
4434
4435         if(best_change){
4436             int j= perm_scantable[ best_coeff ];
4437
4438             block[j] += best_change;
4439
4440             if(best_coeff > last_non_zero){
4441                 last_non_zero= best_coeff;
4442                 av_assert2(block[j]);
4443             }else{
4444                 for(; last_non_zero>=start_i; last_non_zero--){
4445                     if(block[perm_scantable[last_non_zero]])
4446                         break;
4447                 }
4448             }
4449
4450             run=0;
4451             rle_index=0;
4452             for(i=start_i; i<=last_non_zero; i++){
4453                 int j= perm_scantable[i];
4454                 const int level= block[j];
4455
4456                 if(level){
4457                     run_tab[rle_index++]=run;
4458                     run=0;
4459                 }else{
4460                     run++;
4461                 }
4462             }
4463
4464             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4465         }else{
4466             break;
4467         }
4468     }
4469
4470     return last_non_zero;
4471 }
4472
4473 /**
4474  * Permute an 8x8 block according to the given permutation.
4475  * @param block the block which will be permuted according to
4476  *              the given permutation vector
4477  * @param permutation the permutation vector
4478  * @param last the last non-zero coefficient in scantable order; only used
4479  *             to speed the permutation up
4480  * @param scantable the scantable in use; this is only used to speed the
4481  *                  permutation up, the block is not (inverse) permuted
4482  *                  to scantable order!
4483  */
4484 void ff_block_permute(int16_t *block, uint8_t *permutation,
4485                       const uint8_t *scantable, int last)
4486 {
4487     int i;
4488     int16_t temp[64];
4489
4490     if (last <= 0)
4491         return;
4492     //FIXME this is OK but not clean, and it might fail for some permutations
4493     // if (permutation[1] == 1)
4494     // return;
4495
4496     for (i = 0; i <= last; i++) {
4497         const int j = scantable[i];
4498         temp[j] = block[j];
4499         block[j] = 0;
4500     }
4501
4502     for (i = 0; i <= last; i++) {
4503         const int j = scantable[i];
4504         const int perm_j = permutation[j];
4505         block[perm_j] = temp[j];
4506     }
4507 }
4508
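     /* Default scalar quantizer: forward DCT, optional DCT-domain denoising,
      * then biased quantization against the selected matrix. Returns the index
      * of the last nonzero coefficient in scan order and sets *overflow if any
      * quantized level exceeds s->max_qcoeff. */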
4509 int ff_dct_quantize_c(MpegEncContext *s,
4510                         int16_t *block, int n,
4511                         int qscale, int *overflow)
4512 {
4513     int i, j, level, last_non_zero, q, start_i;
4514     const int *qmat;
4515     const uint8_t *scantable;
4516     int bias;
4517     int max=0;
4518     unsigned int threshold1, threshold2;
4519
4520     s->fdsp.fdct(block);
4521
4522     if(s->dct_error_sum)
4523         s->denoise_dct(s, block);
4524
4525     if (s->mb_intra) {
4526         scantable= s->intra_scantable.scantable;
4527         if (!s->h263_aic) {
4528             if (n < 4)
4529                 q = s->y_dc_scale;
4530             else
4531                 q = s->c_dc_scale;
4532             q = q << 3;
4533         } else
4534             /* For AIC we skip quant/dequant of INTRADC */
4535             q = 1 << 3;
4536
4537         /* note: block[0] is assumed to be positive */
4538         block[0] = (block[0] + (q >> 1)) / q;
4539         start_i = 1;
4540         last_non_zero = 0;
4541         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4542         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4543     } else {
4544         scantable= s->inter_scantable.scantable;
4545         start_i = 0;
4546         last_non_zero = -1;
4547         qmat = s->q_inter_matrix[qscale];
4548         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4549     }
4550     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4551     threshold2= (threshold1<<1);
4552     for(i=63;i>=start_i;i--) {
4553         j = scantable[i];
4554         level = block[j] * qmat[j];
4555
4556         if(((unsigned)(level+threshold1))>threshold2){
4557             last_non_zero = i;
4558             break;
4559         }else{
4560             block[j]=0;
4561         }
4562     }
4563     for(i=start_i; i<=last_non_zero; i++) {
4564         j = scantable[i];
4565         level = block[j] * qmat[j];
4566
4567 //        if(   bias+level >= (1<<QMAT_SHIFT)
4568 //           || bias-level >= (1<<QMAT_SHIFT)){
4569         if(((unsigned)(level+threshold1))>threshold2){
4570             if(level>0){
4571                 level= (bias + level)>>QMAT_SHIFT;
4572                 block[j]= level;
4573             }else{
4574                 level= (bias - level)>>QMAT_SHIFT;
4575                 block[j]= -level;
4576             }
4577             max |=level;
4578         }else{
4579             block[j]=0;
4580         }
4581     }
4582     *overflow= s->max_qcoeff < max; //overflow might have happened
4583
4584     /* We need this permutation so that the IDCT gets the coefficients in the order it expects; only the nonzero elements are permuted. */
4585     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4586         ff_block_permute(block, s->idsp.idct_permutation,
4587                       scantable, last_non_zero);
4588
4589     return last_non_zero;
4590 }
4591
4592 #define OFFSET(x) offsetof(MpegEncContext, x)
4593 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4594 static const AVOption h263_options[] = {
4595     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4596     { "mb_info",      "emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4597     FF_MPV_COMMON_OPTS
4598 #if FF_API_MPEGVIDEO_OPTS
4599     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4600     FF_MPV_DEPRECATED_A53_CC_OPT
4601     FF_MPV_DEPRECATED_MATRIX_OPT
4602     FF_MPV_DEPRECATED_BFRAME_OPTS
4603 #endif
4604     { NULL },
4605 };
4606
4607 static const AVClass h263_class = {
4608     .class_name = "H.263 encoder",
4609     .item_name  = av_default_item_name,
4610     .option     = h263_options,
4611     .version    = LIBAVUTIL_VERSION_INT,
4612 };
4613
4614 AVCodec ff_h263_encoder = {
4615     .name           = "h263",
4616     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4617     .type           = AVMEDIA_TYPE_VIDEO,
4618     .id             = AV_CODEC_ID_H263,
4619     .priv_data_size = sizeof(MpegEncContext),
4620     .init           = ff_mpv_encode_init,
4621     .encode2        = ff_mpv_encode_picture,
4622     .close          = ff_mpv_encode_end,
4623     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4624     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4625     .priv_class     = &h263_class,
4626 };
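
     /*
      * Rough usage sketch (not part of this file, and only an illustration): how
      * a client could drive one of the encoders registered here through the
      * public send/receive API. Error handling and frame allocation are omitted,
      * and the width/height/bitrate values are arbitrary example settings.
      *
      *     const AVCodec *codec = avcodec_find_encoder_by_name("h263");
      *     AVCodecContext *ctx  = avcodec_alloc_context3(codec);
      *     ctx->width     = 352;                    // CIF, a size H.263 supports
      *     ctx->height    = 288;
      *     ctx->time_base = (AVRational){ 1, 25 };
      *     ctx->pix_fmt   = AV_PIX_FMT_YUV420P;     // the only format listed above
      *     ctx->bit_rate  = 400000;
      *     if (avcodec_open2(ctx, codec, NULL) == 0) {
      *         AVPacket *pkt = av_packet_alloc();
      *         avcodec_send_frame(ctx, frame);      // 'frame' prepared elsewhere
      *         while (avcodec_receive_packet(ctx, pkt) == 0)
      *             av_packet_unref(pkt);            // consume pkt->data here
      *         av_packet_free(&pkt);
      *     }
      *     avcodec_free_context(&ctx);
      */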
4627
4628 static const AVOption h263p_options[] = {
4629     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus),       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4630     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4631     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
4632     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE},
4633     FF_MPV_COMMON_OPTS
4634 #if FF_API_MPEGVIDEO_OPTS
4635     FF_MPV_DEPRECATED_MPEG_QUANT_OPT
4636     FF_MPV_DEPRECATED_A53_CC_OPT
4637     FF_MPV_DEPRECATED_MATRIX_OPT
4638     FF_MPV_DEPRECATED_BFRAME_OPTS
4639 #endif
4640     { NULL },
4641 };
4642 static const AVClass h263p_class = {
4643     .class_name = "H.263p encoder",
4644     .item_name  = av_default_item_name,
4645     .option     = h263p_options,
4646     .version    = LIBAVUTIL_VERSION_INT,
4647 };
4648
4649 AVCodec ff_h263p_encoder = {
4650     .name           = "h263p",
4651     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4652     .type           = AVMEDIA_TYPE_VIDEO,
4653     .id             = AV_CODEC_ID_H263P,
4654     .priv_data_size = sizeof(MpegEncContext),
4655     .init           = ff_mpv_encode_init,
4656     .encode2        = ff_mpv_encode_picture,
4657     .close          = ff_mpv_encode_end,
4658     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4659     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4660     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4661     .priv_class     = &h263p_class,
4662 };
4663
4664 static const AVClass msmpeg4v2_class = {
4665     .class_name = "msmpeg4v2 encoder",
4666     .item_name  = av_default_item_name,
4667     .option     = ff_mpv_generic_options,
4668     .version    = LIBAVUTIL_VERSION_INT,
4669 };
4670
4671 AVCodec ff_msmpeg4v2_encoder = {
4672     .name           = "msmpeg4v2",
4673     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4674     .type           = AVMEDIA_TYPE_VIDEO,
4675     .id             = AV_CODEC_ID_MSMPEG4V2,
4676     .priv_data_size = sizeof(MpegEncContext),
4677     .init           = ff_mpv_encode_init,
4678     .encode2        = ff_mpv_encode_picture,
4679     .close          = ff_mpv_encode_end,
4680     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4681     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4682     .priv_class     = &msmpeg4v2_class,
4683 };
4684
4685 static const AVClass msmpeg4v3_class = {
4686     .class_name = "msmpeg4v3 encoder",
4687     .item_name  = av_default_item_name,
4688     .option     = ff_mpv_generic_options,
4689     .version    = LIBAVUTIL_VERSION_INT,
4690 };
4691
4692 AVCodec ff_msmpeg4v3_encoder = {
4693     .name           = "msmpeg4",
4694     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4695     .type           = AVMEDIA_TYPE_VIDEO,
4696     .id             = AV_CODEC_ID_MSMPEG4V3,
4697     .priv_data_size = sizeof(MpegEncContext),
4698     .init           = ff_mpv_encode_init,
4699     .encode2        = ff_mpv_encode_picture,
4700     .close          = ff_mpv_encode_end,
4701     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4702     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4703     .priv_class     = &msmpeg4v3_class,
4704 };
4705
4706 static const AVClass wmv1_class = {
4707     .class_name = "wmv1 encoder",
4708     .item_name  = av_default_item_name,
4709     .option     = ff_mpv_generic_options,
4710     .version    = LIBAVUTIL_VERSION_INT,
4711 };
4712
4713 AVCodec ff_wmv1_encoder = {
4714     .name           = "wmv1",
4715     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4716     .type           = AVMEDIA_TYPE_VIDEO,
4717     .id             = AV_CODEC_ID_WMV1,
4718     .priv_data_size = sizeof(MpegEncContext),
4719     .init           = ff_mpv_encode_init,
4720     .encode2        = ff_mpv_encode_picture,
4721     .close          = ff_mpv_encode_end,
4722     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
4723     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4724     .priv_class     = &wmv1_class,
4725 };