]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
avcodec/mpegvideo_enc: Avoid fine lambda steps in VBV retry code when RD is not in use
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /*
26  * non linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
27  */
28
29 /**
30  * @file
31  * The simplest mpeg encoder (well, it was the simplest!).
32  */
33
34 #include <stdint.h>
35
36 #include "libavutil/internal.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/pixdesc.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/timer.h"
42 #include "avcodec.h"
43 #include "dct.h"
44 #include "idctdsp.h"
45 #include "mpeg12.h"
46 #include "mpegvideo.h"
47 #include "mpegvideodata.h"
48 #include "h261.h"
49 #include "h263.h"
50 #include "h263data.h"
51 #include "mjpegenc_common.h"
52 #include "mathops.h"
53 #include "mpegutils.h"
54 #include "mjpegenc.h"
55 #include "msmpeg4.h"
56 #include "pixblockdsp.h"
57 #include "qpeldsp.h"
58 #include "faandct.h"
59 #include "thread.h"
60 #include "aandcttab.h"
61 #include "flv.h"
62 #include "mpeg4video.h"
63 #include "internal.h"
64 #include "bytestream.h"
65 #include "wmv2.h"
66 #include "rv10.h"
67 #include <limits.h>
68 #include "sp5x.h"
69
70 #define QUANT_BIAS_SHIFT 8
71
72 #define QMAT_SHIFT_MMX 16
73 #define QMAT_SHIFT 21
74
75 static int encode_picture(MpegEncContext *s, int picture_number);
76 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
77 static int sse_mb(MpegEncContext *s);
78 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
79 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
80
81 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
82 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
83
/* Option table assembled from the FF_MPV_COMMON_OPTS entries and closed by
 * the NULL sentinel entry that marks the end of the list. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
88
89 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
90                        uint16_t (*qmat16)[2][64],
91                        const uint16_t *quant_matrix,
92                        int bias, int qmin, int qmax, int intra)
93 {
94     FDCTDSPContext *fdsp = &s->fdsp;
95     int qscale;
96     int shift = 0;
97
98     for (qscale = qmin; qscale <= qmax; qscale++) {
99         int i;
100         int qscale2;
101
102         if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
103         else                 qscale2 = qscale << 1;
104
105         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
106 #if CONFIG_FAANDCT
107             fdsp->fdct == ff_faandct            ||
108 #endif /* CONFIG_FAANDCT */
109             fdsp->fdct == ff_jpeg_fdct_islow_10) {
110             for (i = 0; i < 64; i++) {
111                 const int j = s->idsp.idct_permutation[i];
112                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
113                 /* 16 <= qscale * quant_matrix[i] <= 7905
114                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
115                  *             19952 <=              x  <= 249205026
116                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
117                  *           3444240 >= (1 << 36) / (x) >= 275 */
118
119                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
120             }
121         } else if (fdsp->fdct == ff_fdct_ifast) {
122             for (i = 0; i < 64; i++) {
123                 const int j = s->idsp.idct_permutation[i];
124                 int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
125                 /* 16 <= qscale * quant_matrix[i] <= 7905
126                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
127                  *             19952 <=              x  <= 249205026
128                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
129                  *           3444240 >= (1 << 36) / (x) >= 275 */
130
131                 qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
132             }
133         } else {
134             for (i = 0; i < 64; i++) {
135                 const int j = s->idsp.idct_permutation[i];
136                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
137                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
138                  * Assume x = qscale * quant_matrix[i]
139                  * So             16 <=              x  <= 7905
140                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
141                  * so          32768 >= (1 << 19) / (x) >= 67 */
142                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
143                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
144                 //                    (qscale * quant_matrix[i]);
145                 qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
146
147                 if (qmat16[qscale][0][i] == 0 ||
148                     qmat16[qscale][0][i] == 128 * 256)
149                     qmat16[qscale][0][i] = 128 * 256 - 1;
150                 qmat16[qscale][1][i] =
151                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
152                                 qmat16[qscale][0][i]);
153             }
154         }
155
156         for (i = intra; i < 64; i++) {
157             int64_t max = 8191;
158             if (fdsp->fdct == ff_fdct_ifast) {
159                 max = (8191LL * ff_aanscales[i]) >> 14;
160             }
161             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
162                 shift++;
163             }
164         }
165     }
166     if (shift) {
167         av_log(NULL, AV_LOG_INFO,
168                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
169                QMAT_SHIFT - shift);
170     }
171 }
172
173 static inline void update_qscale(MpegEncContext *s)
174 {
175     if (s->q_scale_type == 1 && 0) {
176         int i;
177         int bestdiff=INT_MAX;
178         int best = 1;
179
180         for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
181             int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
182             if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
183                 (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
184                 continue;
185             if (diff < bestdiff) {
186                 bestdiff = diff;
187                 best = i;
188             }
189         }
190         s->qscale = best;
191     } else {
192         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
193                     (FF_LAMBDA_SHIFT + 7);
194         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
195     }
196
197     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
198                  FF_LAMBDA_SHIFT;
199 }
200
201 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
202 {
203     int i;
204
205     if (matrix) {
206         put_bits(pb, 1, 1);
207         for (i = 0; i < 64; i++) {
208             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
209         }
210     } else
211         put_bits(pb, 1, 0);
212 }
213
214 /**
215  * init s->current_picture.qscale_table from s->lambda_table
216  */
217 void ff_init_qscale_tab(MpegEncContext *s)
218 {
219     int8_t * const qscale_table = s->current_picture.qscale_table;
220     int i;
221
222     for (i = 0; i < s->mb_num; i++) {
223         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
224         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
225         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
226                                                   s->avctx->qmax);
227     }
228 }
229
230 static void update_duplicate_context_after_me(MpegEncContext *dst,
231                                               MpegEncContext *src)
232 {
233 #define COPY(a) dst->a= src->a
234     COPY(pict_type);
235     COPY(current_picture);
236     COPY(f_code);
237     COPY(b_code);
238     COPY(qscale);
239     COPY(lambda);
240     COPY(lambda2);
241     COPY(picture_in_gop_number);
242     COPY(gop_picture_number);
243     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
244     COPY(progressive_frame);    // FIXME don't set in encode_header
245     COPY(partitioned_frame);    // FIXME don't set in encode_header
246 #undef COPY
247 }
248
249 /**
250  * Set the given MpegEncContext to defaults for encoding.
251  * the changed fields will not depend upon the prior state of the MpegEncContext.
252  */
253 static void mpv_encode_defaults(MpegEncContext *s)
254 {
255     int i;
256     ff_mpv_common_defaults(s);
257
258     for (i = -16; i < 16; i++) {
259         default_fcode_tab[i + MAX_MV] = 1;
260     }
261     s->me.mv_penalty = default_mv_penalty;
262     s->fcode_tab     = default_fcode_tab;
263
264     s->input_picture_number  = 0;
265     s->picture_in_gop_number = 0;
266 }
267
268 av_cold int ff_dct_encode_init(MpegEncContext *s) {
269     if (ARCH_X86)
270         ff_dct_encode_init_x86(s);
271
272     if (CONFIG_H263_ENCODER)
273         ff_h263dsp_init(&s->h263dsp);
274     if (!s->dct_quantize)
275         s->dct_quantize = ff_dct_quantize_c;
276     if (!s->denoise_dct)
277         s->denoise_dct  = denoise_dct_c;
278     s->fast_dct_quantize = s->dct_quantize;
279     if (s->avctx->trellis)
280         s->dct_quantize  = dct_quantize_trellis_c;
281
282     return 0;
283 }
284
285 /* init video encoder */
286 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
287 {
288     MpegEncContext *s = avctx->priv_data;
289     int i, ret, format_supported;
290
291     mpv_encode_defaults(s);
292
293     switch (avctx->codec_id) {
294     case AV_CODEC_ID_MPEG2VIDEO:
295         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
296             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
297             av_log(avctx, AV_LOG_ERROR,
298                    "only YUV420 and YUV422 are supported\n");
299             return -1;
300         }
301         break;
302     case AV_CODEC_ID_MJPEG:
303     case AV_CODEC_ID_AMV:
304         format_supported = 0;
305         /* JPEG color space */
306         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
307             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
308             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
309             (avctx->color_range == AVCOL_RANGE_JPEG &&
310              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
311               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
312               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
313             format_supported = 1;
314         /* MPEG color space */
315         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
316                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
317                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
318                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
319             format_supported = 1;
320
321         if (!format_supported) {
322             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
323             return -1;
324         }
325         break;
326     default:
327         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
328             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
329             return -1;
330         }
331     }
332
333     switch (avctx->pix_fmt) {
334     case AV_PIX_FMT_YUVJ444P:
335     case AV_PIX_FMT_YUV444P:
336         s->chroma_format = CHROMA_444;
337         break;
338     case AV_PIX_FMT_YUVJ422P:
339     case AV_PIX_FMT_YUV422P:
340         s->chroma_format = CHROMA_422;
341         break;
342     case AV_PIX_FMT_YUVJ420P:
343     case AV_PIX_FMT_YUV420P:
344     default:
345         s->chroma_format = CHROMA_420;
346         break;
347     }
348
349     s->bit_rate = avctx->bit_rate;
350     s->width    = avctx->width;
351     s->height   = avctx->height;
352     if (avctx->gop_size > 600 &&
353         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
354         av_log(avctx, AV_LOG_WARNING,
355                "keyframe interval too large!, reducing it from %d to %d\n",
356                avctx->gop_size, 600);
357         avctx->gop_size = 600;
358     }
359     s->gop_size     = avctx->gop_size;
360     s->avctx        = avctx;
361     if (avctx->max_b_frames > MAX_B_FRAMES) {
362         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
363                "is %d.\n", MAX_B_FRAMES);
364         avctx->max_b_frames = MAX_B_FRAMES;
365     }
366     s->max_b_frames = avctx->max_b_frames;
367     s->codec_id     = avctx->codec->id;
368     s->strict_std_compliance = avctx->strict_std_compliance;
369     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
370     s->mpeg_quant         = avctx->mpeg_quant;
371     s->rtp_mode           = !!avctx->rtp_payload_size;
372     s->intra_dc_precision = avctx->intra_dc_precision;
373
374     // workaround some differences between how applications specify dc precision
375     if (s->intra_dc_precision < 0) {
376         s->intra_dc_precision += 8;
377     } else if (s->intra_dc_precision >= 8)
378         s->intra_dc_precision -= 8;
379
380     if (s->intra_dc_precision < 0) {
381         av_log(avctx, AV_LOG_ERROR,
382                 "intra dc precision must be positive, note some applications use"
383                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
384         return AVERROR(EINVAL);
385     }
386
387     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
388         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
389         return AVERROR(EINVAL);
390     }
391     s->user_specified_pts = AV_NOPTS_VALUE;
392
393     if (s->gop_size <= 1) {
394         s->intra_only = 1;
395         s->gop_size   = 12;
396     } else {
397         s->intra_only = 0;
398     }
399
400 #if FF_API_MOTION_EST
401 FF_DISABLE_DEPRECATION_WARNINGS
402     s->me_method = avctx->me_method;
403 FF_ENABLE_DEPRECATION_WARNINGS
404 #endif
405
406     /* Fixed QSCALE */
407     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
408
409 #if FF_API_MPV_OPT
410     FF_DISABLE_DEPRECATION_WARNINGS
411     if (avctx->border_masking != 0.0)
412         s->border_masking = avctx->border_masking;
413     FF_ENABLE_DEPRECATION_WARNINGS
414 #endif
415
416     s->adaptive_quant = (s->avctx->lumi_masking ||
417                          s->avctx->dark_masking ||
418                          s->avctx->temporal_cplx_masking ||
419                          s->avctx->spatial_cplx_masking  ||
420                          s->avctx->p_masking      ||
421                          s->border_masking ||
422                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
423                         !s->fixed_qscale;
424
425     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
426
427     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
428         switch(avctx->codec_id) {
429         case AV_CODEC_ID_MPEG1VIDEO:
430         case AV_CODEC_ID_MPEG2VIDEO:
431             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
432             break;
433         case AV_CODEC_ID_MPEG4:
434         case AV_CODEC_ID_MSMPEG4V1:
435         case AV_CODEC_ID_MSMPEG4V2:
436         case AV_CODEC_ID_MSMPEG4V3:
437             if       (avctx->rc_max_rate >= 15000000) {
438                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
439             } else if(avctx->rc_max_rate >=  2000000) {
440                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
441             } else if(avctx->rc_max_rate >=   384000) {
442                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
443             } else
444                 avctx->rc_buffer_size = 40;
445             avctx->rc_buffer_size *= 16384;
446             break;
447         }
448         if (avctx->rc_buffer_size) {
449             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
450         }
451     }
452
453     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
454         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
455         return -1;
456     }
457
458     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
459         av_log(avctx, AV_LOG_INFO,
460                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
461     }
462
463     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
464         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
465         return -1;
466     }
467
468     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
469         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
470         return -1;
471     }
472
473     if (avctx->rc_max_rate &&
474         avctx->rc_max_rate == avctx->bit_rate &&
475         avctx->rc_max_rate != avctx->rc_min_rate) {
476         av_log(avctx, AV_LOG_INFO,
477                "impossible bitrate constraints, this will fail\n");
478     }
479
480     if (avctx->rc_buffer_size &&
481         avctx->bit_rate * (int64_t)avctx->time_base.num >
482             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
483         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
484         return -1;
485     }
486
487     if (!s->fixed_qscale &&
488         avctx->bit_rate * av_q2d(avctx->time_base) >
489             avctx->bit_rate_tolerance) {
490         av_log(avctx, AV_LOG_WARNING,
491                "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, (int64_t)avctx->bit_rate);
492         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
493     }
494
495     if (s->avctx->rc_max_rate &&
496         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
497         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
498          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
499         90000LL * (avctx->rc_buffer_size - 1) >
500             s->avctx->rc_max_rate * 0xFFFFLL) {
501         av_log(avctx, AV_LOG_INFO,
502                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
503                "specified vbv buffer is too large for the given bitrate!\n");
504     }
505
506     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
507         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
508         s->codec_id != AV_CODEC_ID_FLV1) {
509         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
510         return -1;
511     }
512
513     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
514         av_log(avctx, AV_LOG_ERROR,
515                "OBMC is only supported with simple mb decision\n");
516         return -1;
517     }
518
519     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
520         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
521         return -1;
522     }
523
524     if (s->max_b_frames                    &&
525         s->codec_id != AV_CODEC_ID_MPEG4      &&
526         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
527         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
528         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
529         return -1;
530     }
531     if (s->max_b_frames < 0) {
532         av_log(avctx, AV_LOG_ERROR,
533                "max b frames must be 0 or positive for mpegvideo based encoders\n");
534         return -1;
535     }
536
537     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
538          s->codec_id == AV_CODEC_ID_H263  ||
539          s->codec_id == AV_CODEC_ID_H263P) &&
540         (avctx->sample_aspect_ratio.num > 255 ||
541          avctx->sample_aspect_ratio.den > 255)) {
542         av_log(avctx, AV_LOG_WARNING,
543                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
544                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
545         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
546                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
547     }
548
549     if ((s->codec_id == AV_CODEC_ID_H263  ||
550          s->codec_id == AV_CODEC_ID_H263P) &&
551         (avctx->width  > 2048 ||
552          avctx->height > 1152 )) {
553         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
554         return -1;
555     }
556     if ((s->codec_id == AV_CODEC_ID_H263  ||
557          s->codec_id == AV_CODEC_ID_H263P) &&
558         ((avctx->width &3) ||
559          (avctx->height&3) )) {
560         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
561         return -1;
562     }
563
564     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
565         (avctx->width  > 4095 ||
566          avctx->height > 4095 )) {
567         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
568         return -1;
569     }
570
571     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
572         (avctx->width  > 16383 ||
573          avctx->height > 16383 )) {
574         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
575         return -1;
576     }
577
578     if (s->codec_id == AV_CODEC_ID_RV10 &&
579         (avctx->width &15 ||
580          avctx->height&15 )) {
581         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
582         return AVERROR(EINVAL);
583     }
584
585     if (s->codec_id == AV_CODEC_ID_RV20 &&
586         (avctx->width &3 ||
587          avctx->height&3 )) {
588         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
589         return AVERROR(EINVAL);
590     }
591
592     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
593          s->codec_id == AV_CODEC_ID_WMV2) &&
594          avctx->width & 1) {
595          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
596          return -1;
597     }
598
599     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
600         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
601         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
602         return -1;
603     }
604
605     // FIXME mpeg2 uses that too
606     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
607                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
608         av_log(avctx, AV_LOG_ERROR,
609                "mpeg2 style quantization not supported by codec\n");
610         return -1;
611     }
612
613     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
614         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
615         return -1;
616     }
617
618     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
619         s->avctx->mb_decision != FF_MB_DECISION_RD) {
620         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
621         return -1;
622     }
623
624     if (s->avctx->scenechange_threshold < 1000000000 &&
625         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
626         av_log(avctx, AV_LOG_ERROR,
627                "closed gop with scene change detection are not supported yet, "
628                "set threshold to 1000000000\n");
629         return -1;
630     }
631
632     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
633         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
634             av_log(avctx, AV_LOG_ERROR,
635                   "low delay forcing is only available for mpeg2\n");
636             return -1;
637         }
638         if (s->max_b_frames != 0) {
639             av_log(avctx, AV_LOG_ERROR,
640                    "b frames cannot be used with low delay\n");
641             return -1;
642         }
643     }
644
645     if (s->q_scale_type == 1) {
646         if (avctx->qmax > 28) {
647             av_log(avctx, AV_LOG_ERROR,
648                    "non linear quant only supports qmax <= 28 currently\n");
649             return -1;
650         }
651     }
652
653     if (s->avctx->thread_count > 1         &&
654         s->codec_id != AV_CODEC_ID_MPEG4      &&
655         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
656         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
657         s->codec_id != AV_CODEC_ID_MJPEG      &&
658         (s->codec_id != AV_CODEC_ID_H263P)) {
659         av_log(avctx, AV_LOG_ERROR,
660                "multi threaded encoding not supported by codec\n");
661         return -1;
662     }
663
664     if (s->avctx->thread_count < 1) {
665         av_log(avctx, AV_LOG_ERROR,
666                "automatic thread number detection not supported by codec, "
667                "patch welcome\n");
668         return -1;
669     }
670
671     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
672         s->rtp_mode = 1;
673
674     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
675         s->h263_slice_structured = 1;
676
677     if (!avctx->time_base.den || !avctx->time_base.num) {
678         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
679         return -1;
680     }
681
682     if (avctx->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
683         av_log(avctx, AV_LOG_INFO,
684                "notice: b_frame_strategy only affects the first pass\n");
685         avctx->b_frame_strategy = 0;
686     }
687
688     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
689     if (i > 1) {
690         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
691         avctx->time_base.den /= i;
692         avctx->time_base.num /= i;
693         //return -1;
694     }
695
696     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
697         // (a + x * 3 / 8) / x
698         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
699         s->inter_quant_bias = 0;
700     } else {
701         s->intra_quant_bias = 0;
702         // (a - x / 4) / x
703         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
704     }
705
706     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
707         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
708         return AVERROR(EINVAL);
709     }
710
711 #if FF_API_QUANT_BIAS
712 FF_DISABLE_DEPRECATION_WARNINGS
713     if (s->intra_quant_bias == FF_DEFAULT_QUANT_BIAS &&
714         avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
715         s->intra_quant_bias = avctx->intra_quant_bias;
716     if (s->inter_quant_bias == FF_DEFAULT_QUANT_BIAS &&
717         avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
718         s->inter_quant_bias = avctx->inter_quant_bias;
719 FF_ENABLE_DEPRECATION_WARNINGS
720 #endif
721
722     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
723
724     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
725         s->avctx->time_base.den > (1 << 16) - 1) {
726         av_log(avctx, AV_LOG_ERROR,
727                "timebase %d/%d not supported by MPEG 4 standard, "
728                "the maximum admitted value for the timebase denominator "
729                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
730                (1 << 16) - 1);
731         return -1;
732     }
733     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
734
735     switch (avctx->codec->id) {
736     case AV_CODEC_ID_MPEG1VIDEO:
737         s->out_format = FMT_MPEG1;
738         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
739         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
740         break;
741     case AV_CODEC_ID_MPEG2VIDEO:
742         s->out_format = FMT_MPEG1;
743         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
744         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
745         s->rtp_mode   = 1;
746         break;
747     case AV_CODEC_ID_MJPEG:
748     case AV_CODEC_ID_AMV:
749         s->out_format = FMT_MJPEG;
750         s->intra_only = 1; /* force intra only for jpeg */
751         if (!CONFIG_MJPEG_ENCODER ||
752             ff_mjpeg_encode_init(s) < 0)
753             return -1;
754         avctx->delay = 0;
755         s->low_delay = 1;
756         break;
757     case AV_CODEC_ID_H261:
758         if (!CONFIG_H261_ENCODER)
759             return -1;
760         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
761             av_log(avctx, AV_LOG_ERROR,
762                    "The specified picture size of %dx%d is not valid for the "
763                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
764                     s->width, s->height);
765             return -1;
766         }
767         s->out_format = FMT_H261;
768         avctx->delay  = 0;
769         s->low_delay  = 1;
770         s->rtp_mode   = 0; /* Sliced encoding not supported */
771         break;
772     case AV_CODEC_ID_H263:
773         if (!CONFIG_H263_ENCODER)
774             return -1;
775         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
776                              s->width, s->height) == 8) {
777             av_log(avctx, AV_LOG_ERROR,
778                    "The specified picture size of %dx%d is not valid for "
779                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
780                    "352x288, 704x576, and 1408x1152. "
781                    "Try H.263+.\n", s->width, s->height);
782             return -1;
783         }
784         s->out_format = FMT_H263;
785         avctx->delay  = 0;
786         s->low_delay  = 1;
787         break;
788     case AV_CODEC_ID_H263P:
789         s->out_format = FMT_H263;
790         s->h263_plus  = 1;
791         /* Fx */
792         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
793         s->modified_quant  = s->h263_aic;
794         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
795         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
796
797         /* /Fx */
798         /* These are just to be sure */
799         avctx->delay = 0;
800         s->low_delay = 1;
801         break;
802     case AV_CODEC_ID_FLV1:
803         s->out_format      = FMT_H263;
804         s->h263_flv        = 2; /* format = 1; 11-bit codes */
805         s->unrestricted_mv = 1;
806         s->rtp_mode  = 0; /* don't allow GOB */
807         avctx->delay = 0;
808         s->low_delay = 1;
809         break;
810     case AV_CODEC_ID_RV10:
811         s->out_format = FMT_H263;
812         avctx->delay  = 0;
813         s->low_delay  = 1;
814         break;
815     case AV_CODEC_ID_RV20:
816         s->out_format      = FMT_H263;
817         avctx->delay       = 0;
818         s->low_delay       = 1;
819         s->modified_quant  = 1;
820         s->h263_aic        = 1;
821         s->h263_plus       = 1;
822         s->loop_filter     = 1;
823         s->unrestricted_mv = 0;
824         break;
825     case AV_CODEC_ID_MPEG4:
826         s->out_format      = FMT_H263;
827         s->h263_pred       = 1;
828         s->unrestricted_mv = 1;
829         s->low_delay       = s->max_b_frames ? 0 : 1;
830         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
831         break;
832     case AV_CODEC_ID_MSMPEG4V2:
833         s->out_format      = FMT_H263;
834         s->h263_pred       = 1;
835         s->unrestricted_mv = 1;
836         s->msmpeg4_version = 2;
837         avctx->delay       = 0;
838         s->low_delay       = 1;
839         break;
840     case AV_CODEC_ID_MSMPEG4V3:
841         s->out_format        = FMT_H263;
842         s->h263_pred         = 1;
843         s->unrestricted_mv   = 1;
844         s->msmpeg4_version   = 3;
845         s->flipflop_rounding = 1;
846         avctx->delay         = 0;
847         s->low_delay         = 1;
848         break;
849     case AV_CODEC_ID_WMV1:
850         s->out_format        = FMT_H263;
851         s->h263_pred         = 1;
852         s->unrestricted_mv   = 1;
853         s->msmpeg4_version   = 4;
854         s->flipflop_rounding = 1;
855         avctx->delay         = 0;
856         s->low_delay         = 1;
857         break;
858     case AV_CODEC_ID_WMV2:
859         s->out_format        = FMT_H263;
860         s->h263_pred         = 1;
861         s->unrestricted_mv   = 1;
862         s->msmpeg4_version   = 5;
863         s->flipflop_rounding = 1;
864         avctx->delay         = 0;
865         s->low_delay         = 1;
866         break;
867     default:
868         return -1;
869     }
870
871     avctx->has_b_frames = !s->low_delay;
872
873     s->encoding = 1;
874
875     s->progressive_frame    =
876     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
877                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
878                                 s->alternate_scan);
879
880     /* init */
881     ff_mpv_idct_init(s);
882     if (ff_mpv_common_init(s) < 0)
883         return -1;
884
885     ff_fdctdsp_init(&s->fdsp, avctx);
886     ff_me_cmp_init(&s->mecc, avctx);
887     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
888     ff_pixblockdsp_init(&s->pdsp, avctx);
889     ff_qpeldsp_init(&s->qdsp);
890
891     if (s->msmpeg4_version) {
892         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
893                           2 * 2 * (MAX_LEVEL + 1) *
894                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
895     }
896     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
897
898     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
899     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
900     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
901     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
902     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
903     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
904     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
905                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
906     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
907                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
908
909     if (s->avctx->noise_reduction) {
910         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
911                           2 * 64 * sizeof(uint16_t), fail);
912     }
913
914     ff_dct_encode_init(s);
915
916     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
917         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
918
919     s->quant_precision = 5;
920
921     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
922     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
923
924     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
925         ff_h261_encode_init(s);
926     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
927         ff_h263_encode_init(s);
928     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
929         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
930             return ret;
931     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
932         && s->out_format == FMT_MPEG1)
933         ff_mpeg1_encode_init(s);
934
935     /* init q matrix */
936     for (i = 0; i < 64; i++) {
937         int j = s->idsp.idct_permutation[i];
938         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
939             s->mpeg_quant) {
940             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
941             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
942         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
943             s->intra_matrix[j] =
944             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
945         } else {
946             /* mpeg1/2 */
947             s->chroma_intra_matrix[j] =
948             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
949             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
950         }
951         if (s->avctx->intra_matrix)
952             s->intra_matrix[j] = s->avctx->intra_matrix[i];
953         if (s->avctx->inter_matrix)
954             s->inter_matrix[j] = s->avctx->inter_matrix[i];
955     }
956
957     /* precompute matrix */
958     /* for mjpeg, we do include qscale in the matrix */
959     if (s->out_format != FMT_MJPEG) {
960         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
961                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
962                           31, 1);
963         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
964                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
965                           31, 0);
966     }
967
968     if (ff_rate_control_init(s) < 0)
969         return -1;
970
971 #if FF_API_ERROR_RATE
972     FF_DISABLE_DEPRECATION_WARNINGS
973     if (avctx->error_rate)
974         s->error_rate = avctx->error_rate;
975     FF_ENABLE_DEPRECATION_WARNINGS;
976 #endif
977
978 #if FF_API_NORMALIZE_AQP
979     FF_DISABLE_DEPRECATION_WARNINGS
980     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
981         s->mpv_flags |= FF_MPV_FLAG_NAQ;
982     FF_ENABLE_DEPRECATION_WARNINGS;
983 #endif
984
985 #if FF_API_MV0
986     FF_DISABLE_DEPRECATION_WARNINGS
987     if (avctx->flags & CODEC_FLAG_MV0)
988         s->mpv_flags |= FF_MPV_FLAG_MV0;
989     FF_ENABLE_DEPRECATION_WARNINGS
990 #endif
991
992 #if FF_API_MPV_OPT
993     FF_DISABLE_DEPRECATION_WARNINGS
994     if (avctx->rc_qsquish != 0.0)
995         s->rc_qsquish = avctx->rc_qsquish;
996     if (avctx->rc_qmod_amp != 0.0)
997         s->rc_qmod_amp = avctx->rc_qmod_amp;
998     if (avctx->rc_qmod_freq)
999         s->rc_qmod_freq = avctx->rc_qmod_freq;
1000     if (avctx->rc_buffer_aggressivity != 1.0)
1001         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
1002     if (avctx->rc_initial_cplx != 0.0)
1003         s->rc_initial_cplx = avctx->rc_initial_cplx;
1004     if (avctx->lmin)
1005         s->lmin = avctx->lmin;
1006     if (avctx->lmax)
1007         s->lmax = avctx->lmax;
1008
1009     if (avctx->rc_eq) {
1010         av_freep(&s->rc_eq);
1011         s->rc_eq = av_strdup(avctx->rc_eq);
1012         if (!s->rc_eq)
1013             return AVERROR(ENOMEM);
1014     }
1015     FF_ENABLE_DEPRECATION_WARNINGS
1016 #endif
1017
1018     if (avctx->b_frame_strategy == 2) {
1019         for (i = 0; i < s->max_b_frames + 2; i++) {
1020             s->tmp_frames[i] = av_frame_alloc();
1021             if (!s->tmp_frames[i])
1022                 return AVERROR(ENOMEM);
1023
1024             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
1025             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
1026             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
1027
1028             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
1029             if (ret < 0)
1030                 return ret;
1031         }
1032     }
1033
1034     return 0;
1035 fail:
1036     ff_mpv_encode_end(avctx);
1037     return AVERROR_UNKNOWN;
1038 }
1039
1040 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1041 {
1042     MpegEncContext *s = avctx->priv_data;
1043     int i;
1044
1045     ff_rate_control_uninit(s);
1046
1047     ff_mpv_common_end(s);
1048     if (CONFIG_MJPEG_ENCODER &&
1049         s->out_format == FMT_MJPEG)
1050         ff_mjpeg_encode_close(s);
1051
1052     av_freep(&avctx->extradata);
1053
1054     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1055         av_frame_free(&s->tmp_frames[i]);
1056
1057     ff_free_picture_tables(&s->new_picture);
1058     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1059
1060     av_freep(&s->avctx->stats_out);
1061     av_freep(&s->ac_stats);
1062
1063     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1064     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1065     s->q_chroma_intra_matrix=   NULL;
1066     s->q_chroma_intra_matrix16= NULL;
1067     av_freep(&s->q_intra_matrix);
1068     av_freep(&s->q_inter_matrix);
1069     av_freep(&s->q_intra_matrix16);
1070     av_freep(&s->q_inter_matrix16);
1071     av_freep(&s->input_picture);
1072     av_freep(&s->reordered_input_picture);
1073     av_freep(&s->dct_offset);
1074
1075     return 0;
1076 }
1077
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
1091
1092 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1093                            uint8_t *ref, int stride)
1094 {
1095     int x, y, w, h;
1096     int acc = 0;
1097
1098     w = s->width  & ~15;
1099     h = s->height & ~15;
1100
1101     for (y = 0; y < h; y += 16) {
1102         for (x = 0; x < w; x += 16) {
1103             int offset = x + y * stride;
1104             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1105                                       stride, 16);
1106             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1107             int sae  = get_sae(src + offset, mean, stride);
1108
1109             acc += sae + 500 < sad;
1110         }
1111     }
1112     return acc;
1113 }
1114
1115 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1116 {
1117     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1118                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1119                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1120                             &s->linesize, &s->uvlinesize);
1121 }
1122
1123 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1124 {
1125     Picture *pic = NULL;
1126     int64_t pts;
1127     int i, display_picture_number = 0, ret;
1128     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1129                                                  (s->low_delay ? 0 : 1);
1130     int direct = 1;
1131
1132     if (pic_arg) {
1133         pts = pic_arg->pts;
1134         display_picture_number = s->input_picture_number++;
1135
1136         if (pts != AV_NOPTS_VALUE) {
1137             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1138                 int64_t last = s->user_specified_pts;
1139
1140                 if (pts <= last) {
1141                     av_log(s->avctx, AV_LOG_ERROR,
1142                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1143                            pts, last);
1144                     return AVERROR(EINVAL);
1145                 }
1146
1147                 if (!s->low_delay && display_picture_number == 1)
1148                     s->dts_delta = pts - last;
1149             }
1150             s->user_specified_pts = pts;
1151         } else {
1152             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1153                 s->user_specified_pts =
1154                 pts = s->user_specified_pts + 1;
1155                 av_log(s->avctx, AV_LOG_INFO,
1156                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1157                        pts);
1158             } else {
1159                 pts = display_picture_number;
1160             }
1161         }
1162     }
1163
1164     if (pic_arg) {
1165         if (!pic_arg->buf[0] ||
1166             pic_arg->linesize[0] != s->linesize ||
1167             pic_arg->linesize[1] != s->uvlinesize ||
1168             pic_arg->linesize[2] != s->uvlinesize)
1169             direct = 0;
1170         if ((s->width & 15) || (s->height & 15))
1171             direct = 0;
1172         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1173             direct = 0;
1174         if (s->linesize & (STRIDE_ALIGN-1))
1175             direct = 0;
1176
1177         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1178                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1179
1180         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1181         if (i < 0)
1182             return i;
1183
1184         pic = &s->picture[i];
1185         pic->reference = 3;
1186
1187         if (direct) {
1188             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1189                 return ret;
1190         }
1191         ret = alloc_picture(s, pic, direct);
1192         if (ret < 0)
1193             return ret;
1194
1195         if (!direct) {
1196             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1197                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1198                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1199                 // empty
1200             } else {
1201                 int h_chroma_shift, v_chroma_shift;
1202                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1203                                                  &h_chroma_shift,
1204                                                  &v_chroma_shift);
1205
1206                 for (i = 0; i < 3; i++) {
1207                     int src_stride = pic_arg->linesize[i];
1208                     int dst_stride = i ? s->uvlinesize : s->linesize;
1209                     int h_shift = i ? h_chroma_shift : 0;
1210                     int v_shift = i ? v_chroma_shift : 0;
1211                     int w = s->width  >> h_shift;
1212                     int h = s->height >> v_shift;
1213                     uint8_t *src = pic_arg->data[i];
1214                     uint8_t *dst = pic->f->data[i];
1215                     int vpad = 16;
1216
1217                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1218                         && !s->progressive_sequence
1219                         && FFALIGN(s->height, 32) - s->height > 16)
1220                         vpad = 32;
1221
1222                     if (!s->avctx->rc_buffer_size)
1223                         dst += INPLACE_OFFSET;
1224
1225                     if (src_stride == dst_stride)
1226                         memcpy(dst, src, src_stride * h);
1227                     else {
1228                         int h2 = h;
1229                         uint8_t *dst2 = dst;
1230                         while (h2--) {
1231                             memcpy(dst2, src, w);
1232                             dst2 += dst_stride;
1233                             src += src_stride;
1234                         }
1235                     }
1236                     if ((s->width & 15) || (s->height & (vpad-1))) {
1237                         s->mpvencdsp.draw_edges(dst, dst_stride,
1238                                                 w, h,
1239                                                 16 >> h_shift,
1240                                                 vpad >> v_shift,
1241                                                 EDGE_BOTTOM);
1242                     }
1243                 }
1244             }
1245         }
1246         ret = av_frame_copy_props(pic->f, pic_arg);
1247         if (ret < 0)
1248             return ret;
1249
1250         pic->f->display_picture_number = display_picture_number;
1251         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1252     }
1253
1254     /* shift buffer entries */
1255     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1256         s->input_picture[i - 1] = s->input_picture[i];
1257
1258     s->input_picture[encoding_delay] = (Picture*) pic;
1259
1260     return 0;
1261 }
1262
1263 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1264 {
1265     int x, y, plane;
1266     int score = 0;
1267     int64_t score64 = 0;
1268
1269     for (plane = 0; plane < 3; plane++) {
1270         const int stride = p->f->linesize[plane];
1271         const int bw = plane ? 1 : 2;
1272         for (y = 0; y < s->mb_height * bw; y++) {
1273             for (x = 0; x < s->mb_width * bw; x++) {
1274                 int off = p->shared ? 0 : 16;
1275                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1276                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1277                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1278
1279                 switch (FFABS(s->avctx->frame_skip_exp)) {
1280                 case 0: score    =  FFMAX(score, v);          break;
1281                 case 1: score   += FFABS(v);                  break;
1282                 case 2: score64 += v * (int64_t)v;                       break;
1283                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1284                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1285                 }
1286             }
1287         }
1288     }
1289     emms_c();
1290
1291     if (score)
1292         score64 = score;
1293     if (s->avctx->frame_skip_exp < 0)
1294         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1295                       -1.0/s->avctx->frame_skip_exp);
1296
1297     if (score64 < s->avctx->frame_skip_threshold)
1298         return 1;
1299     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1300         return 1;
1301     return 0;
1302 }
1303
1304 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1305 {
1306     AVPacket pkt = { 0 };
1307     int ret, got_output;
1308
1309     av_init_packet(&pkt);
1310     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1311     if (ret < 0)
1312         return ret;
1313
1314     ret = pkt.size;
1315     av_free_packet(&pkt);
1316     return ret;
1317 }
1318
1319 static int estimate_best_b_count(MpegEncContext *s)
1320 {
1321     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1322     AVCodecContext *c = avcodec_alloc_context3(NULL);
1323     const int scale = s->avctx->brd_scale;
1324     int i, j, out_size, p_lambda, b_lambda, lambda2;
1325     int64_t best_rd  = INT64_MAX;
1326     int best_b_count = -1;
1327
1328     if (!c)
1329         return AVERROR(ENOMEM);
1330     av_assert0(scale >= 0 && scale <= 3);
1331
1332     //emms_c();
1333     //s->next_picture_ptr->quality;
1334     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1335     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1336     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1337     if (!b_lambda) // FIXME we should do this somewhere else
1338         b_lambda = p_lambda;
1339     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1340                FF_LAMBDA_SHIFT;
1341
1342     c->width        = s->width  >> scale;
1343     c->height       = s->height >> scale;
1344     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1345     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1346     c->mb_decision  = s->avctx->mb_decision;
1347     c->me_cmp       = s->avctx->me_cmp;
1348     c->mb_cmp       = s->avctx->mb_cmp;
1349     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1350     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1351     c->time_base    = s->avctx->time_base;
1352     c->max_b_frames = s->max_b_frames;
1353
1354     if (avcodec_open2(c, codec, NULL) < 0)
1355         return -1;
1356
1357     for (i = 0; i < s->max_b_frames + 2; i++) {
1358         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1359                                                 s->next_picture_ptr;
1360         uint8_t *data[4];
1361
1362         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1363             pre_input = *pre_input_ptr;
1364             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1365
1366             if (!pre_input.shared && i) {
1367                 data[0] += INPLACE_OFFSET;
1368                 data[1] += INPLACE_OFFSET;
1369                 data[2] += INPLACE_OFFSET;
1370             }
1371
1372             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1373                                        s->tmp_frames[i]->linesize[0],
1374                                        data[0],
1375                                        pre_input.f->linesize[0],
1376                                        c->width, c->height);
1377             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1378                                        s->tmp_frames[i]->linesize[1],
1379                                        data[1],
1380                                        pre_input.f->linesize[1],
1381                                        c->width >> 1, c->height >> 1);
1382             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1383                                        s->tmp_frames[i]->linesize[2],
1384                                        data[2],
1385                                        pre_input.f->linesize[2],
1386                                        c->width >> 1, c->height >> 1);
1387         }
1388     }
1389
1390     for (j = 0; j < s->max_b_frames + 1; j++) {
1391         int64_t rd = 0;
1392
1393         if (!s->input_picture[j])
1394             break;
1395
1396         c->error[0] = c->error[1] = c->error[2] = 0;
1397
1398         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1399         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1400
1401         out_size = encode_frame(c, s->tmp_frames[0]);
1402
1403         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1404
1405         for (i = 0; i < s->max_b_frames + 1; i++) {
1406             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1407
1408             s->tmp_frames[i + 1]->pict_type = is_p ?
1409                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1410             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1411
1412             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1413
1414             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1415         }
1416
1417         /* get the delayed frames */
1418         while (out_size) {
1419             out_size = encode_frame(c, NULL);
1420             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1421         }
1422
1423         rd += c->error[0] + c->error[1] + c->error[2];
1424
1425         if (rd < best_rd) {
1426             best_rd = rd;
1427             best_b_count = j;
1428         }
1429     }
1430
1431     avcodec_close(c);
1432     av_freep(&c);
1433
1434     return best_b_count;
1435 }
1436
1437 static int select_input_picture(MpegEncContext *s)
1438 {
1439     int i, ret;
1440
1441     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1442         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1443     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1444
1445     /* set next picture type & ordering */
1446     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1447         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1448             if (s->picture_in_gop_number < s->gop_size &&
1449                 s->next_picture_ptr &&
1450                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1451                 // FIXME check that te gop check above is +-1 correct
1452                 av_frame_unref(s->input_picture[0]->f);
1453
1454                 ff_vbv_update(s, 0);
1455
1456                 goto no_output_pic;
1457             }
1458         }
1459
1460         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1461             !s->next_picture_ptr || s->intra_only) {
1462             s->reordered_input_picture[0] = s->input_picture[0];
1463             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1464             s->reordered_input_picture[0]->f->coded_picture_number =
1465                 s->coded_picture_number++;
1466         } else {
1467             int b_frames;
1468
1469             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1470                 for (i = 0; i < s->max_b_frames + 1; i++) {
1471                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1472
1473                     if (pict_num >= s->rc_context.num_entries)
1474                         break;
1475                     if (!s->input_picture[i]) {
1476                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1477                         break;
1478                     }
1479
1480                     s->input_picture[i]->f->pict_type =
1481                         s->rc_context.entry[pict_num].new_pict_type;
1482                 }
1483             }
1484
1485             if (s->avctx->b_frame_strategy == 0) {
1486                 b_frames = s->max_b_frames;
1487                 while (b_frames && !s->input_picture[b_frames])
1488                     b_frames--;
1489             } else if (s->avctx->b_frame_strategy == 1) {
1490                 for (i = 1; i < s->max_b_frames + 1; i++) {
1491                     if (s->input_picture[i] &&
1492                         s->input_picture[i]->b_frame_score == 0) {
1493                         s->input_picture[i]->b_frame_score =
1494                             get_intra_count(s,
1495                                             s->input_picture[i    ]->f->data[0],
1496                                             s->input_picture[i - 1]->f->data[0],
1497                                             s->linesize) + 1;
1498                     }
1499                 }
1500                 for (i = 0; i < s->max_b_frames + 1; i++) {
1501                     if (!s->input_picture[i] ||
1502                         s->input_picture[i]->b_frame_score - 1 >
1503                             s->mb_num / s->avctx->b_sensitivity)
1504                         break;
1505                 }
1506
1507                 b_frames = FFMAX(0, i - 1);
1508
1509                 /* reset scores */
1510                 for (i = 0; i < b_frames + 1; i++) {
1511                     s->input_picture[i]->b_frame_score = 0;
1512                 }
1513             } else if (s->avctx->b_frame_strategy == 2) {
1514                 b_frames = estimate_best_b_count(s);
1515             } else {
1516                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1517                 b_frames = 0;
1518             }
1519
1520             emms_c();
1521
1522             for (i = b_frames - 1; i >= 0; i--) {
1523                 int type = s->input_picture[i]->f->pict_type;
1524                 if (type && type != AV_PICTURE_TYPE_B)
1525                     b_frames = i;
1526             }
1527             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1528                 b_frames == s->max_b_frames) {
1529                 av_log(s->avctx, AV_LOG_ERROR,
1530                        "warning, too many b frames in a row\n");
1531             }
1532
1533             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1534                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1535                     s->gop_size > s->picture_in_gop_number) {
1536                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1537                 } else {
1538                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1539                         b_frames = 0;
1540                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1541                 }
1542             }
1543
1544             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1545                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1546                 b_frames--;
1547
1548             s->reordered_input_picture[0] = s->input_picture[b_frames];
1549             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1550                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1551             s->reordered_input_picture[0]->f->coded_picture_number =
1552                 s->coded_picture_number++;
1553             for (i = 0; i < b_frames; i++) {
1554                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1555                 s->reordered_input_picture[i + 1]->f->pict_type =
1556                     AV_PICTURE_TYPE_B;
1557                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1558                     s->coded_picture_number++;
1559             }
1560         }
1561     }
1562 no_output_pic:
1563     if (s->reordered_input_picture[0]) {
1564         s->reordered_input_picture[0]->reference =
1565            s->reordered_input_picture[0]->f->pict_type !=
1566                AV_PICTURE_TYPE_B ? 3 : 0;
1567
1568         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1569         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1570             return ret;
1571
1572         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1573             // input is a shared pix, so we can't modifiy it -> alloc a new
1574             // one & ensure that the shared one is reuseable
1575
1576             Picture *pic;
1577             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1578             if (i < 0)
1579                 return i;
1580             pic = &s->picture[i];
1581
1582             pic->reference = s->reordered_input_picture[0]->reference;
1583             if (alloc_picture(s, pic, 0) < 0) {
1584                 return -1;
1585             }
1586
1587             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1588             if (ret < 0)
1589                 return ret;
1590
1591             /* mark us unused / free shared pic */
1592             av_frame_unref(s->reordered_input_picture[0]->f);
1593             s->reordered_input_picture[0]->shared = 0;
1594
1595             s->current_picture_ptr = pic;
1596         } else {
1597             // input is not a shared pix -> reuse buffer for current_pix
1598             s->current_picture_ptr = s->reordered_input_picture[0];
1599             for (i = 0; i < 4; i++) {
1600                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1601             }
1602         }
1603         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1604         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1605                                        s->current_picture_ptr)) < 0)
1606             return ret;
1607
1608         s->picture_number = s->new_picture.f->display_picture_number;
1609     } else {
1610         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1611     }
1612     return 0;
1613 }
1614
1615 static void frame_end(MpegEncContext *s)
1616 {
1617     if (s->unrestricted_mv &&
1618         s->current_picture.reference &&
1619         !s->intra_only) {
1620         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1621         int hshift = desc->log2_chroma_w;
1622         int vshift = desc->log2_chroma_h;
1623         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1624                                 s->current_picture.f->linesize[0],
1625                                 s->h_edge_pos, s->v_edge_pos,
1626                                 EDGE_WIDTH, EDGE_WIDTH,
1627                                 EDGE_TOP | EDGE_BOTTOM);
1628         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1629                                 s->current_picture.f->linesize[1],
1630                                 s->h_edge_pos >> hshift,
1631                                 s->v_edge_pos >> vshift,
1632                                 EDGE_WIDTH >> hshift,
1633                                 EDGE_WIDTH >> vshift,
1634                                 EDGE_TOP | EDGE_BOTTOM);
1635         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1636                                 s->current_picture.f->linesize[2],
1637                                 s->h_edge_pos >> hshift,
1638                                 s->v_edge_pos >> vshift,
1639                                 EDGE_WIDTH >> hshift,
1640                                 EDGE_WIDTH >> vshift,
1641                                 EDGE_TOP | EDGE_BOTTOM);
1642     }
1643
1644     emms_c();
1645
1646     s->last_pict_type                 = s->pict_type;
1647     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1648     if (s->pict_type!= AV_PICTURE_TYPE_B)
1649         s->last_non_b_pict_type = s->pict_type;
1650
1651 #if FF_API_CODED_FRAME
1652 FF_DISABLE_DEPRECATION_WARNINGS
1653     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1654 FF_ENABLE_DEPRECATION_WARNINGS
1655 #endif
1656 }
1657
1658 static void update_noise_reduction(MpegEncContext *s)
1659 {
1660     int intra, i;
1661
1662     for (intra = 0; intra < 2; intra++) {
1663         if (s->dct_count[intra] > (1 << 16)) {
1664             for (i = 0; i < 64; i++) {
1665                 s->dct_error_sum[intra][i] >>= 1;
1666             }
1667             s->dct_count[intra] >>= 1;
1668         }
1669
1670         for (i = 0; i < 64; i++) {
1671             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1672                                        s->dct_count[intra] +
1673                                        s->dct_error_sum[intra][i] / 2) /
1674                                       (s->dct_error_sum[intra][i] + 1);
1675         }
1676     }
1677 }
1678
1679 static int frame_start(MpegEncContext *s)
1680 {
1681     int ret;
1682
1683     /* mark & release old frames */
1684     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1685         s->last_picture_ptr != s->next_picture_ptr &&
1686         s->last_picture_ptr->f->buf[0]) {
1687         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1688     }
1689
1690     s->current_picture_ptr->f->pict_type = s->pict_type;
1691     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1692
1693     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1694     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1695                                    s->current_picture_ptr)) < 0)
1696         return ret;
1697
1698     if (s->pict_type != AV_PICTURE_TYPE_B) {
1699         s->last_picture_ptr = s->next_picture_ptr;
1700         if (!s->droppable)
1701             s->next_picture_ptr = s->current_picture_ptr;
1702     }
1703
1704     if (s->last_picture_ptr) {
1705         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1706         if (s->last_picture_ptr->f->buf[0] &&
1707             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1708                                        s->last_picture_ptr)) < 0)
1709             return ret;
1710     }
1711     if (s->next_picture_ptr) {
1712         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1713         if (s->next_picture_ptr->f->buf[0] &&
1714             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1715                                        s->next_picture_ptr)) < 0)
1716             return ret;
1717     }
1718
1719     if (s->picture_structure!= PICT_FRAME) {
1720         int i;
1721         for (i = 0; i < 4; i++) {
1722             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1723                 s->current_picture.f->data[i] +=
1724                     s->current_picture.f->linesize[i];
1725             }
1726             s->current_picture.f->linesize[i] *= 2;
1727             s->last_picture.f->linesize[i]    *= 2;
1728             s->next_picture.f->linesize[i]    *= 2;
1729         }
1730     }
1731
1732     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1733         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1734         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1735     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1736         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1737         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1738     } else {
1739         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1740         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1741     }
1742
1743     if (s->dct_error_sum) {
1744         av_assert2(s->avctx->noise_reduction && s->encoding);
1745         update_noise_reduction(s);
1746     }
1747
1748     return 0;
1749 }
1750
1751 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1752                           const AVFrame *pic_arg, int *got_packet)
1753 {
1754     MpegEncContext *s = avctx->priv_data;
1755     int i, stuffing_count, ret;
1756     int context_count = s->slice_context_count;
1757
1758     s->vbv_ignore_qmax = 0;
1759
1760     s->picture_in_gop_number++;
1761
1762     if (load_input_picture(s, pic_arg) < 0)
1763         return -1;
1764
1765     if (select_input_picture(s) < 0) {
1766         return -1;
1767     }
1768
1769     /* output? */
1770     if (s->new_picture.f->data[0]) {
1771         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1772         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1773                                               :
1774                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1775         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1776             return ret;
1777         if (s->mb_info) {
1778             s->mb_info_ptr = av_packet_new_side_data(pkt,
1779                                  AV_PKT_DATA_H263_MB_INFO,
1780                                  s->mb_width*s->mb_height*12);
1781             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1782         }
1783
1784         for (i = 0; i < context_count; i++) {
1785             int start_y = s->thread_context[i]->start_mb_y;
1786             int   end_y = s->thread_context[i]->  end_mb_y;
1787             int h       = s->mb_height;
1788             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1789             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1790
1791             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1792         }
1793
1794         s->pict_type = s->new_picture.f->pict_type;
1795         //emms_c();
1796         ret = frame_start(s);
1797         if (ret < 0)
1798             return ret;
1799 vbv_retry:
1800         ret = encode_picture(s, s->picture_number);
1801         if (growing_buffer) {
1802             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1803             pkt->data = s->pb.buf;
1804             pkt->size = avctx->internal->byte_buffer_size;
1805         }
1806         if (ret < 0)
1807             return -1;
1808
1809         avctx->header_bits = s->header_bits;
1810         avctx->mv_bits     = s->mv_bits;
1811         avctx->misc_bits   = s->misc_bits;
1812         avctx->i_tex_bits  = s->i_tex_bits;
1813         avctx->p_tex_bits  = s->p_tex_bits;
1814         avctx->i_count     = s->i_count;
1815         // FIXME f/b_count in avctx
1816         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1817         avctx->skip_count  = s->skip_count;
1818
1819         frame_end(s);
1820
1821         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1822             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1823
1824         if (avctx->rc_buffer_size) {
1825             RateControlContext *rcc = &s->rc_context;
1826             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1827             int hq = (s->avctx->mb_decision == FF_MB_DECISION_RD || s->avctx->trellis);
1828             int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1829
1830             if (put_bits_count(&s->pb) > max_size &&
1831                 s->lambda < s->lmax) {
1832                 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1833                                        (s->qscale + 1) / s->qscale);
1834                 if (s->adaptive_quant) {
1835                     int i;
1836                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1837                         s->lambda_table[i] =
1838                             FFMAX(s->lambda_table[i] + min_step,
1839                                   s->lambda_table[i] * (s->qscale + 1) /
1840                                   s->qscale);
1841                 }
1842                 s->mb_skipped = 0;        // done in frame_start()
1843                 // done in encode_picture() so we must undo it
1844                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1845                     if (s->flipflop_rounding          ||
1846                         s->codec_id == AV_CODEC_ID_H263P ||
1847                         s->codec_id == AV_CODEC_ID_MPEG4)
1848                         s->no_rounding ^= 1;
1849                 }
1850                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1851                     s->time_base       = s->last_time_base;
1852                     s->last_non_b_time = s->time - s->pp_time;
1853                 }
1854                 for (i = 0; i < context_count; i++) {
1855                     PutBitContext *pb = &s->thread_context[i]->pb;
1856                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1857                 }
1858                 s->vbv_ignore_qmax = 1;
1859                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1860                 goto vbv_retry;
1861             }
1862
1863             av_assert0(s->avctx->rc_max_rate);
1864         }
1865
1866         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1867             ff_write_pass1_stats(s);
1868
1869         for (i = 0; i < 4; i++) {
1870             s->current_picture_ptr->f->error[i] =
1871             s->current_picture.f->error[i] =
1872                 s->current_picture.error[i];
1873             avctx->error[i] += s->current_picture_ptr->f->error[i];
1874         }
1875         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1876                                        s->current_picture_ptr->f->error,
1877                                        (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1878                                        s->pict_type);
1879
1880         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1881             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1882                    avctx->i_tex_bits + avctx->p_tex_bits ==
1883                        put_bits_count(&s->pb));
1884         flush_put_bits(&s->pb);
1885         s->frame_bits  = put_bits_count(&s->pb);
1886
1887         stuffing_count = ff_vbv_update(s, s->frame_bits);
1888         s->stuffing_bits = 8*stuffing_count;
1889         if (stuffing_count) {
1890             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1891                     stuffing_count + 50) {
1892                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1893                 return -1;
1894             }
1895
1896             switch (s->codec_id) {
1897             case AV_CODEC_ID_MPEG1VIDEO:
1898             case AV_CODEC_ID_MPEG2VIDEO:
1899                 while (stuffing_count--) {
1900                     put_bits(&s->pb, 8, 0);
1901                 }
1902             break;
1903             case AV_CODEC_ID_MPEG4:
1904                 put_bits(&s->pb, 16, 0);
1905                 put_bits(&s->pb, 16, 0x1C3);
1906                 stuffing_count -= 4;
1907                 while (stuffing_count--) {
1908                     put_bits(&s->pb, 8, 0xFF);
1909                 }
1910             break;
1911             default:
1912                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1913             }
1914             flush_put_bits(&s->pb);
1915             s->frame_bits  = put_bits_count(&s->pb);
1916         }
1917
1918         /* update mpeg1/2 vbv_delay for CBR */
1919         if (s->avctx->rc_max_rate                          &&
1920             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1921             s->out_format == FMT_MPEG1                     &&
1922             90000LL * (avctx->rc_buffer_size - 1) <=
1923                 s->avctx->rc_max_rate * 0xFFFFLL) {
1924             int vbv_delay, min_delay;
1925             double inbits  = s->avctx->rc_max_rate *
1926                              av_q2d(s->avctx->time_base);
1927             int    minbits = s->frame_bits - 8 *
1928                              (s->vbv_delay_ptr - s->pb.buf - 1);
1929             double bits    = s->rc_context.buffer_index + minbits - inbits;
1930
1931             if (bits < 0)
1932                 av_log(s->avctx, AV_LOG_ERROR,
1933                        "Internal error, negative bits\n");
1934
1935             assert(s->repeat_first_field == 0);
1936
1937             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1938             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1939                         s->avctx->rc_max_rate;
1940
1941             vbv_delay = FFMAX(vbv_delay, min_delay);
1942
1943             av_assert0(vbv_delay < 0xFFFF);
1944
1945             s->vbv_delay_ptr[0] &= 0xF8;
1946             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1947             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1948             s->vbv_delay_ptr[2] &= 0x07;
1949             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1950             avctx->vbv_delay     = vbv_delay * 300;
1951         }
1952         s->total_bits     += s->frame_bits;
1953         avctx->frame_bits  = s->frame_bits;
1954
1955         pkt->pts = s->current_picture.f->pts;
1956         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1957             if (!s->current_picture.f->coded_picture_number)
1958                 pkt->dts = pkt->pts - s->dts_delta;
1959             else
1960                 pkt->dts = s->reordered_pts;
1961             s->reordered_pts = pkt->pts;
1962         } else
1963             pkt->dts = pkt->pts;
1964         if (s->current_picture.f->key_frame)
1965             pkt->flags |= AV_PKT_FLAG_KEY;
1966         if (s->mb_info)
1967             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1968     } else {
1969         s->frame_bits = 0;
1970     }
1971
1972     /* release non-reference frames */
1973     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1974         if (!s->picture[i].reference)
1975             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1976     }
1977
1978     av_assert1((s->frame_bits & 7) == 0);
1979
1980     pkt->size = s->frame_bits / 8;
1981     *got_packet = !!pkt->size;
1982     return 0;
1983 }
1984
1985 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1986                                                 int n, int threshold)
1987 {
1988     static const char tab[64] = {
1989         3, 2, 2, 1, 1, 1, 1, 1,
1990         1, 1, 1, 1, 1, 1, 1, 1,
1991         1, 1, 1, 1, 1, 1, 1, 1,
1992         0, 0, 0, 0, 0, 0, 0, 0,
1993         0, 0, 0, 0, 0, 0, 0, 0,
1994         0, 0, 0, 0, 0, 0, 0, 0,
1995         0, 0, 0, 0, 0, 0, 0, 0,
1996         0, 0, 0, 0, 0, 0, 0, 0
1997     };
1998     int score = 0;
1999     int run = 0;
2000     int i;
2001     int16_t *block = s->block[n];
2002     const int last_index = s->block_last_index[n];
2003     int skip_dc;
2004
2005     if (threshold < 0) {
2006         skip_dc = 0;
2007         threshold = -threshold;
2008     } else
2009         skip_dc = 1;
2010
2011     /* Are all we could set to zero already zero? */
2012     if (last_index <= skip_dc - 1)
2013         return;
2014
2015     for (i = 0; i <= last_index; i++) {
2016         const int j = s->intra_scantable.permutated[i];
2017         const int level = FFABS(block[j]);
2018         if (level == 1) {
2019             if (skip_dc && i == 0)
2020                 continue;
2021             score += tab[run];
2022             run = 0;
2023         } else if (level > 1) {
2024             return;
2025         } else {
2026             run++;
2027         }
2028     }
2029     if (score >= threshold)
2030         return;
2031     for (i = skip_dc; i <= last_index; i++) {
2032         const int j = s->intra_scantable.permutated[i];
2033         block[j] = 0;
2034     }
2035     if (block[0])
2036         s->block_last_index[n] = 0;
2037     else
2038         s->block_last_index[n] = -1;
2039 }
2040
2041 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2042                                int last_index)
2043 {
2044     int i;
2045     const int maxlevel = s->max_qcoeff;
2046     const int minlevel = s->min_qcoeff;
2047     int overflow = 0;
2048
2049     if (s->mb_intra) {
2050         i = 1; // skip clipping of intra dc
2051     } else
2052         i = 0;
2053
2054     for (; i <= last_index; i++) {
2055         const int j = s->intra_scantable.permutated[i];
2056         int level = block[j];
2057
2058         if (level > maxlevel) {
2059             level = maxlevel;
2060             overflow++;
2061         } else if (level < minlevel) {
2062             level = minlevel;
2063             overflow++;
2064         }
2065
2066         block[j] = level;
2067     }
2068
2069     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2070         av_log(s->avctx, AV_LOG_INFO,
2071                "warning, clipping %d dct coefficients to %d..%d\n",
2072                overflow, minlevel, maxlevel);
2073 }
2074
/**
 * Compute a weight for each pixel of an 8x8 block from its local activity.
 *
 * For every position the pixel and its neighbours inside the block (a window
 * of up to 3x3 samples, cropped at the block border) are gathered; the weight
 * is 36 * ff_sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between two rows of ptr, in samples
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left    = FFMAX(col - 1, 0);
            const int right   = FFMIN(8, col + 2);
            /* number of samples in the cropped window */
            const int samples = (bottom - top) * (right - left);
            int total   = 0;
            int squares = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int px = line[nx];
                    total   += px;
                    squares += px * px;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
2098
2099 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2100                                                 int motion_x, int motion_y,
2101                                                 int mb_block_height,
2102                                                 int mb_block_width,
2103                                                 int mb_block_count)
2104 {
2105     int16_t weight[12][64];
2106     int16_t orig[12][64];
2107     const int mb_x = s->mb_x;
2108     const int mb_y = s->mb_y;
2109     int i;
2110     int skip_dct[12];
2111     int dct_offset = s->linesize * 8; // default for progressive frames
2112     int uv_dct_offset = s->uvlinesize * 8;
2113     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2114     ptrdiff_t wrap_y, wrap_c;
2115
2116     for (i = 0; i < mb_block_count; i++)
2117         skip_dct[i] = s->skipdct;
2118
2119     if (s->adaptive_quant) {
2120         const int last_qp = s->qscale;
2121         const int mb_xy = mb_x + mb_y * s->mb_stride;
2122
2123         s->lambda = s->lambda_table[mb_xy];
2124         update_qscale(s);
2125
2126         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2127             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2128             s->dquant = s->qscale - last_qp;
2129
2130             if (s->out_format == FMT_H263) {
2131                 s->dquant = av_clip(s->dquant, -2, 2);
2132
2133                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2134                     if (!s->mb_intra) {
2135                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2136                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2137                                 s->dquant = 0;
2138                         }
2139                         if (s->mv_type == MV_TYPE_8X8)
2140                             s->dquant = 0;
2141                     }
2142                 }
2143             }
2144         }
2145         ff_set_qscale(s, last_qp + s->dquant);
2146     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2147         ff_set_qscale(s, s->qscale + s->dquant);
2148
2149     wrap_y = s->linesize;
2150     wrap_c = s->uvlinesize;
2151     ptr_y  = s->new_picture.f->data[0] +
2152              (mb_y * 16 * wrap_y)              + mb_x * 16;
2153     ptr_cb = s->new_picture.f->data[1] +
2154              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2155     ptr_cr = s->new_picture.f->data[2] +
2156              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2157
2158     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2159         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2160         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2161         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2162         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2163                                  wrap_y, wrap_y,
2164                                  16, 16, mb_x * 16, mb_y * 16,
2165                                  s->width, s->height);
2166         ptr_y = ebuf;
2167         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2168                                  wrap_c, wrap_c,
2169                                  mb_block_width, mb_block_height,
2170                                  mb_x * mb_block_width, mb_y * mb_block_height,
2171                                  cw, ch);
2172         ptr_cb = ebuf + 16 * wrap_y;
2173         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2174                                  wrap_c, wrap_c,
2175                                  mb_block_width, mb_block_height,
2176                                  mb_x * mb_block_width, mb_y * mb_block_height,
2177                                  cw, ch);
2178         ptr_cr = ebuf + 16 * wrap_y + 16;
2179     }
2180
2181     if (s->mb_intra) {
2182         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2183             int progressive_score, interlaced_score;
2184
2185             s->interlaced_dct = 0;
2186             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2187                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2188                                                      NULL, wrap_y, 8) - 400;
2189
2190             if (progressive_score > 0) {
2191                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2192                                                         NULL, wrap_y * 2, 8) +
2193                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2194                                                         NULL, wrap_y * 2, 8);
2195                 if (progressive_score > interlaced_score) {
2196                     s->interlaced_dct = 1;
2197
2198                     dct_offset = wrap_y;
2199                     uv_dct_offset = wrap_c;
2200                     wrap_y <<= 1;
2201                     if (s->chroma_format == CHROMA_422 ||
2202                         s->chroma_format == CHROMA_444)
2203                         wrap_c <<= 1;
2204                 }
2205             }
2206         }
2207
2208         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2209         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2210         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2211         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2212
2213         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2214             skip_dct[4] = 1;
2215             skip_dct[5] = 1;
2216         } else {
2217             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2218             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2219             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2220                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2221                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2222             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2223                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2224                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2225                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2226                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2227                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2228                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2229             }
2230         }
2231     } else {
2232         op_pixels_func (*op_pix)[4];
2233         qpel_mc_func (*op_qpix)[16];
2234         uint8_t *dest_y, *dest_cb, *dest_cr;
2235
2236         dest_y  = s->dest[0];
2237         dest_cb = s->dest[1];
2238         dest_cr = s->dest[2];
2239
2240         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2241             op_pix  = s->hdsp.put_pixels_tab;
2242             op_qpix = s->qdsp.put_qpel_pixels_tab;
2243         } else {
2244             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2245             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2246         }
2247
2248         if (s->mv_dir & MV_DIR_FORWARD) {
2249             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2250                           s->last_picture.f->data,
2251                           op_pix, op_qpix);
2252             op_pix  = s->hdsp.avg_pixels_tab;
2253             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2254         }
2255         if (s->mv_dir & MV_DIR_BACKWARD) {
2256             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2257                           s->next_picture.f->data,
2258                           op_pix, op_qpix);
2259         }
2260
2261         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2262             int progressive_score, interlaced_score;
2263
2264             s->interlaced_dct = 0;
2265             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2266                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2267                                                      ptr_y + wrap_y * 8,
2268                                                      wrap_y, 8) - 400;
2269
2270             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2271                 progressive_score -= 400;
2272
2273             if (progressive_score > 0) {
2274                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2275                                                         wrap_y * 2, 8) +
2276                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2277                                                         ptr_y + wrap_y,
2278                                                         wrap_y * 2, 8);
2279
2280                 if (progressive_score > interlaced_score) {
2281                     s->interlaced_dct = 1;
2282
2283                     dct_offset = wrap_y;
2284                     uv_dct_offset = wrap_c;
2285                     wrap_y <<= 1;
2286                     if (s->chroma_format == CHROMA_422)
2287                         wrap_c <<= 1;
2288                 }
2289             }
2290         }
2291
2292         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2293         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2294         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2295                             dest_y + dct_offset, wrap_y);
2296         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2297                             dest_y + dct_offset + 8, wrap_y);
2298
2299         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2300             skip_dct[4] = 1;
2301             skip_dct[5] = 1;
2302         } else {
2303             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2304             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2305             if (!s->chroma_y_shift) { /* 422 */
2306                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2307                                     dest_cb + uv_dct_offset, wrap_c);
2308                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2309                                     dest_cr + uv_dct_offset, wrap_c);
2310             }
2311         }
2312         /* pre quantization */
2313         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2314                 2 * s->qscale * s->qscale) {
2315             // FIXME optimize
2316             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2317                 skip_dct[0] = 1;
2318             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2319                 skip_dct[1] = 1;
2320             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2321                                wrap_y, 8) < 20 * s->qscale)
2322                 skip_dct[2] = 1;
2323             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2324                                wrap_y, 8) < 20 * s->qscale)
2325                 skip_dct[3] = 1;
2326             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2327                 skip_dct[4] = 1;
2328             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2329                 skip_dct[5] = 1;
2330             if (!s->chroma_y_shift) { /* 422 */
2331                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2332                                    dest_cb + uv_dct_offset,
2333                                    wrap_c, 8) < 20 * s->qscale)
2334                     skip_dct[6] = 1;
2335                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2336                                    dest_cr + uv_dct_offset,
2337                                    wrap_c, 8) < 20 * s->qscale)
2338                     skip_dct[7] = 1;
2339             }
2340         }
2341     }
2342
2343     if (s->quantizer_noise_shaping) {
2344         if (!skip_dct[0])
2345             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2346         if (!skip_dct[1])
2347             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2348         if (!skip_dct[2])
2349             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2350         if (!skip_dct[3])
2351             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2352         if (!skip_dct[4])
2353             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2354         if (!skip_dct[5])
2355             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2356         if (!s->chroma_y_shift) { /* 422 */
2357             if (!skip_dct[6])
2358                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2359                                   wrap_c);
2360             if (!skip_dct[7])
2361                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2362                                   wrap_c);
2363         }
2364         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2365     }
2366
2367     /* DCT & quantize */
2368     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2369     {
2370         for (i = 0; i < mb_block_count; i++) {
2371             if (!skip_dct[i]) {
2372                 int overflow;
2373                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2374                 // FIXME we could decide to change to quantizer instead of
2375                 // clipping
2376                 // JS: I don't think that would be a good idea it could lower
2377                 //     quality instead of improve it. Just INTRADC clipping
2378                 //     deserves changes in quantizer
2379                 if (overflow)
2380                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2381             } else
2382                 s->block_last_index[i] = -1;
2383         }
2384         if (s->quantizer_noise_shaping) {
2385             for (i = 0; i < mb_block_count; i++) {
2386                 if (!skip_dct[i]) {
2387                     s->block_last_index[i] =
2388                         dct_quantize_refine(s, s->block[i], weight[i],
2389                                             orig[i], i, s->qscale);
2390                 }
2391             }
2392         }
2393
2394         if (s->luma_elim_threshold && !s->mb_intra)
2395             for (i = 0; i < 4; i++)
2396                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2397         if (s->chroma_elim_threshold && !s->mb_intra)
2398             for (i = 4; i < mb_block_count; i++)
2399                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2400
2401         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2402             for (i = 0; i < mb_block_count; i++) {
2403                 if (s->block_last_index[i] == -1)
2404                     s->coded_score[i] = INT_MAX / 256;
2405             }
2406         }
2407     }
2408
2409     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2410         s->block_last_index[4] =
2411         s->block_last_index[5] = 0;
2412         s->block[4][0] =
2413         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2414         if (!s->chroma_y_shift) { /* 422 / 444 */
2415             for (i=6; i<12; i++) {
2416                 s->block_last_index[i] = 0;
2417                 s->block[i][0] = s->block[4][0];
2418             }
2419         }
2420     }
2421
2422     // non c quantize code returns incorrect block_last_index FIXME
2423     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2424         for (i = 0; i < mb_block_count; i++) {
2425             int j;
2426             if (s->block_last_index[i] > 0) {
2427                 for (j = 63; j > 0; j--) {
2428                     if (s->block[i][s->intra_scantable.permutated[j]])
2429                         break;
2430                 }
2431                 s->block_last_index[i] = j;
2432             }
2433         }
2434     }
2435
2436     /* huffman encode */
2437     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2438     case AV_CODEC_ID_MPEG1VIDEO:
2439     case AV_CODEC_ID_MPEG2VIDEO:
2440         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2441             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2442         break;
2443     case AV_CODEC_ID_MPEG4:
2444         if (CONFIG_MPEG4_ENCODER)
2445             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2446         break;
2447     case AV_CODEC_ID_MSMPEG4V2:
2448     case AV_CODEC_ID_MSMPEG4V3:
2449     case AV_CODEC_ID_WMV1:
2450         if (CONFIG_MSMPEG4_ENCODER)
2451             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2452         break;
2453     case AV_CODEC_ID_WMV2:
2454         if (CONFIG_WMV2_ENCODER)
2455             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2456         break;
2457     case AV_CODEC_ID_H261:
2458         if (CONFIG_H261_ENCODER)
2459             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2460         break;
2461     case AV_CODEC_ID_H263:
2462     case AV_CODEC_ID_H263P:
2463     case AV_CODEC_ID_FLV1:
2464     case AV_CODEC_ID_RV10:
2465     case AV_CODEC_ID_RV20:
2466         if (CONFIG_H263_ENCODER)
2467             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2468         break;
2469     case AV_CODEC_ID_MJPEG:
2470     case AV_CODEC_ID_AMV:
2471         if (CONFIG_MJPEG_ENCODER)
2472             ff_mjpeg_encode_mb(s, s->block);
2473         break;
2474     default:
2475         av_assert1(0);
2476     }
2477 }
2478
2479 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2480 {
2481     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2482     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2483     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2484 }
2485
2486 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2487     int i;
2488
2489     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2490
2491     /* mpeg1 */
2492     d->mb_skip_run= s->mb_skip_run;
2493     for(i=0; i<3; i++)
2494         d->last_dc[i] = s->last_dc[i];
2495
2496     /* statistics */
2497     d->mv_bits= s->mv_bits;
2498     d->i_tex_bits= s->i_tex_bits;
2499     d->p_tex_bits= s->p_tex_bits;
2500     d->i_count= s->i_count;
2501     d->f_count= s->f_count;
2502     d->b_count= s->b_count;
2503     d->skip_count= s->skip_count;
2504     d->misc_bits= s->misc_bits;
2505     d->last_bits= 0;
2506
2507     d->mb_skipped= 0;
2508     d->qscale= s->qscale;
2509     d->dquant= s->dquant;
2510
2511     d->esc3_level_length= s->esc3_level_length;
2512 }
2513
2514 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2515     int i;
2516
2517     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2518     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2519
2520     /* mpeg1 */
2521     d->mb_skip_run= s->mb_skip_run;
2522     for(i=0; i<3; i++)
2523         d->last_dc[i] = s->last_dc[i];
2524
2525     /* statistics */
2526     d->mv_bits= s->mv_bits;
2527     d->i_tex_bits= s->i_tex_bits;
2528     d->p_tex_bits= s->p_tex_bits;
2529     d->i_count= s->i_count;
2530     d->f_count= s->f_count;
2531     d->b_count= s->b_count;
2532     d->skip_count= s->skip_count;
2533     d->misc_bits= s->misc_bits;
2534
2535     d->mb_intra= s->mb_intra;
2536     d->mb_skipped= s->mb_skipped;
2537     d->mv_type= s->mv_type;
2538     d->mv_dir= s->mv_dir;
2539     d->pb= s->pb;
2540     if(s->data_partitioning){
2541         d->pb2= s->pb2;
2542         d->tex_pb= s->tex_pb;
2543     }
2544     d->block= s->block;
2545     for(i=0; i<8; i++)
2546         d->block_last_index[i]= s->block_last_index[i];
2547     d->interlaced_dct= s->interlaced_dct;
2548     d->qscale= s->qscale;
2549
2550     d->esc3_level_length= s->esc3_level_length;
2551 }
2552
2553 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2554                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2555                            int *dmin, int *next_block, int motion_x, int motion_y)
2556 {
2557     int score;
2558     uint8_t *dest_backup[3];
2559
2560     copy_context_before_encode(s, backup, type);
2561
2562     s->block= s->blocks[*next_block];
2563     s->pb= pb[*next_block];
2564     if(s->data_partitioning){
2565         s->pb2   = pb2   [*next_block];
2566         s->tex_pb= tex_pb[*next_block];
2567     }
2568
2569     if(*next_block){
2570         memcpy(dest_backup, s->dest, sizeof(s->dest));
2571         s->dest[0] = s->sc.rd_scratchpad;
2572         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2573         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2574         av_assert0(s->linesize >= 32); //FIXME
2575     }
2576
2577     encode_mb(s, motion_x, motion_y);
2578
2579     score= put_bits_count(&s->pb);
2580     if(s->data_partitioning){
2581         score+= put_bits_count(&s->pb2);
2582         score+= put_bits_count(&s->tex_pb);
2583     }
2584
2585     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2586         ff_mpv_decode_mb(s, s->block);
2587
2588         score *= s->lambda2;
2589         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2590     }
2591
2592     if(*next_block){
2593         memcpy(s->dest, dest_backup, sizeof(s->dest));
2594     }
2595
2596     if(score<*dmin){
2597         *dmin= score;
2598         *next_block^=1;
2599
2600         copy_context_after_encode(best, s, type);
2601     }
2602 }
2603
2604 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2605     uint32_t *sq = ff_square_tab + 256;
2606     int acc=0;
2607     int x,y;
2608
2609     if(w==16 && h==16)
2610         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2611     else if(w==8 && h==8)
2612         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2613
2614     for(y=0; y<h; y++){
2615         for(x=0; x<w; x++){
2616             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2617         }
2618     }
2619
2620     av_assert2(acc>=0);
2621
2622     return acc;
2623 }
2624
2625 static int sse_mb(MpegEncContext *s){
2626     int w= 16;
2627     int h= 16;
2628
2629     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2630     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2631
2632     if(w==16 && h==16)
2633       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2634         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2635                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2636                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2637       }else{
2638         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2639                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2640                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2641       }
2642     else
2643         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2644                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2645                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2646 }
2647
2648 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2649     MpegEncContext *s= *(void**)arg;
2650
2651
2652     s->me.pre_pass=1;
2653     s->me.dia_size= s->avctx->pre_dia_size;
2654     s->first_slice_line=1;
2655     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2656         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2657             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2658         }
2659         s->first_slice_line=0;
2660     }
2661
2662     s->me.pre_pass=0;
2663
2664     return 0;
2665 }
2666
2667 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2668     MpegEncContext *s= *(void**)arg;
2669
2670     ff_check_alignment();
2671
2672     s->me.dia_size= s->avctx->dia_size;
2673     s->first_slice_line=1;
2674     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2675         s->mb_x=0; //for block init below
2676         ff_init_block_index(s);
2677         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2678             s->block_index[0]+=2;
2679             s->block_index[1]+=2;
2680             s->block_index[2]+=2;
2681             s->block_index[3]+=2;
2682
2683             /* compute motion vector & mb_type and store in context */
2684             if(s->pict_type==AV_PICTURE_TYPE_B)
2685                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2686             else
2687                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2688         }
2689         s->first_slice_line=0;
2690     }
2691     return 0;
2692 }
2693
2694 static int mb_var_thread(AVCodecContext *c, void *arg){
2695     MpegEncContext *s= *(void**)arg;
2696     int mb_x, mb_y;
2697
2698     ff_check_alignment();
2699
2700     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2701         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2702             int xx = mb_x * 16;
2703             int yy = mb_y * 16;
2704             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2705             int varc;
2706             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2707
2708             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2709                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2710
2711             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2712             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2713             s->me.mb_var_sum_temp    += varc;
2714         }
2715     }
2716     return 0;
2717 }
2718
2719 static void write_slice_end(MpegEncContext *s){
2720     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2721         if(s->partitioned_frame){
2722             ff_mpeg4_merge_partitions(s);
2723         }
2724
2725         ff_mpeg4_stuffing(&s->pb);
2726     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2727         ff_mjpeg_encode_stuffing(s);
2728     }
2729
2730     avpriv_align_put_bits(&s->pb);
2731     flush_put_bits(&s->pb);
2732
2733     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2734         s->misc_bits+= get_bits_diff(s);
2735 }
2736
2737 static void write_mb_info(MpegEncContext *s)
2738 {
2739     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2740     int offset = put_bits_count(&s->pb);
2741     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2742     int gobn = s->mb_y / s->gob_index;
2743     int pred_x, pred_y;
2744     if (CONFIG_H263_ENCODER)
2745         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2746     bytestream_put_le32(&ptr, offset);
2747     bytestream_put_byte(&ptr, s->qscale);
2748     bytestream_put_byte(&ptr, gobn);
2749     bytestream_put_le16(&ptr, mba);
2750     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2751     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2752     /* 4MV not implemented */
2753     bytestream_put_byte(&ptr, 0); /* hmv2 */
2754     bytestream_put_byte(&ptr, 0); /* vmv2 */
2755 }
2756
2757 static void update_mb_info(MpegEncContext *s, int startcode)
2758 {
2759     if (!s->mb_info)
2760         return;
2761     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2762         s->mb_info_size += 12;
2763         s->prev_mb_info = s->last_mb_info;
2764     }
2765     if (startcode) {
2766         s->prev_mb_info = put_bits_count(&s->pb)/8;
2767         /* This might have incremented mb_info_size above, and we return without
2768          * actually writing any info into that slot yet. But in that case,
2769          * this will be called again at the start of the after writing the
2770          * start code, actually writing the mb info. */
2771         return;
2772     }
2773
2774     s->last_mb_info = put_bits_count(&s->pb)/8;
2775     if (!s->mb_info_size)
2776         s->mb_info_size += 12;
2777     write_mb_info(s);
2778 }
2779
2780 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2781 {
2782     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2783         && s->slice_context_count == 1
2784         && s->pb.buf == s->avctx->internal->byte_buffer) {
2785         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2786         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2787
2788         uint8_t *new_buffer = NULL;
2789         int new_buffer_size = 0;
2790
2791         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2792                               s->avctx->internal->byte_buffer_size + size_increase);
2793         if (!new_buffer)
2794             return AVERROR(ENOMEM);
2795
2796         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2797         av_free(s->avctx->internal->byte_buffer);
2798         s->avctx->internal->byte_buffer      = new_buffer;
2799         s->avctx->internal->byte_buffer_size = new_buffer_size;
2800         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2801         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2802         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2803     }
2804     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2805         return AVERROR(EINVAL);
2806     return 0;
2807 }
2808
2809 static int encode_thread(AVCodecContext *c, void *arg){
2810     MpegEncContext *s= *(void**)arg;
2811     int mb_x, mb_y, pdif = 0;
2812     int chr_h= 16>>s->chroma_y_shift;
2813     int i, j;
2814     MpegEncContext best_s = { 0 }, backup_s;
2815     uint8_t bit_buf[2][MAX_MB_BYTES];
2816     uint8_t bit_buf2[2][MAX_MB_BYTES];
2817     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2818     PutBitContext pb[2], pb2[2], tex_pb[2];
2819
2820     ff_check_alignment();
2821
2822     for(i=0; i<2; i++){
2823         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2824         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2825         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2826     }
2827
2828     s->last_bits= put_bits_count(&s->pb);
2829     s->mv_bits=0;
2830     s->misc_bits=0;
2831     s->i_tex_bits=0;
2832     s->p_tex_bits=0;
2833     s->i_count=0;
2834     s->f_count=0;
2835     s->b_count=0;
2836     s->skip_count=0;
2837
2838     for(i=0; i<3; i++){
2839         /* init last dc values */
2840         /* note: quant matrix value (8) is implied here */
2841         s->last_dc[i] = 128 << s->intra_dc_precision;
2842
2843         s->current_picture.error[i] = 0;
2844     }
2845     if(s->codec_id==AV_CODEC_ID_AMV){
2846         s->last_dc[0] = 128*8/13;
2847         s->last_dc[1] = 128*8/14;
2848         s->last_dc[2] = 128*8/14;
2849     }
2850     s->mb_skip_run = 0;
2851     memset(s->last_mv, 0, sizeof(s->last_mv));
2852
2853     s->last_mv_dir = 0;
2854
2855     switch(s->codec_id){
2856     case AV_CODEC_ID_H263:
2857     case AV_CODEC_ID_H263P:
2858     case AV_CODEC_ID_FLV1:
2859         if (CONFIG_H263_ENCODER)
2860             s->gob_index = H263_GOB_HEIGHT(s->height);
2861         break;
2862     case AV_CODEC_ID_MPEG4:
2863         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2864             ff_mpeg4_init_partitions(s);
2865         break;
2866     }
2867
2868     s->resync_mb_x=0;
2869     s->resync_mb_y=0;
2870     s->first_slice_line = 1;
2871     s->ptr_lastgob = s->pb.buf;
2872     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2873         s->mb_x=0;
2874         s->mb_y= mb_y;
2875
2876         ff_set_qscale(s, s->qscale);
2877         ff_init_block_index(s);
2878
2879         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2880             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2881             int mb_type= s->mb_type[xy];
2882 //            int d;
2883             int dmin= INT_MAX;
2884             int dir;
2885             int size_increase =  s->avctx->internal->byte_buffer_size/4
2886                                + s->mb_width*MAX_MB_BYTES;
2887
2888             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2889             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2890                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2891                 return -1;
2892             }
2893             if(s->data_partitioning){
2894                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2895                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2896                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2897                     return -1;
2898                 }
2899             }
2900
2901             s->mb_x = mb_x;
2902             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2903             ff_update_block_index(s);
2904
2905             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2906                 ff_h261_reorder_mb_index(s);
2907                 xy= s->mb_y*s->mb_stride + s->mb_x;
2908                 mb_type= s->mb_type[xy];
2909             }
2910
2911             /* write gob / video packet header  */
2912             if(s->rtp_mode){
2913                 int current_packet_size, is_gob_start;
2914
2915                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2916
2917                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2918
2919                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2920
2921                 switch(s->codec_id){
2922                 case AV_CODEC_ID_H263:
2923                 case AV_CODEC_ID_H263P:
2924                     if(!s->h263_slice_structured)
2925                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2926                     break;
2927                 case AV_CODEC_ID_MPEG2VIDEO:
2928                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2929                 case AV_CODEC_ID_MPEG1VIDEO:
2930                     if(s->mb_skip_run) is_gob_start=0;
2931                     break;
2932                 case AV_CODEC_ID_MJPEG:
2933                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2934                     break;
2935                 }
2936
2937                 if(is_gob_start){
2938                     if(s->start_mb_y != mb_y || mb_x!=0){
2939                         write_slice_end(s);
2940
2941                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2942                             ff_mpeg4_init_partitions(s);
2943                         }
2944                     }
2945
2946                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2947                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2948
2949                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2950                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2951                         int d = 100 / s->error_rate;
2952                         if(r % d == 0){
2953                             current_packet_size=0;
2954                             s->pb.buf_ptr= s->ptr_lastgob;
2955                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2956                         }
2957                     }
2958
2959                     if (s->avctx->rtp_callback){
2960                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2961                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2962                     }
2963                     update_mb_info(s, 1);
2964
2965                     switch(s->codec_id){
2966                     case AV_CODEC_ID_MPEG4:
2967                         if (CONFIG_MPEG4_ENCODER) {
2968                             ff_mpeg4_encode_video_packet_header(s);
2969                             ff_mpeg4_clean_buffers(s);
2970                         }
2971                     break;
2972                     case AV_CODEC_ID_MPEG1VIDEO:
2973                     case AV_CODEC_ID_MPEG2VIDEO:
2974                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2975                             ff_mpeg1_encode_slice_header(s);
2976                             ff_mpeg1_clean_buffers(s);
2977                         }
2978                     break;
2979                     case AV_CODEC_ID_H263:
2980                     case AV_CODEC_ID_H263P:
2981                         if (CONFIG_H263_ENCODER)
2982                             ff_h263_encode_gob_header(s, mb_y);
2983                     break;
2984                     }
2985
2986                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2987                         int bits= put_bits_count(&s->pb);
2988                         s->misc_bits+= bits - s->last_bits;
2989                         s->last_bits= bits;
2990                     }
2991
2992                     s->ptr_lastgob += current_packet_size;
2993                     s->first_slice_line=1;
2994                     s->resync_mb_x=mb_x;
2995                     s->resync_mb_y=mb_y;
2996                 }
2997             }
2998
2999             if(  (s->resync_mb_x   == s->mb_x)
3000                && s->resync_mb_y+1 == s->mb_y){
3001                 s->first_slice_line=0;
3002             }
3003
3004             s->mb_skipped=0;
3005             s->dquant=0; //only for QP_RD
3006
3007             update_mb_info(s, 0);
3008
3009             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
3010                 int next_block=0;
3011                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3012
3013                 copy_context_before_encode(&backup_s, s, -1);
3014                 backup_s.pb= s->pb;
3015                 best_s.data_partitioning= s->data_partitioning;
3016                 best_s.partitioned_frame= s->partitioned_frame;
3017                 if(s->data_partitioning){
3018                     backup_s.pb2= s->pb2;
3019                     backup_s.tex_pb= s->tex_pb;
3020                 }
3021
3022                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3023                     s->mv_dir = MV_DIR_FORWARD;
3024                     s->mv_type = MV_TYPE_16X16;
3025                     s->mb_intra= 0;
3026                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3027                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3028                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3029                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3030                 }
3031                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3032                     s->mv_dir = MV_DIR_FORWARD;
3033                     s->mv_type = MV_TYPE_FIELD;
3034                     s->mb_intra= 0;
3035                     for(i=0; i<2; i++){
3036                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3037                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3038                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3039                     }
3040                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3041                                  &dmin, &next_block, 0, 0);
3042                 }
3043                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3044                     s->mv_dir = MV_DIR_FORWARD;
3045                     s->mv_type = MV_TYPE_16X16;
3046                     s->mb_intra= 0;
3047                     s->mv[0][0][0] = 0;
3048                     s->mv[0][0][1] = 0;
3049                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3050                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3051                 }
3052                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3053                     s->mv_dir = MV_DIR_FORWARD;
3054                     s->mv_type = MV_TYPE_8X8;
3055                     s->mb_intra= 0;
3056                     for(i=0; i<4; i++){
3057                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3058                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3059                     }
3060                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3061                                  &dmin, &next_block, 0, 0);
3062                 }
3063                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3064                     s->mv_dir = MV_DIR_FORWARD;
3065                     s->mv_type = MV_TYPE_16X16;
3066                     s->mb_intra= 0;
3067                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3068                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3069                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3070                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3071                 }
3072                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3073                     s->mv_dir = MV_DIR_BACKWARD;
3074                     s->mv_type = MV_TYPE_16X16;
3075                     s->mb_intra= 0;
3076                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3077                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3078                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3079                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3080                 }
3081                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3082                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3083                     s->mv_type = MV_TYPE_16X16;
3084                     s->mb_intra= 0;
3085                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3086                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3087                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3088                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3089                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3090                                  &dmin, &next_block, 0, 0);
3091                 }
3092                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3093                     s->mv_dir = MV_DIR_FORWARD;
3094                     s->mv_type = MV_TYPE_FIELD;
3095                     s->mb_intra= 0;
3096                     for(i=0; i<2; i++){
3097                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3098                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3099                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3100                     }
3101                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3102                                  &dmin, &next_block, 0, 0);
3103                 }
3104                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3105                     s->mv_dir = MV_DIR_BACKWARD;
3106                     s->mv_type = MV_TYPE_FIELD;
3107                     s->mb_intra= 0;
3108                     for(i=0; i<2; i++){
3109                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3110                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3111                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3112                     }
3113                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3114                                  &dmin, &next_block, 0, 0);
3115                 }
3116                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3117                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3118                     s->mv_type = MV_TYPE_FIELD;
3119                     s->mb_intra= 0;
3120                     for(dir=0; dir<2; dir++){
3121                         for(i=0; i<2; i++){
3122                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3123                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3124                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3125                         }
3126                     }
3127                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3128                                  &dmin, &next_block, 0, 0);
3129                 }
3130                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3131                     s->mv_dir = 0;
3132                     s->mv_type = MV_TYPE_16X16;
3133                     s->mb_intra= 1;
3134                     s->mv[0][0][0] = 0;
3135                     s->mv[0][0][1] = 0;
3136                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3137                                  &dmin, &next_block, 0, 0);
3138                     if(s->h263_pred || s->h263_aic){
3139                         if(best_s.mb_intra)
3140                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3141                         else
3142                             ff_clean_intra_table_entries(s); //old mode?
3143                     }
3144                 }
3145
3146                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3147                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3148                         const int last_qp= backup_s.qscale;
3149                         int qpi, qp, dc[6];
3150                         int16_t ac[6][16];
3151                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3152                         static const int dquant_tab[4]={-1,1,-2,2};
3153                         int storecoefs = s->mb_intra && s->dc_val[0];
3154
3155                         av_assert2(backup_s.dquant == 0);
3156
3157                         //FIXME intra
3158                         s->mv_dir= best_s.mv_dir;
3159                         s->mv_type = MV_TYPE_16X16;
3160                         s->mb_intra= best_s.mb_intra;
3161                         s->mv[0][0][0] = best_s.mv[0][0][0];
3162                         s->mv[0][0][1] = best_s.mv[0][0][1];
3163                         s->mv[1][0][0] = best_s.mv[1][0][0];
3164                         s->mv[1][0][1] = best_s.mv[1][0][1];
3165
3166                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3167                         for(; qpi<4; qpi++){
3168                             int dquant= dquant_tab[qpi];
3169                             qp= last_qp + dquant;
3170                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3171                                 continue;
3172                             backup_s.dquant= dquant;
3173                             if(storecoefs){
3174                                 for(i=0; i<6; i++){
3175                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3176                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3177                                 }
3178                             }
3179
3180                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3181                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3182                             if(best_s.qscale != qp){
3183                                 if(storecoefs){
3184                                     for(i=0; i<6; i++){
3185                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3186                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3187                                     }
3188                                 }
3189                             }
3190                         }
3191                     }
3192                 }
3193                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3194                     int mx= s->b_direct_mv_table[xy][0];
3195                     int my= s->b_direct_mv_table[xy][1];
3196
3197                     backup_s.dquant = 0;
3198                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3199                     s->mb_intra= 0;
3200                     ff_mpeg4_set_direct_mv(s, mx, my);
3201                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3202                                  &dmin, &next_block, mx, my);
3203                 }
3204                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3205                     backup_s.dquant = 0;
3206                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3207                     s->mb_intra= 0;
3208                     ff_mpeg4_set_direct_mv(s, 0, 0);
3209                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3210                                  &dmin, &next_block, 0, 0);
3211                 }
3212                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3213                     int coded=0;
3214                     for(i=0; i<6; i++)
3215                         coded |= s->block_last_index[i];
3216                     if(coded){
3217                         int mx,my;
3218                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3219                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3220                             mx=my=0; //FIXME find the one we actually used
3221                             ff_mpeg4_set_direct_mv(s, mx, my);
3222                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3223                             mx= s->mv[1][0][0];
3224                             my= s->mv[1][0][1];
3225                         }else{
3226                             mx= s->mv[0][0][0];
3227                             my= s->mv[0][0][1];
3228                         }
3229
3230                         s->mv_dir= best_s.mv_dir;
3231                         s->mv_type = best_s.mv_type;
3232                         s->mb_intra= 0;
3233 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3234                         s->mv[0][0][1] = best_s.mv[0][0][1];
3235                         s->mv[1][0][0] = best_s.mv[1][0][0];
3236                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3237                         backup_s.dquant= 0;
3238                         s->skipdct=1;
3239                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3240                                         &dmin, &next_block, mx, my);
3241                         s->skipdct=0;
3242                     }
3243                 }
3244
3245                 s->current_picture.qscale_table[xy] = best_s.qscale;
3246
3247                 copy_context_after_encode(s, &best_s, -1);
3248
3249                 pb_bits_count= put_bits_count(&s->pb);
3250                 flush_put_bits(&s->pb);
3251                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3252                 s->pb= backup_s.pb;
3253
3254                 if(s->data_partitioning){
3255                     pb2_bits_count= put_bits_count(&s->pb2);
3256                     flush_put_bits(&s->pb2);
3257                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3258                     s->pb2= backup_s.pb2;
3259
3260                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3261                     flush_put_bits(&s->tex_pb);
3262                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3263                     s->tex_pb= backup_s.tex_pb;
3264                 }
3265                 s->last_bits= put_bits_count(&s->pb);
3266
3267                 if (CONFIG_H263_ENCODER &&
3268                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3269                     ff_h263_update_motion_val(s);
3270
3271                 if(next_block==0){ //FIXME 16 vs linesize16
3272                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3273                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3274                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3275                 }
3276
3277                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3278                     ff_mpv_decode_mb(s, s->block);
3279             } else {
3280                 int motion_x = 0, motion_y = 0;
3281                 s->mv_type=MV_TYPE_16X16;
3282                 // only one MB-Type possible
3283
3284                 switch(mb_type){
3285                 case CANDIDATE_MB_TYPE_INTRA:
3286                     s->mv_dir = 0;
3287                     s->mb_intra= 1;
3288                     motion_x= s->mv[0][0][0] = 0;
3289                     motion_y= s->mv[0][0][1] = 0;
3290                     break;
3291                 case CANDIDATE_MB_TYPE_INTER:
3292                     s->mv_dir = MV_DIR_FORWARD;
3293                     s->mb_intra= 0;
3294                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3295                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3296                     break;
3297                 case CANDIDATE_MB_TYPE_INTER_I:
3298                     s->mv_dir = MV_DIR_FORWARD;
3299                     s->mv_type = MV_TYPE_FIELD;
3300                     s->mb_intra= 0;
3301                     for(i=0; i<2; i++){
3302                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3303                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3304                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3305                     }
3306                     break;
3307                 case CANDIDATE_MB_TYPE_INTER4V:
3308                     s->mv_dir = MV_DIR_FORWARD;
3309                     s->mv_type = MV_TYPE_8X8;
3310                     s->mb_intra= 0;
3311                     for(i=0; i<4; i++){
3312                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3313                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3314                     }
3315                     break;
3316                 case CANDIDATE_MB_TYPE_DIRECT:
3317                     if (CONFIG_MPEG4_ENCODER) {
3318                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3319                         s->mb_intra= 0;
3320                         motion_x=s->b_direct_mv_table[xy][0];
3321                         motion_y=s->b_direct_mv_table[xy][1];
3322                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3323                     }
3324                     break;
3325                 case CANDIDATE_MB_TYPE_DIRECT0:
3326                     if (CONFIG_MPEG4_ENCODER) {
3327                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3328                         s->mb_intra= 0;
3329                         ff_mpeg4_set_direct_mv(s, 0, 0);
3330                     }
3331                     break;
3332                 case CANDIDATE_MB_TYPE_BIDIR:
3333                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3334                     s->mb_intra= 0;
3335                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3336                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3337                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3338                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3339                     break;
3340                 case CANDIDATE_MB_TYPE_BACKWARD:
3341                     s->mv_dir = MV_DIR_BACKWARD;
3342                     s->mb_intra= 0;
3343                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3344                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3345                     break;
3346                 case CANDIDATE_MB_TYPE_FORWARD:
3347                     s->mv_dir = MV_DIR_FORWARD;
3348                     s->mb_intra= 0;
3349                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3350                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3351                     break;
3352                 case CANDIDATE_MB_TYPE_FORWARD_I:
3353                     s->mv_dir = MV_DIR_FORWARD;
3354                     s->mv_type = MV_TYPE_FIELD;
3355                     s->mb_intra= 0;
3356                     for(i=0; i<2; i++){
3357                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3358                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3359                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3360                     }
3361                     break;
3362                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3363                     s->mv_dir = MV_DIR_BACKWARD;
3364                     s->mv_type = MV_TYPE_FIELD;
3365                     s->mb_intra= 0;
3366                     for(i=0; i<2; i++){
3367                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3368                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3369                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3370                     }
3371                     break;
3372                 case CANDIDATE_MB_TYPE_BIDIR_I:
3373                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3374                     s->mv_type = MV_TYPE_FIELD;
3375                     s->mb_intra= 0;
3376                     for(dir=0; dir<2; dir++){
3377                         for(i=0; i<2; i++){
3378                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3379                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3380                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3381                         }
3382                     }
3383                     break;
3384                 default:
3385                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3386                 }
3387
3388                 encode_mb(s, motion_x, motion_y);
3389
3390                 // RAL: Update last macroblock type
3391                 s->last_mv_dir = s->mv_dir;
3392
3393                 if (CONFIG_H263_ENCODER &&
3394                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3395                     ff_h263_update_motion_val(s);
3396
3397                 ff_mpv_decode_mb(s, s->block);
3398             }
3399
3400             /* clean the MV table in IPS frames for direct mode in B frames */
3401             if(s->mb_intra /* && I,P,S_TYPE */){
3402                 s->p_mv_table[xy][0]=0;
3403                 s->p_mv_table[xy][1]=0;
3404             }
3405
3406             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3407                 int w= 16;
3408                 int h= 16;
3409
3410                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3411                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3412
3413                 s->current_picture.error[0] += sse(
3414                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3415                     s->dest[0], w, h, s->linesize);
3416                 s->current_picture.error[1] += sse(
3417                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3418                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3419                 s->current_picture.error[2] += sse(
3420                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3421                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3422             }
3423             if(s->loop_filter){
3424                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3425                     ff_h263_loop_filter(s);
3426             }
3427             ff_dlog(s->avctx, "MB %d %d bits\n",
3428                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3429         }
3430     }
3431
3432     //not beautiful here but we must write it before flushing so it has to be here
3433     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3434         ff_msmpeg4_encode_ext_header(s);
3435
3436     write_slice_end(s);
3437
3438     /* Send the last GOB if RTP */
3439     if (s->avctx->rtp_callback) {
3440         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3441         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3442         /* Call the RTP callback to send the last GOB */
3443         emms_c();
3444         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3445     }
3446
3447     return 0;
3448 }
3449
/**
 * Add src->field into dst->field, then reset src->field to 0.
 *
 * Expects pointer variables named dst and src in the calling scope.
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: without it, an unbraced "if (c) MERGE(x);" would only
 * guard the addition and always execute the reset.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3451 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3452     MERGE(me.scene_change_score);
3453     MERGE(me.mc_mb_var_sum_temp);
3454     MERGE(me.mb_var_sum_temp);
3455 }
3456
3457 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3458     int i;
3459
3460     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3461     MERGE(dct_count[1]);
3462     MERGE(mv_bits);
3463     MERGE(i_tex_bits);
3464     MERGE(p_tex_bits);
3465     MERGE(i_count);
3466     MERGE(f_count);
3467     MERGE(b_count);
3468     MERGE(skip_count);
3469     MERGE(misc_bits);
3470     MERGE(er.error_count);
3471     MERGE(padding_bug_score);
3472     MERGE(current_picture.error[0]);
3473     MERGE(current_picture.error[1]);
3474     MERGE(current_picture.error[2]);
3475
3476     if(dst->avctx->noise_reduction){
3477         for(i=0; i<64; i++){
3478             MERGE(dct_error_sum[0][i]);
3479             MERGE(dct_error_sum[1][i]);
3480         }
3481     }
3482
3483     assert(put_bits_count(&src->pb) % 8 ==0);
3484     assert(put_bits_count(&dst->pb) % 8 ==0);
3485     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3486     flush_put_bits(&dst->pb);
3487 }
3488
3489 static int estimate_qp(MpegEncContext *s, int dry_run){
3490     if (s->next_lambda){
3491         s->current_picture_ptr->f->quality =
3492         s->current_picture.f->quality = s->next_lambda;
3493         if(!dry_run) s->next_lambda= 0;
3494     } else if (!s->fixed_qscale) {
3495         s->current_picture_ptr->f->quality =
3496         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3497         if (s->current_picture.f->quality < 0)
3498             return -1;
3499     }
3500
3501     if(s->adaptive_quant){
3502         switch(s->codec_id){
3503         case AV_CODEC_ID_MPEG4:
3504             if (CONFIG_MPEG4_ENCODER)
3505                 ff_clean_mpeg4_qscales(s);
3506             break;
3507         case AV_CODEC_ID_H263:
3508         case AV_CODEC_ID_H263P:
3509         case AV_CODEC_ID_FLV1:
3510             if (CONFIG_H263_ENCODER)
3511                 ff_clean_h263_qscales(s);
3512             break;
3513         default:
3514             ff_init_qscale_tab(s);
3515         }
3516
3517         s->lambda= s->lambda_table[0];
3518         //FIXME broken
3519     }else
3520         s->lambda = s->current_picture.f->quality;
3521     update_qscale(s);
3522     return 0;
3523 }
3524
3525 /* must be called before writing the header */
3526 static void set_frame_distances(MpegEncContext * s){
3527     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3528     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3529
3530     if(s->pict_type==AV_PICTURE_TYPE_B){
3531         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3532         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3533     }else{
3534         s->pp_time= s->time - s->last_non_b_time;
3535         s->last_non_b_time= s->time;
3536         assert(s->picture_number==0 || s->pp_time > 0);
3537     }
3538 }
3539
3540 static int encode_picture(MpegEncContext *s, int picture_number)
3541 {
3542     int i, ret;
3543     int bits;
3544     int context_count = s->slice_context_count;
3545
3546     s->picture_number = picture_number;
3547
3548     /* Reset the average MB variance */
3549     s->me.mb_var_sum_temp    =
3550     s->me.mc_mb_var_sum_temp = 0;
3551
3552     /* we need to initialize some time vars before we can encode b-frames */
3553     // RAL: Condition added for MPEG1VIDEO
3554     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3555         set_frame_distances(s);
3556     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3557         ff_set_mpeg4_time(s);
3558
3559     s->me.scene_change_score=0;
3560
3561 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3562
3563     if(s->pict_type==AV_PICTURE_TYPE_I){
3564         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3565         else                        s->no_rounding=0;
3566     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3567         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3568             s->no_rounding ^= 1;
3569     }
3570
3571     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3572         if (estimate_qp(s,1) < 0)
3573             return -1;
3574         ff_get_2pass_fcode(s);
3575     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3576         if(s->pict_type==AV_PICTURE_TYPE_B)
3577             s->lambda= s->last_lambda_for[s->pict_type];
3578         else
3579             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3580         update_qscale(s);
3581     }
3582
3583     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3584         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3585         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3586         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3587         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3588     }
3589
3590     s->mb_intra=0; //for the rate distortion & bit compare functions
3591     for(i=1; i<context_count; i++){
3592         ret = ff_update_duplicate_context(s->thread_context[i], s);
3593         if (ret < 0)
3594             return ret;
3595     }
3596
3597     if(ff_init_me(s)<0)
3598         return -1;
3599
3600     /* Estimate motion for every MB */
3601     if(s->pict_type != AV_PICTURE_TYPE_I){
3602         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3603         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3604         if (s->pict_type != AV_PICTURE_TYPE_B) {
3605             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3606                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3607             }
3608         }
3609
3610         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3611     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3612         /* I-Frame */
3613         for(i=0; i<s->mb_stride*s->mb_height; i++)
3614             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3615
3616         if(!s->fixed_qscale){
3617             /* finding spatial complexity for I-frame rate control */
3618             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3619         }
3620     }
3621     for(i=1; i<context_count; i++){
3622         merge_context_after_me(s, s->thread_context[i]);
3623     }
3624     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3625     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3626     emms_c();
3627
3628     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3629         s->pict_type= AV_PICTURE_TYPE_I;
3630         for(i=0; i<s->mb_stride*s->mb_height; i++)
3631             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3632         if(s->msmpeg4_version >= 3)
3633             s->no_rounding=1;
3634         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3635                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3636     }
3637
3638     if(!s->umvplus){
3639         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3640             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3641
3642             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3643                 int a,b;
3644                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3645                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3646                 s->f_code= FFMAX3(s->f_code, a, b);
3647             }
3648
3649             ff_fix_long_p_mvs(s);
3650             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3651             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3652                 int j;
3653                 for(i=0; i<2; i++){
3654                     for(j=0; j<2; j++)
3655                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3656                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3657                 }
3658             }
3659         }
3660
3661         if(s->pict_type==AV_PICTURE_TYPE_B){
3662             int a, b;
3663
3664             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3665             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3666             s->f_code = FFMAX(a, b);
3667
3668             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3669             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3670             s->b_code = FFMAX(a, b);
3671
3672             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3673             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3674             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3675             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3676             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3677                 int dir, j;
3678                 for(dir=0; dir<2; dir++){
3679                     for(i=0; i<2; i++){
3680                         for(j=0; j<2; j++){
3681                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3682                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3683                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3684                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3685                         }
3686                     }
3687                 }
3688             }
3689         }
3690     }
3691
3692     if (estimate_qp(s, 0) < 0)
3693         return -1;
3694
3695     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3696         s->pict_type == AV_PICTURE_TYPE_I &&
3697         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3698         s->qscale= 3; //reduce clipping problems
3699
3700     if (s->out_format == FMT_MJPEG) {
3701         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3702         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3703
3704         if (s->avctx->intra_matrix) {
3705             chroma_matrix =
3706             luma_matrix = s->avctx->intra_matrix;
3707         }
3708         if (s->avctx->chroma_intra_matrix)
3709             chroma_matrix = s->avctx->chroma_intra_matrix;
3710
3711         /* for mjpeg, we do include qscale in the matrix */
3712         for(i=1;i<64;i++){
3713             int j = s->idsp.idct_permutation[i];
3714
3715             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3716             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3717         }
3718         s->y_dc_scale_table=
3719         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3720         s->chroma_intra_matrix[0] =
3721         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3722         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3723                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3724         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3725                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3726         s->qscale= 8;
3727     }
3728     if(s->codec_id == AV_CODEC_ID_AMV){
3729         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3730         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3731         for(i=1;i<64;i++){
3732             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3733
3734             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3735             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3736         }
3737         s->y_dc_scale_table= y;
3738         s->c_dc_scale_table= c;
3739         s->intra_matrix[0] = 13;
3740         s->chroma_intra_matrix[0] = 14;
3741         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3742                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3743         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3744                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3745         s->qscale= 8;
3746     }
3747
3748     //FIXME var duplication
3749     s->current_picture_ptr->f->key_frame =
3750     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3751     s->current_picture_ptr->f->pict_type =
3752     s->current_picture.f->pict_type = s->pict_type;
3753
3754     if (s->current_picture.f->key_frame)
3755         s->picture_in_gop_number=0;
3756
3757     s->mb_x = s->mb_y = 0;
3758     s->last_bits= put_bits_count(&s->pb);
3759     switch(s->out_format) {
3760     case FMT_MJPEG:
3761         if (CONFIG_MJPEG_ENCODER)
3762             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3763                                            s->intra_matrix, s->chroma_intra_matrix);
3764         break;
3765     case FMT_H261:
3766         if (CONFIG_H261_ENCODER)
3767             ff_h261_encode_picture_header(s, picture_number);
3768         break;
3769     case FMT_H263:
3770         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3771             ff_wmv2_encode_picture_header(s, picture_number);
3772         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3773             ff_msmpeg4_encode_picture_header(s, picture_number);
3774         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3775             ff_mpeg4_encode_picture_header(s, picture_number);
3776         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3777             ret = ff_rv10_encode_picture_header(s, picture_number);
3778             if (ret < 0)
3779                 return ret;
3780         }
3781         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3782             ff_rv20_encode_picture_header(s, picture_number);
3783         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3784             ff_flv_encode_picture_header(s, picture_number);
3785         else if (CONFIG_H263_ENCODER)
3786             ff_h263_encode_picture_header(s, picture_number);
3787         break;
3788     case FMT_MPEG1:
3789         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3790             ff_mpeg1_encode_picture_header(s, picture_number);
3791         break;
3792     default:
3793         av_assert0(0);
3794     }
3795     bits= put_bits_count(&s->pb);
3796     s->header_bits= bits - s->last_bits;
3797
3798     for(i=1; i<context_count; i++){
3799         update_duplicate_context_after_me(s->thread_context[i], s);
3800     }
3801     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3802     for(i=1; i<context_count; i++){
3803         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3804             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3805         merge_context_after_encode(s, s->thread_context[i]);
3806     }
3807     emms_c();
3808     return 0;
3809 }
3810
3811 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3812     const int intra= s->mb_intra;
3813     int i;
3814
3815     s->dct_count[intra]++;
3816
3817     for(i=0; i<64; i++){
3818         int level= block[i];
3819
3820         if(level){
3821             if(level>0){
3822                 s->dct_error_sum[intra][i] += level;
3823                 level -= s->dct_offset[intra][i];
3824                 if(level<0) level=0;
3825             }else{
3826                 s->dct_error_sum[intra][i] -= level;
3827                 level += s->dct_offset[intra][i];
3828                 if(level>0) level=0;
3829             }
3830             block[i]= level;
3831         }
3832     }
3833 }
3834
/**
 * Forward-transform and quantize one 8x8 block using a rate-distortion
 * ("trellis") search over the quantized levels.
 *
 * The block is transformed in place with s->fdsp.fdct() and, when
 * s->dct_error_sum is set, passed through s->denoise_dct(). For every
 * coefficient up to the last one that does not quantize to zero, one or two
 * candidate levels are collected; a dynamic-programming pass then picks the
 * run/level sequence minimising distortion + lambda * VLC length.
 *
 * @param s        encoder context (matrices, VLC length tables, lambda2, ...)
 * @param block    64 coefficients, replaced by the chosen quantized levels
 * @param n        block index; n < 4 selects the luma DC scale / intra matrix,
 *                 otherwise the chroma ones
 * @param qscale   quantizer, used to index the q_*_matrix tables
 * @param overflow set to non-zero when a candidate magnitude may exceed
 *                 s->max_qcoeff
 * @return index (in scan order) of the last non-zero coefficient that is kept;
 *         a value below the first searched index when none is kept, and -1 in
 *         the single-coefficient inter case when the best level is 0.
 *         s->coded_score[n] is updated as a side effect.
 */
3835 static int dct_quantize_trellis_c(MpegEncContext *s,
3836                                   int16_t *block, int n,
3837                                   int qscale, int *overflow){
3838     const int *qmat;
3839     const uint16_t *matrix;
3840     const uint8_t *scantable= s->intra_scantable.scantable;
3841     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3842     int max=0;
3843     unsigned int threshold1, threshold2;
3844     int bias=0;
         // per-position back pointers of the best path ending at i (index i+1)
3845     int run_tab[65];
3846     int level_tab[65];
         // score_tab[i]: best score of coding everything before position i
3847     int score_tab[65];
         // positions that are still worth considering as the start of a run
3848     int survivor[65];
3849     int survivor_count;
3850     int last_run=0;
3851     int last_level=0;
3852     int last_score= 0;
3853     int last_i;
         // coeff[k][i]: k-th candidate level for scan position i
3854     int coeff[2][64];
3855     int coeff_count[64];
3856     int qmul, qadd, start_i, last_non_zero, i, dc;
3857     const int esc_length= s->ac_esc_length;
3858     uint8_t * length;
3859     uint8_t * last_length;
         // weight applied to VLC lengths when they are added to the distortion
3860     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3861     int mpeg2_qscale;
3862
3863     s->fdsp.fdct(block);
3864
3865     if(s->dct_error_sum)
3866         s->denoise_dct(s, block);
         // qmul/qadd feed the H.263/H.261 reconstruction formula used below
3867     qmul= qscale*16;
3868     qadd= ((qscale-1)|1)*8;
3869
3870     if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3871     else                 mpeg2_qscale = qscale << 1;
3872
3873     if (s->mb_intra) {
3874         int q;
3875         if (!s->h263_aic) {
3876             if (n < 4)
3877                 q = s->y_dc_scale;
3878             else
3879                 q = s->c_dc_scale;
3880             q = q << 3;
3881         } else{
3882             /* For AIC we skip quant/dequant of INTRADC */
3883             q = 1 << 3;
3884             qadd=0;
3885         }
3886
3887         /* note: block[0] is assumed to be positive */
             // the intra DC coefficient is quantized directly; the search
             // below therefore starts at scan position 1
3888         block[0] = (block[0] + (q >> 1)) / q;
3889         start_i = 1;
3890         last_non_zero = 0;
3891         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3892         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3893         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3894             bias= 1<<(QMAT_SHIFT-1);
3895
             // use the chroma-specific VLC length tables when they exist
3896         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3897             length     = s->intra_chroma_ac_vlc_length;
3898             last_length= s->intra_chroma_ac_vlc_last_length;
3899         } else {
3900             length     = s->intra_ac_vlc_length;
3901             last_length= s->intra_ac_vlc_last_length;
3902         }
3903     } else {
3904         start_i = 0;
3905         last_non_zero = -1;
3906         qmat = s->q_inter_matrix[qscale];
3907         matrix = s->inter_matrix;
3908         length     = s->inter_ac_vlc_length;
3909         last_length= s->inter_ac_vlc_last_length;
3910     }
3911     last_i= start_i;
3912
         // (unsigned)(level+threshold1) > threshold2 is a single-compare test
         // for level > threshold1 || level < -threshold1
3913     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3914     threshold2= (threshold1<<1);
3915
         // find the last scan position whose coefficient passes the threshold
3916     for(i=63; i>=start_i; i--) {
3917         const int j = scantable[i];
3918         int level = block[j] * qmat[j];
3919
3920         if(((unsigned)(level+threshold1))>threshold2){
3921             last_non_zero = i;
3922             break;
3923         }
3924     }
3925
         // collect the candidate levels for every position up to last_non_zero
3926     for(i=start_i; i<=last_non_zero; i++) {
3927         const int j = scantable[i];
3928         int level = block[j] * qmat[j];
3929
3930 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3931 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3932         if(((unsigned)(level+threshold1))>threshold2){
                 // candidates: the biased/rounded level and the next one
                 // closer to zero (dropped below when that would be 0)
3933             if(level>0){
3934                 level= (bias + level)>>QMAT_SHIFT;
3935                 coeff[0][i]= level;
3936                 coeff[1][i]= level-1;
3937 //                coeff[2][k]= level-2;
3938             }else{
3939                 level= (bias - level)>>QMAT_SHIFT;
3940                 coeff[0][i]= -level;
3941                 coeff[1][i]= -level+1;
3942 //                coeff[2][k]= -level+2;
3943             }
3944             coeff_count[i]= FFMIN(level, 2);
3945             av_assert2(coeff_count[i]);
3946             max |=level;
3947         }else{
                 // below the threshold: the only non-zero candidate is +-1,
                 // carrying the sign of the scaled coefficient
3948             coeff[0][i]= (level>>31)|1;
3949             coeff_count[i]= 1;
3950         }
3951     }
3952
3953     *overflow= s->max_qcoeff < max; //overflow might have happened
3954
         // nothing passed the threshold: clear the searched range and leave
3955     if(last_non_zero < start_i){
3956         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3957         return last_non_zero;
3958     }
3959
3960     score_tab[start_i]= 0;
3961     survivor[0]= start_i;
3962     survivor_count= 1;
3963
         // dynamic programming over scan positions
3964     for(i=start_i; i<=last_non_zero; i++){
3965         int level_index, j, zero_distortion;
3966         int dct_coeff= FFABS(block[ scantable[i] ]);
3967         int best_score=256*256*256*120;
3968
             // the ifast DCT output is rescaled with ff_inv_aanscales first
3969         if (s->fdsp.fdct == ff_fdct_ifast)
3970             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
             // distortion of coding this coefficient as zero; candidate
             // distortions below are expressed relative to it
3971         zero_distortion= dct_coeff*dct_coeff;
3972
3973         for(level_index=0; level_index < coeff_count[i]; level_index++){
3974             int distortion;
3975             int level= coeff[level_index][i];
3976             const int alevel= FFABS(level);
3977             int unquant_coeff;
3978
3979             av_assert2(level);
3980
                 // magnitude this candidate is dequantized to, per format
3981             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3982                 unquant_coeff= alevel*qmul + qadd;
3983             } else if(s->out_format == FMT_MJPEG) {
3984                 j = s->idsp.idct_permutation[scantable[i]];
3985                 unquant_coeff = alevel * matrix[j] * 8;
3986             }else{ //MPEG1
3987                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3988                 if(s->mb_intra){
3989                         unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
3990                         unquant_coeff =   (unquant_coeff - 1) | 1;
3991                 }else{
3992                         unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
3993                         unquant_coeff =   (unquant_coeff - 1) | 1;
3994                 }
3995                 unquant_coeff<<= 3;
3996             }
3997
3998             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 // levels in -64..63 are looked up in the VLC length tables
                 // (offset by 64); anything else is charged esc_length
3999             level+=64;
4000             if((level&(~127)) == 0){
                     // try every surviving start position for this run
4001                 for(j=survivor_count-1; j>=0; j--){
4002                     int run= i - survivor[j];
4003                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4004                     score += score_tab[i-run];
4005
4006                     if(score < best_score){
4007                         best_score= score;
4008                         run_tab[i+1]= run;
4009                         level_tab[i+1]= level-64;
4010                     }
4011                 }
4012
                     // H.263/H.261: also score this coefficient as the last
                     // one of the block, using the last_length table
4013                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4014                     for(j=survivor_count-1; j>=0; j--){
4015                         int run= i - survivor[j];
4016                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4017                         score += score_tab[i-run];
4018                         if(score < last_score){
4019                             last_score= score;
4020                             last_run= run;
4021                             last_level= level-64;
4022                             last_i= i+1;
4023                         }
4024                     }
4025                 }
4026             }else{
                     // escape-coded level: same cost whatever the run is
4027                 distortion += esc_length*lambda;
4028                 for(j=survivor_count-1; j>=0; j--){
4029                     int run= i - survivor[j];
4030                     int score= distortion + score_tab[i-run];
4031
4032                     if(score < best_score){
4033                         best_score= score;
4034                         run_tab[i+1]= run;
4035                         level_tab[i+1]= level-64;
4036                     }
4037                 }
4038
4039                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4040                   for(j=survivor_count-1; j>=0; j--){
4041                         int run= i - survivor[j];
4042                         int score= distortion + score_tab[i-run];
4043                         if(score < last_score){
4044                             last_score= score;
4045                             last_run= run;
4046                             last_level= level-64;
4047                             last_i= i+1;
4048                         }
4049                     }
4050                 }
4051             }
4052         }
4053
4054         score_tab[i+1]= best_score;
4055
4056         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
             // drop trailing survivors that score worse than the new best;
             // for longer blocks a slack of lambda is allowed (see note above)
4057         if(last_non_zero <= 27){
4058             for(; survivor_count; survivor_count--){
4059                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4060                     break;
4061             }
4062         }else{
4063             for(; survivor_count; survivor_count--){
4064                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4065                     break;
4066             }
4067         }
4068
4069         survivor[ survivor_count++ ]= i+1;
4070     }
4071
         // other formats: choose the end position afterwards, charging a
         // flat 2*lambda for every end position other than 0
4072     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4073         last_score= 256*256*256*120;
4074         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4075             int score= score_tab[i];
4076             if(i) score += lambda*2; //FIXME exacter?
4077
4078             if(score < last_score){
4079                 last_score= score;
4080                 last_i= i;
4081                 last_level= level_tab[i];
4082                 last_run= run_tab[i];
4083             }
4084         }
4085     }
4086
4087     s->coded_score[n] = last_score;
4088
         // remember |block[0]| before the searched range is cleared; it is
         // needed by the single-coefficient case below
4089     dc= FFABS(block[0]);
4090     last_non_zero= last_i - 1;
4091     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4092
4093     if(last_non_zero < start_i)
4094         return last_non_zero;
4095
         // only the first coefficient of a non-intra block survived:
         // re-evaluate its candidates against the unquantized value
4096     if(last_non_zero == 0 && start_i == 0){
4097         int best_level= 0;
4098         int best_score= dc * dc;
4099
4100         for(i=0; i<coeff_count[0]; i++){
4101             int level= coeff[i][0];
4102             int alevel= FFABS(level);
4103             int unquant_coeff, score, distortion;
4104
4105             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4106                     unquant_coeff= (alevel*qmul + qadd)>>3;
4107             }else{ //MPEG1
4108                     unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4109                     unquant_coeff =   (unquant_coeff - 1) | 1;
4110             }
4111             unquant_coeff = (unquant_coeff + 4) >> 3;
4112             unquant_coeff<<= 3 + 3;
4113
4114             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4115             level+=64;
4116             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4117             else                    score= distortion + esc_length*lambda;
4118
4119             if(score < best_score){
4120                 best_score= score;
4121                 best_level= level - 64;
4122             }
4123         }
4124         block[0]= best_level;
4125         s->coded_score[n] = best_score - dc*dc;
4126         if(best_level == 0) return -1;
4127         else                return last_non_zero;
4128     }
4129
         // write the chosen path back into the block, walking the
         // run/level back pointers from the last coefficient to the first
4130     i= last_i;
4131     av_assert2(last_level);
4132
4133     block[ perm_scantable[last_non_zero] ]= last_level;
4134     i -= last_run + 1;
4135
4136     for(; i>start_i; i -= run_tab[i] + 1){
4137         block[ perm_scantable[i-1] ]= level_tab[i];
4138     }
4139
4140     return last_non_zero;
4141 }
4142
4143 //#define REFINE_STATS 1
4144 static int16_t basis[64][64];
4145
4146 static void build_basis(uint8_t *perm){
4147     int i, j, x, y;
4148     emms_c();
4149     for(i=0; i<8; i++){
4150         for(j=0; j<8; j++){
4151             for(y=0; y<8; y++){
4152                 for(x=0; x<8; x++){
4153                     double s= 0.25*(1<<BASIS_SHIFT);
4154                     int index= 8*i + j;
4155                     int perm_index= perm[index];
4156                     if(i==0) s*= sqrt(0.5);
4157                     if(j==0) s*= sqrt(0.5);
4158                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4159                 }
4160             }
4161         }
4162     }
4163 }
4164
4165 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4166                         int16_t *block, int16_t *weight, int16_t *orig,
4167                         int n, int qscale){
4168     int16_t rem[64];
4169     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4170     const uint8_t *scantable= s->intra_scantable.scantable;
4171     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4172 //    unsigned int threshold1, threshold2;
4173 //    int bias=0;
4174     int run_tab[65];
4175     int prev_run=0;
4176     int prev_level=0;
4177     int qmul, qadd, start_i, last_non_zero, i, dc;
4178     uint8_t * length;
4179     uint8_t * last_length;
4180     int lambda;
4181     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4182 #ifdef REFINE_STATS
4183 static int count=0;
4184 static int after_last=0;
4185 static int to_zero=0;
4186 static int from_zero=0;
4187 static int raise=0;
4188 static int lower=0;
4189 static int messed_sign=0;
4190 #endif
4191
4192     if(basis[0][0] == 0)
4193         build_basis(s->idsp.idct_permutation);
4194
4195     qmul= qscale*2;
4196     qadd= (qscale-1)|1;
4197     if (s->mb_intra) {
4198         if (!s->h263_aic) {
4199             if (n < 4)
4200                 q = s->y_dc_scale;
4201             else
4202                 q = s->c_dc_scale;
4203         } else{
4204             /* For AIC we skip quant/dequant of INTRADC */
4205             q = 1;
4206             qadd=0;
4207         }
4208         q <<= RECON_SHIFT-3;
4209         /* note: block[0] is assumed to be positive */
4210         dc= block[0]*q;
4211 //        block[0] = (block[0] + (q >> 1)) / q;
4212         start_i = 1;
4213 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4214 //            bias= 1<<(QMAT_SHIFT-1);
4215         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4216             length     = s->intra_chroma_ac_vlc_length;
4217             last_length= s->intra_chroma_ac_vlc_last_length;
4218         } else {
4219             length     = s->intra_ac_vlc_length;
4220             last_length= s->intra_ac_vlc_last_length;
4221         }
4222     } else {
4223         dc= 0;
4224         start_i = 0;
4225         length     = s->inter_ac_vlc_length;
4226         last_length= s->inter_ac_vlc_last_length;
4227     }
4228     last_non_zero = s->block_last_index[n];
4229
4230 #ifdef REFINE_STATS
4231 {START_TIMER
4232 #endif
4233     dc += (1<<(RECON_SHIFT-1));
4234     for(i=0; i<64; i++){
4235         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4236     }
4237 #ifdef REFINE_STATS
4238 STOP_TIMER("memset rem[]")}
4239 #endif
4240     sum=0;
4241     for(i=0; i<64; i++){
4242         int one= 36;
4243         int qns=4;
4244         int w;
4245
4246         w= FFABS(weight[i]) + qns*one;
4247         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4248
4249         weight[i] = w;
4250 //        w=weight[i] = (63*qns + (w/2)) / w;
4251
4252         av_assert2(w>0);
4253         av_assert2(w<(1<<6));
4254         sum += w*w;
4255     }
4256     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4257 #ifdef REFINE_STATS
4258 {START_TIMER
4259 #endif
4260     run=0;
4261     rle_index=0;
4262     for(i=start_i; i<=last_non_zero; i++){
4263         int j= perm_scantable[i];
4264         const int level= block[j];
4265         int coeff;
4266
4267         if(level){
4268             if(level<0) coeff= qmul*level - qadd;
4269             else        coeff= qmul*level + qadd;
4270             run_tab[rle_index++]=run;
4271             run=0;
4272
4273             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4274         }else{
4275             run++;
4276         }
4277     }
4278 #ifdef REFINE_STATS
4279 if(last_non_zero>0){
4280 STOP_TIMER("init rem[]")
4281 }
4282 }
4283
4284 {START_TIMER
4285 #endif
4286     for(;;){
4287         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4288         int best_coeff=0;
4289         int best_change=0;
4290         int run2, best_unquant_change=0, analyze_gradient;
4291 #ifdef REFINE_STATS
4292 {START_TIMER
4293 #endif
4294         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4295
4296         if(analyze_gradient){
4297 #ifdef REFINE_STATS
4298 {START_TIMER
4299 #endif
4300             for(i=0; i<64; i++){
4301                 int w= weight[i];
4302
4303                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4304             }
4305 #ifdef REFINE_STATS
4306 STOP_TIMER("rem*w*w")}
4307 {START_TIMER
4308 #endif
4309             s->fdsp.fdct(d1);
4310 #ifdef REFINE_STATS
4311 STOP_TIMER("dct")}
4312 #endif
4313         }
4314
4315         if(start_i){
4316             const int level= block[0];
4317             int change, old_coeff;
4318
4319             av_assert2(s->mb_intra);
4320
4321             old_coeff= q*level;
4322
4323             for(change=-1; change<=1; change+=2){
4324                 int new_level= level + change;
4325                 int score, new_coeff;
4326
4327                 new_coeff= q*new_level;
4328                 if(new_coeff >= 2048 || new_coeff < 0)
4329                     continue;
4330
4331                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4332                                                   new_coeff - old_coeff);
4333                 if(score<best_score){
4334                     best_score= score;
4335                     best_coeff= 0;
4336                     best_change= change;
4337                     best_unquant_change= new_coeff - old_coeff;
4338                 }
4339             }
4340         }
4341
4342         run=0;
4343         rle_index=0;
4344         run2= run_tab[rle_index++];
4345         prev_level=0;
4346         prev_run=0;
4347
4348         for(i=start_i; i<64; i++){
4349             int j= perm_scantable[i];
4350             const int level= block[j];
4351             int change, old_coeff;
4352
4353             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4354                 break;
4355
4356             if(level){
4357                 if(level<0) old_coeff= qmul*level - qadd;
4358                 else        old_coeff= qmul*level + qadd;
4359                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4360             }else{
4361                 old_coeff=0;
4362                 run2--;
4363                 av_assert2(run2>=0 || i >= last_non_zero );
4364             }
4365
4366             for(change=-1; change<=1; change+=2){
4367                 int new_level= level + change;
4368                 int score, new_coeff, unquant_change;
4369
4370                 score=0;
4371                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4372                    continue;
4373
4374                 if(new_level){
4375                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4376                     else            new_coeff= qmul*new_level + qadd;
4377                     if(new_coeff >= 2048 || new_coeff <= -2048)
4378                         continue;
4379                     //FIXME check for overflow
4380
4381                     if(level){
4382                         if(level < 63 && level > -63){
4383                             if(i < last_non_zero)
4384                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4385                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4386                             else
4387                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4388                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4389                         }
4390                     }else{
4391                         av_assert2(FFABS(new_level)==1);
4392
4393                         if(analyze_gradient){
4394                             int g= d1[ scantable[i] ];
4395                             if(g && (g^new_level) >= 0)
4396                                 continue;
4397                         }
4398
4399                         if(i < last_non_zero){
4400                             int next_i= i + run2 + 1;
4401                             int next_level= block[ perm_scantable[next_i] ] + 64;
4402
4403                             if(next_level&(~127))
4404                                 next_level= 0;
4405
4406                             if(next_i < last_non_zero)
4407                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4408                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4409                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4410                             else
4411                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4412                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4413                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4414                         }else{
4415                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4416                             if(prev_level){
4417                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4418                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4419                             }
4420                         }
4421                     }
4422                 }else{
4423                     new_coeff=0;
4424                     av_assert2(FFABS(level)==1);
4425
4426                     if(i < last_non_zero){
4427                         int next_i= i + run2 + 1;
4428                         int next_level= block[ perm_scantable[next_i] ] + 64;
4429
4430                         if(next_level&(~127))
4431                             next_level= 0;
4432
4433                         if(next_i < last_non_zero)
4434                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4435                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4436                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4437                         else
4438                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4439                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4440                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4441                     }else{
4442                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4443                         if(prev_level){
4444                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4445                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4446                         }
4447                     }
4448                 }
4449
4450                 score *= lambda;
4451
4452                 unquant_change= new_coeff - old_coeff;
4453                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4454
4455                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4456                                                    unquant_change);
4457                 if(score<best_score){
4458                     best_score= score;
4459                     best_coeff= i;
4460                     best_change= change;
4461                     best_unquant_change= unquant_change;
4462                 }
4463             }
4464             if(level){
4465                 prev_level= level + 64;
4466                 if(prev_level&(~127))
4467                     prev_level= 0;
4468                 prev_run= run;
4469                 run=0;
4470             }else{
4471                 run++;
4472             }
4473         }
4474 #ifdef REFINE_STATS
4475 STOP_TIMER("iterative step")}
4476 #endif
4477
4478         if(best_change){
4479             int j= perm_scantable[ best_coeff ];
4480
4481             block[j] += best_change;
4482
4483             if(best_coeff > last_non_zero){
4484                 last_non_zero= best_coeff;
4485                 av_assert2(block[j]);
4486 #ifdef REFINE_STATS
4487 after_last++;
4488 #endif
4489             }else{
4490 #ifdef REFINE_STATS
4491 if(block[j]){
4492     if(block[j] - best_change){
4493         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4494             raise++;
4495         }else{
4496             lower++;
4497         }
4498     }else{
4499         from_zero++;
4500     }
4501 }else{
4502     to_zero++;
4503 }
4504 #endif
4505                 for(; last_non_zero>=start_i; last_non_zero--){
4506                     if(block[perm_scantable[last_non_zero]])
4507                         break;
4508                 }
4509             }
4510 #ifdef REFINE_STATS
4511 count++;
4512 if(256*256*256*64 % count == 0){
4513     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4514 }
4515 #endif
4516             run=0;
4517             rle_index=0;
4518             for(i=start_i; i<=last_non_zero; i++){
4519                 int j= perm_scantable[i];
4520                 const int level= block[j];
4521
4522                  if(level){
4523                      run_tab[rle_index++]=run;
4524                      run=0;
4525                  }else{
4526                      run++;
4527                  }
4528             }
4529
4530             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4531         }else{
4532             break;
4533         }
4534     }
4535 #ifdef REFINE_STATS
4536 if(last_non_zero>0){
4537 STOP_TIMER("iterative search")
4538 }
4539 }
4540 #endif
4541
4542     return last_non_zero;
4543 }
4544
/**
 * Permute an 8x8 block according to a permutation vector.
 *
 * Only the coefficients at scantable[0..last] are moved; the function returns
 * without touching the block when last <= 0.
 *
 * @param block the block which will be permuted in place according to
 *              the given permutation vector
 * @param permutation the permutation vector: the coefficient at index j is
 *                    moved to index permutation[j]; it is only read
 * @param last the last non zero coefficient in scantable order, used to
 *             speed the permutation up
 * @param scantable the used scantable, this is only used to speed the
 *                  permutation up, the block is not (inverse) permuted
 *                  to scantable order!
 */
static void block_permute(int16_t *block, const uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int i;
    int16_t temp[64];

    if (last <= 0)
        return;
    //FIXME it is ok but not clean and might fail for some permutations
    // if (permutation[1] == 1)
    // return;

    /* first pass: save and clear every coefficient that will be moved */
    for (i = 0; i <= last; i++) {
        const int j = scantable[i];
        temp[j] = block[j];
        block[j] = 0;
    }

    /* second pass: store each saved coefficient at its permuted index */
    for (i = 0; i <= last; i++) {
        const int j = scantable[i];
        const int perm_j = permutation[j];
        block[perm_j] = temp[j];
    }
}
4580
4581 int ff_dct_quantize_c(MpegEncContext *s,
4582                         int16_t *block, int n,
4583                         int qscale, int *overflow)
4584 {
4585     int i, j, level, last_non_zero, q, start_i;
4586     const int *qmat;
4587     const uint8_t *scantable= s->intra_scantable.scantable;
4588     int bias;
4589     int max=0;
4590     unsigned int threshold1, threshold2;
4591
4592     s->fdsp.fdct(block);
4593
4594     if(s->dct_error_sum)
4595         s->denoise_dct(s, block);
4596
4597     if (s->mb_intra) {
4598         if (!s->h263_aic) {
4599             if (n < 4)
4600                 q = s->y_dc_scale;
4601             else
4602                 q = s->c_dc_scale;
4603             q = q << 3;
4604         } else
4605             /* For AIC we skip quant/dequant of INTRADC */
4606             q = 1 << 3;
4607
4608         /* note: block[0] is assumed to be positive */
4609         block[0] = (block[0] + (q >> 1)) / q;
4610         start_i = 1;
4611         last_non_zero = 0;
4612         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4613         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4614     } else {
4615         start_i = 0;
4616         last_non_zero = -1;
4617         qmat = s->q_inter_matrix[qscale];
4618         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4619     }
4620     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4621     threshold2= (threshold1<<1);
4622     for(i=63;i>=start_i;i--) {
4623         j = scantable[i];
4624         level = block[j] * qmat[j];
4625
4626         if(((unsigned)(level+threshold1))>threshold2){
4627             last_non_zero = i;
4628             break;
4629         }else{
4630             block[j]=0;
4631         }
4632     }
4633     for(i=start_i; i<=last_non_zero; i++) {
4634         j = scantable[i];
4635         level = block[j] * qmat[j];
4636
4637 //        if(   bias+level >= (1<<QMAT_SHIFT)
4638 //           || bias-level >= (1<<QMAT_SHIFT)){
4639         if(((unsigned)(level+threshold1))>threshold2){
4640             if(level>0){
4641                 level= (bias + level)>>QMAT_SHIFT;
4642                 block[j]= level;
4643             }else{
4644                 level= (bias - level)>>QMAT_SHIFT;
4645                 block[j]= -level;
4646             }
4647             max |=level;
4648         }else{
4649             block[j]=0;
4650         }
4651     }
4652     *overflow= s->max_qcoeff < max; //overflow might have happened
4653
4654     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4655     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4656         block_permute(block, s->idsp.idct_permutation,
4657                       scantable, last_non_zero);
4658
4659     return last_non_zero;
4660 }
4661
/* Byte offset of field x inside MpegEncContext, for the AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags used by the AVOption entries below. The expansion is parenthesized
 * so that it stays a single operand next to operators that bind tighter
 * than '|' (e.g. '&' or '=='). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4664 static const AVOption h263_options[] = {
4665     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4666     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4667     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4668     FF_MPV_COMMON_OPTS
4669     { NULL },
4670 };
4671
4672 static const AVClass h263_class = {
4673     .class_name = "H.263 encoder",
4674     .item_name  = av_default_item_name,
4675     .option     = h263_options,
4676     .version    = LIBAVUTIL_VERSION_INT,
4677 };
4678
4679 AVCodec ff_h263_encoder = {
4680     .name           = "h263",
4681     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4682     .type           = AVMEDIA_TYPE_VIDEO,
4683     .id             = AV_CODEC_ID_H263,
4684     .priv_data_size = sizeof(MpegEncContext),
4685     .init           = ff_mpv_encode_init,
4686     .encode2        = ff_mpv_encode_picture,
4687     .close          = ff_mpv_encode_end,
4688     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4689     .priv_class     = &h263_class,
4690 };
4691
4692 static const AVOption h263p_options[] = {
4693     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4694     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4695     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4696     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4697     FF_MPV_COMMON_OPTS
4698     { NULL },
4699 };
4700 static const AVClass h263p_class = {
4701     .class_name = "H.263p encoder",
4702     .item_name  = av_default_item_name,
4703     .option     = h263p_options,
4704     .version    = LIBAVUTIL_VERSION_INT,
4705 };
4706
4707 AVCodec ff_h263p_encoder = {
4708     .name           = "h263p",
4709     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4710     .type           = AVMEDIA_TYPE_VIDEO,
4711     .id             = AV_CODEC_ID_H263P,
4712     .priv_data_size = sizeof(MpegEncContext),
4713     .init           = ff_mpv_encode_init,
4714     .encode2        = ff_mpv_encode_picture,
4715     .close          = ff_mpv_encode_end,
4716     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4717     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4718     .priv_class     = &h263p_class,
4719 };
4720
4721 static const AVClass msmpeg4v2_class = {
4722     .class_name = "msmpeg4v2 encoder",
4723     .item_name  = av_default_item_name,
4724     .option     = ff_mpv_generic_options,
4725     .version    = LIBAVUTIL_VERSION_INT,
4726 };
4727
4728 AVCodec ff_msmpeg4v2_encoder = {
4729     .name           = "msmpeg4v2",
4730     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4731     .type           = AVMEDIA_TYPE_VIDEO,
4732     .id             = AV_CODEC_ID_MSMPEG4V2,
4733     .priv_data_size = sizeof(MpegEncContext),
4734     .init           = ff_mpv_encode_init,
4735     .encode2        = ff_mpv_encode_picture,
4736     .close          = ff_mpv_encode_end,
4737     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4738     .priv_class     = &msmpeg4v2_class,
4739 };
4740
4741 static const AVClass msmpeg4v3_class = {
4742     .class_name = "msmpeg4v3 encoder",
4743     .item_name  = av_default_item_name,
4744     .option     = ff_mpv_generic_options,
4745     .version    = LIBAVUTIL_VERSION_INT,
4746 };
4747
4748 AVCodec ff_msmpeg4v3_encoder = {
4749     .name           = "msmpeg4",
4750     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4751     .type           = AVMEDIA_TYPE_VIDEO,
4752     .id             = AV_CODEC_ID_MSMPEG4V3,
4753     .priv_data_size = sizeof(MpegEncContext),
4754     .init           = ff_mpv_encode_init,
4755     .encode2        = ff_mpv_encode_picture,
4756     .close          = ff_mpv_encode_end,
4757     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4758     .priv_class     = &msmpeg4v3_class,
4759 };
4760
4761 static const AVClass wmv1_class = {
4762     .class_name = "wmv1 encoder",
4763     .item_name  = av_default_item_name,
4764     .option     = ff_mpv_generic_options,
4765     .version    = LIBAVUTIL_VERSION_INT,
4766 };
4767
4768 AVCodec ff_wmv1_encoder = {
4769     .name           = "wmv1",
4770     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4771     .type           = AVMEDIA_TYPE_VIDEO,
4772     .id             = AV_CODEC_ID_WMV1,
4773     .priv_data_size = sizeof(MpegEncContext),
4774     .init           = ff_mpv_encode_init,
4775     .encode2        = ff_mpv_encode_picture,
4776     .close          = ff_mpv_encode_end,
4777     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4778     .priv_class     = &wmv1_class,
4779 };