]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
avcodec/pngenc: Check that there is at least 1 frame
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /*
26  * non linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
27  */
28
29 /**
30  * @file
31  * The simplest mpeg encoder (well, it was the simplest!).
32  */
33
34 #include <stdint.h>
35
36 #include "libavutil/internal.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/pixdesc.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/timer.h"
42 #include "avcodec.h"
43 #include "dct.h"
44 #include "idctdsp.h"
45 #include "mpeg12.h"
46 #include "mpegvideo.h"
47 #include "mpegvideodata.h"
48 #include "h261.h"
49 #include "h263.h"
50 #include "h263data.h"
51 #include "mjpegenc_common.h"
52 #include "mathops.h"
53 #include "mpegutils.h"
54 #include "mjpegenc.h"
55 #include "msmpeg4.h"
56 #include "pixblockdsp.h"
57 #include "qpeldsp.h"
58 #include "faandct.h"
59 #include "thread.h"
60 #include "aandcttab.h"
61 #include "flv.h"
62 #include "mpeg4video.h"
63 #include "internal.h"
64 #include "bytestream.h"
65 #include "wmv2.h"
66 #include "rv10.h"
67 #include <limits.h>
68 #include "sp5x.h"
69
/* Number of fractional bits carried by the quantiser bias values used in
 * this file (e.g. an intra bias of 3 << (QUANT_BIAS_SHIFT - 3) stands
 * for 3/8). */
70 #define QUANT_BIAS_SHIFT 8
71
/* Fixed-point shifts used by ff_convert_matrix(): QMAT_SHIFT_MMX scales the
 * 16-bit qmat16 factors, QMAT_SHIFT scales the int qmat factors. */
72 #define QMAT_SHIFT_MMX 16
73 #define QMAT_SHIFT 21
74
/* Forward declarations of file-local helpers. */
75 static int encode_picture(MpegEncContext *s, int picture_number);
76 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
77 static int sse_mb(MpegEncContext *s);
78 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
79 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
80
/* Default tables installed by mpv_encode_defaults() as s->me.mv_penalty and
 * s->fcode_tab respectively. */
81 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
82 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
83
/* Option table made of the FF_MPV_COMMON_OPTS entries followed by a NULL
 * terminator. */
84 const AVOption ff_mpv_generic_options[] = {
85     FF_MPV_COMMON_OPTS
86     { NULL },
87 };
88
89 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
90                        uint16_t (*qmat16)[2][64],
91                        const uint16_t *quant_matrix,
92                        int bias, int qmin, int qmax, int intra)
93 {
94     FDCTDSPContext *fdsp = &s->fdsp;
95     int qscale;
96     int shift = 0;
97
98     for (qscale = qmin; qscale <= qmax; qscale++) {
99         int i;
100         int qscale2;
101
102         if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
103         else                 qscale2 = qscale << 1;
104
105         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
106 #if CONFIG_FAANDCT
107             fdsp->fdct == ff_faandct            ||
108 #endif /* CONFIG_FAANDCT */
109             fdsp->fdct == ff_jpeg_fdct_islow_10) {
110             for (i = 0; i < 64; i++) {
111                 const int j = s->idsp.idct_permutation[i];
112                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
113                 /* 16 <= qscale * quant_matrix[i] <= 7905
114                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
115                  *             19952 <=              x  <= 249205026
116                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
117                  *           3444240 >= (1 << 36) / (x) >= 275 */
118
119                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
120             }
121         } else if (fdsp->fdct == ff_fdct_ifast) {
122             for (i = 0; i < 64; i++) {
123                 const int j = s->idsp.idct_permutation[i];
124                 int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
125                 /* 16 <= qscale * quant_matrix[i] <= 7905
126                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
127                  *             19952 <=              x  <= 249205026
128                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
129                  *           3444240 >= (1 << 36) / (x) >= 275 */
130
131                 qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
132             }
133         } else {
134             for (i = 0; i < 64; i++) {
135                 const int j = s->idsp.idct_permutation[i];
136                 int64_t den = (int64_t) qscale2 * quant_matrix[j];
137                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
138                  * Assume x = qscale * quant_matrix[i]
139                  * So             16 <=              x  <= 7905
140                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
141                  * so          32768 >= (1 << 19) / (x) >= 67 */
142                 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
143                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
144                 //                    (qscale * quant_matrix[i]);
145                 qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
146
147                 if (qmat16[qscale][0][i] == 0 ||
148                     qmat16[qscale][0][i] == 128 * 256)
149                     qmat16[qscale][0][i] = 128 * 256 - 1;
150                 qmat16[qscale][1][i] =
151                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
152                                 qmat16[qscale][0][i]);
153             }
154         }
155
156         for (i = intra; i < 64; i++) {
157             int64_t max = 8191;
158             if (fdsp->fdct == ff_fdct_ifast) {
159                 max = (8191LL * ff_aanscales[i]) >> 14;
160             }
161             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
162                 shift++;
163             }
164         }
165     }
166     if (shift) {
167         av_log(NULL, AV_LOG_INFO,
168                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
169                QMAT_SHIFT - shift);
170     }
171 }
172
173 static inline void update_qscale(MpegEncContext *s)
174 {
175     if (s->q_scale_type == 1 && 0) {
176         int i;
177         int bestdiff=INT_MAX;
178         int best = 1;
179
180         for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
181             int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
182             if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
183                 (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
184                 continue;
185             if (diff < bestdiff) {
186                 bestdiff = diff;
187                 best = i;
188             }
189         }
190         s->qscale = best;
191     } else {
192         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
193                     (FF_LAMBDA_SHIFT + 7);
194         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
195     }
196
197     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
198                  FF_LAMBDA_SHIFT;
199 }
200
201 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
202 {
203     int i;
204
205     if (matrix) {
206         put_bits(pb, 1, 1);
207         for (i = 0; i < 64; i++) {
208             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
209         }
210     } else
211         put_bits(pb, 1, 0);
212 }
213
214 /**
215  * init s->current_picture.qscale_table from s->lambda_table
216  */
217 void ff_init_qscale_tab(MpegEncContext *s)
218 {
219     int8_t * const qscale_table = s->current_picture.qscale_table;
220     int i;
221
222     for (i = 0; i < s->mb_num; i++) {
223         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
224         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
225         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
226                                                   s->avctx->qmax);
227     }
228 }
229
230 static void update_duplicate_context_after_me(MpegEncContext *dst,
231                                               MpegEncContext *src)
232 {
233 #define COPY(a) dst->a= src->a
234     COPY(pict_type);
235     COPY(current_picture);
236     COPY(f_code);
237     COPY(b_code);
238     COPY(qscale);
239     COPY(lambda);
240     COPY(lambda2);
241     COPY(picture_in_gop_number);
242     COPY(gop_picture_number);
243     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
244     COPY(progressive_frame);    // FIXME don't set in encode_header
245     COPY(partitioned_frame);    // FIXME don't set in encode_header
246 #undef COPY
247 }
248
249 /**
250  * Set the given MpegEncContext to defaults for encoding.
251  * the changed fields will not depend upon the prior state of the MpegEncContext.
252  */
253 static void mpv_encode_defaults(MpegEncContext *s)
254 {
255     int i;
256     ff_mpv_common_defaults(s);
257
258     for (i = -16; i < 16; i++) {
259         default_fcode_tab[i + MAX_MV] = 1;
260     }
261     s->me.mv_penalty = default_mv_penalty;
262     s->fcode_tab     = default_fcode_tab;
263
264     s->input_picture_number  = 0;
265     s->picture_in_gop_number = 0;
266 }
267
268 av_cold int ff_dct_encode_init(MpegEncContext *s) {
269     if (ARCH_X86)
270         ff_dct_encode_init_x86(s);
271
272     if (CONFIG_H263_ENCODER)
273         ff_h263dsp_init(&s->h263dsp);
274     if (!s->dct_quantize)
275         s->dct_quantize = ff_dct_quantize_c;
276     if (!s->denoise_dct)
277         s->denoise_dct  = denoise_dct_c;
278     s->fast_dct_quantize = s->dct_quantize;
279     if (s->avctx->trellis)
280         s->dct_quantize  = dct_quantize_trellis_c;
281
282     return 0;
283 }
284
285 /* init video encoder */
286 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
287 {
288     MpegEncContext *s = avctx->priv_data;
289     int i, ret, format_supported;
290
291     mpv_encode_defaults(s);
292
293     switch (avctx->codec_id) {
294     case AV_CODEC_ID_MPEG2VIDEO:
295         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
296             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
297             av_log(avctx, AV_LOG_ERROR,
298                    "only YUV420 and YUV422 are supported\n");
299             return -1;
300         }
301         break;
302     case AV_CODEC_ID_MJPEG:
303     case AV_CODEC_ID_AMV:
304         format_supported = 0;
305         /* JPEG color space */
306         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
307             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
308             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
309             (avctx->color_range == AVCOL_RANGE_JPEG &&
310              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
311               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
312               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
313             format_supported = 1;
314         /* MPEG color space */
315         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
316                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
317                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
318                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
319             format_supported = 1;
320
321         if (!format_supported) {
322             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
323             return -1;
324         }
325         break;
326     default:
327         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
328             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
329             return -1;
330         }
331     }
332
333     switch (avctx->pix_fmt) {
334     case AV_PIX_FMT_YUVJ444P:
335     case AV_PIX_FMT_YUV444P:
336         s->chroma_format = CHROMA_444;
337         break;
338     case AV_PIX_FMT_YUVJ422P:
339     case AV_PIX_FMT_YUV422P:
340         s->chroma_format = CHROMA_422;
341         break;
342     case AV_PIX_FMT_YUVJ420P:
343     case AV_PIX_FMT_YUV420P:
344     default:
345         s->chroma_format = CHROMA_420;
346         break;
347     }
348
349     s->bit_rate = avctx->bit_rate;
350     s->width    = avctx->width;
351     s->height   = avctx->height;
352     if (avctx->gop_size > 600 &&
353         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
354         av_log(avctx, AV_LOG_WARNING,
355                "keyframe interval too large!, reducing it from %d to %d\n",
356                avctx->gop_size, 600);
357         avctx->gop_size = 600;
358     }
359     s->gop_size     = avctx->gop_size;
360     s->avctx        = avctx;
361     if (avctx->max_b_frames > MAX_B_FRAMES) {
362         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
363                "is %d.\n", MAX_B_FRAMES);
364         avctx->max_b_frames = MAX_B_FRAMES;
365     }
366     s->max_b_frames = avctx->max_b_frames;
367     s->codec_id     = avctx->codec->id;
368     s->strict_std_compliance = avctx->strict_std_compliance;
369     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
370     s->mpeg_quant         = avctx->mpeg_quant;
371     s->rtp_mode           = !!avctx->rtp_payload_size;
372     s->intra_dc_precision = avctx->intra_dc_precision;
373
374     // workaround some differences between how applications specify dc precision
375     if (s->intra_dc_precision < 0) {
376         s->intra_dc_precision += 8;
377     } else if (s->intra_dc_precision >= 8)
378         s->intra_dc_precision -= 8;
379
380     if (s->intra_dc_precision < 0) {
381         av_log(avctx, AV_LOG_ERROR,
382                 "intra dc precision must be positive, note some applications use"
383                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
384         return AVERROR(EINVAL);
385     }
386
387     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
388         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
389         return AVERROR(EINVAL);
390     }
391     s->user_specified_pts = AV_NOPTS_VALUE;
392
393     if (s->gop_size <= 1) {
394         s->intra_only = 1;
395         s->gop_size   = 12;
396     } else {
397         s->intra_only = 0;
398     }
399
400 #if FF_API_MOTION_EST
401 FF_DISABLE_DEPRECATION_WARNINGS
402     s->me_method = avctx->me_method;
403 FF_ENABLE_DEPRECATION_WARNINGS
404 #endif
405
406     /* Fixed QSCALE */
407     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
408
409 #if FF_API_MPV_OPT
410     FF_DISABLE_DEPRECATION_WARNINGS
411     if (avctx->border_masking != 0.0)
412         s->border_masking = avctx->border_masking;
413     FF_ENABLE_DEPRECATION_WARNINGS
414 #endif
415
416     s->adaptive_quant = (s->avctx->lumi_masking ||
417                          s->avctx->dark_masking ||
418                          s->avctx->temporal_cplx_masking ||
419                          s->avctx->spatial_cplx_masking  ||
420                          s->avctx->p_masking      ||
421                          s->border_masking ||
422                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
423                         !s->fixed_qscale;
424
425     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
426
427     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
428         switch(avctx->codec_id) {
429         case AV_CODEC_ID_MPEG1VIDEO:
430         case AV_CODEC_ID_MPEG2VIDEO:
431             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
432             break;
433         case AV_CODEC_ID_MPEG4:
434         case AV_CODEC_ID_MSMPEG4V1:
435         case AV_CODEC_ID_MSMPEG4V2:
436         case AV_CODEC_ID_MSMPEG4V3:
437             if       (avctx->rc_max_rate >= 15000000) {
438                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
439             } else if(avctx->rc_max_rate >=  2000000) {
440                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
441             } else if(avctx->rc_max_rate >=   384000) {
442                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
443             } else
444                 avctx->rc_buffer_size = 40;
445             avctx->rc_buffer_size *= 16384;
446             break;
447         }
448         if (avctx->rc_buffer_size) {
449             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
450         }
451     }
452
453     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
454         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
455         return -1;
456     }
457
458     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
459         av_log(avctx, AV_LOG_INFO,
460                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
461     }
462
463     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
464         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
465         return -1;
466     }
467
468     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
469         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
470         return -1;
471     }
472
473     if (avctx->rc_max_rate &&
474         avctx->rc_max_rate == avctx->bit_rate &&
475         avctx->rc_max_rate != avctx->rc_min_rate) {
476         av_log(avctx, AV_LOG_INFO,
477                "impossible bitrate constraints, this will fail\n");
478     }
479
480     if (avctx->rc_buffer_size &&
481         avctx->bit_rate * (int64_t)avctx->time_base.num >
482             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
483         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
484         return -1;
485     }
486
487     if (!s->fixed_qscale &&
488         avctx->bit_rate * av_q2d(avctx->time_base) >
489             avctx->bit_rate_tolerance) {
490         av_log(avctx, AV_LOG_WARNING,
491                "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, (int64_t)avctx->bit_rate);
492         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
493     }
494
495     if (s->avctx->rc_max_rate &&
496         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
497         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
498          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
499         90000LL * (avctx->rc_buffer_size - 1) >
500             s->avctx->rc_max_rate * 0xFFFFLL) {
501         av_log(avctx, AV_LOG_INFO,
502                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
503                "specified vbv buffer is too large for the given bitrate!\n");
504     }
505
506     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
507         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
508         s->codec_id != AV_CODEC_ID_FLV1) {
509         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
510         return -1;
511     }
512
513     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
514         av_log(avctx, AV_LOG_ERROR,
515                "OBMC is only supported with simple mb decision\n");
516         return -1;
517     }
518
519     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
520         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
521         return -1;
522     }
523
524     if (s->max_b_frames                    &&
525         s->codec_id != AV_CODEC_ID_MPEG4      &&
526         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
527         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
528         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
529         return -1;
530     }
531     if (s->max_b_frames < 0) {
532         av_log(avctx, AV_LOG_ERROR,
533                "max b frames must be 0 or positive for mpegvideo based encoders\n");
534         return -1;
535     }
536
537     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
538          s->codec_id == AV_CODEC_ID_H263  ||
539          s->codec_id == AV_CODEC_ID_H263P) &&
540         (avctx->sample_aspect_ratio.num > 255 ||
541          avctx->sample_aspect_ratio.den > 255)) {
542         av_log(avctx, AV_LOG_WARNING,
543                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
544                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
545         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
546                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
547     }
548
549     if ((s->codec_id == AV_CODEC_ID_H263  ||
550          s->codec_id == AV_CODEC_ID_H263P) &&
551         (avctx->width  > 2048 ||
552          avctx->height > 1152 )) {
553         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
554         return -1;
555     }
556     if ((s->codec_id == AV_CODEC_ID_H263  ||
557          s->codec_id == AV_CODEC_ID_H263P) &&
558         ((avctx->width &3) ||
559          (avctx->height&3) )) {
560         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
561         return -1;
562     }
563
564     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
565         (avctx->width  > 4095 ||
566          avctx->height > 4095 )) {
567         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
568         return -1;
569     }
570
571     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
572         (avctx->width  > 16383 ||
573          avctx->height > 16383 )) {
574         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
575         return -1;
576     }
577
578     if (s->codec_id == AV_CODEC_ID_RV10 &&
579         (avctx->width &15 ||
580          avctx->height&15 )) {
581         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
582         return AVERROR(EINVAL);
583     }
584
585     if (s->codec_id == AV_CODEC_ID_RV20 &&
586         (avctx->width &3 ||
587          avctx->height&3 )) {
588         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
589         return AVERROR(EINVAL);
590     }
591
592     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
593          s->codec_id == AV_CODEC_ID_WMV2) &&
594          avctx->width & 1) {
595          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
596          return -1;
597     }
598
599     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
600         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
601         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
602         return -1;
603     }
604
605     // FIXME mpeg2 uses that too
606     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
607                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
608         av_log(avctx, AV_LOG_ERROR,
609                "mpeg2 style quantization not supported by codec\n");
610         return -1;
611     }
612
613     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
614         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
615         return -1;
616     }
617
618     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
619         s->avctx->mb_decision != FF_MB_DECISION_RD) {
620         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
621         return -1;
622     }
623
624     if (s->avctx->scenechange_threshold < 1000000000 &&
625         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
626         av_log(avctx, AV_LOG_ERROR,
627                "closed gop with scene change detection are not supported yet, "
628                "set threshold to 1000000000\n");
629         return -1;
630     }
631
632     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
633         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
634             av_log(avctx, AV_LOG_ERROR,
635                   "low delay forcing is only available for mpeg2\n");
636             return -1;
637         }
638         if (s->max_b_frames != 0) {
639             av_log(avctx, AV_LOG_ERROR,
640                    "b frames cannot be used with low delay\n");
641             return -1;
642         }
643     }
644
645     if (s->q_scale_type == 1) {
646         if (avctx->qmax > 28) {
647             av_log(avctx, AV_LOG_ERROR,
648                    "non linear quant only supports qmax <= 28 currently\n");
649             return -1;
650         }
651     }
652
653     if (s->avctx->thread_count > 1         &&
654         s->codec_id != AV_CODEC_ID_MPEG4      &&
655         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
656         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
657         s->codec_id != AV_CODEC_ID_MJPEG      &&
658         (s->codec_id != AV_CODEC_ID_H263P)) {
659         av_log(avctx, AV_LOG_ERROR,
660                "multi threaded encoding not supported by codec\n");
661         return -1;
662     }
663
664     if (s->avctx->thread_count < 1) {
665         av_log(avctx, AV_LOG_ERROR,
666                "automatic thread number detection not supported by codec, "
667                "patch welcome\n");
668         return -1;
669     }
670
671     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
672         s->rtp_mode = 1;
673
674     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
675         s->h263_slice_structured = 1;
676
677     if (!avctx->time_base.den || !avctx->time_base.num) {
678         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
679         return -1;
680     }
681
682     if (avctx->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
683         av_log(avctx, AV_LOG_INFO,
684                "notice: b_frame_strategy only affects the first pass\n");
685         avctx->b_frame_strategy = 0;
686     }
687
688     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
689     if (i > 1) {
690         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
691         avctx->time_base.den /= i;
692         avctx->time_base.num /= i;
693         //return -1;
694     }
695
696     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
697         // (a + x * 3 / 8) / x
698         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
699         s->inter_quant_bias = 0;
700     } else {
701         s->intra_quant_bias = 0;
702         // (a - x / 4) / x
703         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
704     }
705
706     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
707         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
708         return AVERROR(EINVAL);
709     }
710
711 #if FF_API_QUANT_BIAS
712 FF_DISABLE_DEPRECATION_WARNINGS
713     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
714         s->intra_quant_bias = avctx->intra_quant_bias;
715     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
716         s->inter_quant_bias = avctx->inter_quant_bias;
717 FF_ENABLE_DEPRECATION_WARNINGS
718 #endif
719
720     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
721
722     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
723         s->avctx->time_base.den > (1 << 16) - 1) {
724         av_log(avctx, AV_LOG_ERROR,
725                "timebase %d/%d not supported by MPEG 4 standard, "
726                "the maximum admitted value for the timebase denominator "
727                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
728                (1 << 16) - 1);
729         return -1;
730     }
731     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
732
733     switch (avctx->codec->id) {
734     case AV_CODEC_ID_MPEG1VIDEO:
735         s->out_format = FMT_MPEG1;
736         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
737         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
738         break;
739     case AV_CODEC_ID_MPEG2VIDEO:
740         s->out_format = FMT_MPEG1;
741         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
742         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
743         s->rtp_mode   = 1;
744         break;
745     case AV_CODEC_ID_MJPEG:
746     case AV_CODEC_ID_AMV:
747         s->out_format = FMT_MJPEG;
748         s->intra_only = 1; /* force intra only for jpeg */
749         if (!CONFIG_MJPEG_ENCODER ||
750             ff_mjpeg_encode_init(s) < 0)
751             return -1;
752         avctx->delay = 0;
753         s->low_delay = 1;
754         break;
755     case AV_CODEC_ID_H261:
756         if (!CONFIG_H261_ENCODER)
757             return -1;
758         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
759             av_log(avctx, AV_LOG_ERROR,
760                    "The specified picture size of %dx%d is not valid for the "
761                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
762                     s->width, s->height);
763             return -1;
764         }
765         s->out_format = FMT_H261;
766         avctx->delay  = 0;
767         s->low_delay  = 1;
768         s->rtp_mode   = 0; /* Sliced encoding not supported */
769         break;
770     case AV_CODEC_ID_H263:
771         if (!CONFIG_H263_ENCODER)
772             return -1;
773         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
774                              s->width, s->height) == 8) {
775             av_log(avctx, AV_LOG_ERROR,
776                    "The specified picture size of %dx%d is not valid for "
777                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
778                    "352x288, 704x576, and 1408x1152. "
779                    "Try H.263+.\n", s->width, s->height);
780             return -1;
781         }
782         s->out_format = FMT_H263;
783         avctx->delay  = 0;
784         s->low_delay  = 1;
785         break;
786     case AV_CODEC_ID_H263P:
787         s->out_format = FMT_H263;
788         s->h263_plus  = 1;
789         /* Fx */
790         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
791         s->modified_quant  = s->h263_aic;
792         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
793         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
794
795         /* /Fx */
796         /* These are just to be sure */
797         avctx->delay = 0;
798         s->low_delay = 1;
799         break;
800     case AV_CODEC_ID_FLV1:
801         s->out_format      = FMT_H263;
802         s->h263_flv        = 2; /* format = 1; 11-bit codes */
803         s->unrestricted_mv = 1;
804         s->rtp_mode  = 0; /* don't allow GOB */
805         avctx->delay = 0;
806         s->low_delay = 1;
807         break;
808     case AV_CODEC_ID_RV10:
809         s->out_format = FMT_H263;
810         avctx->delay  = 0;
811         s->low_delay  = 1;
812         break;
813     case AV_CODEC_ID_RV20:
814         s->out_format      = FMT_H263;
815         avctx->delay       = 0;
816         s->low_delay       = 1;
817         s->modified_quant  = 1;
818         s->h263_aic        = 1;
819         s->h263_plus       = 1;
820         s->loop_filter     = 1;
821         s->unrestricted_mv = 0;
822         break;
823     case AV_CODEC_ID_MPEG4:
824         s->out_format      = FMT_H263;
825         s->h263_pred       = 1;
826         s->unrestricted_mv = 1;
827         s->low_delay       = s->max_b_frames ? 0 : 1;
828         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
829         break;
830     case AV_CODEC_ID_MSMPEG4V2:
831         s->out_format      = FMT_H263;
832         s->h263_pred       = 1;
833         s->unrestricted_mv = 1;
834         s->msmpeg4_version = 2;
835         avctx->delay       = 0;
836         s->low_delay       = 1;
837         break;
838     case AV_CODEC_ID_MSMPEG4V3:
839         s->out_format        = FMT_H263;
840         s->h263_pred         = 1;
841         s->unrestricted_mv   = 1;
842         s->msmpeg4_version   = 3;
843         s->flipflop_rounding = 1;
844         avctx->delay         = 0;
845         s->low_delay         = 1;
846         break;
847     case AV_CODEC_ID_WMV1:
848         s->out_format        = FMT_H263;
849         s->h263_pred         = 1;
850         s->unrestricted_mv   = 1;
851         s->msmpeg4_version   = 4;
852         s->flipflop_rounding = 1;
853         avctx->delay         = 0;
854         s->low_delay         = 1;
855         break;
856     case AV_CODEC_ID_WMV2:
857         s->out_format        = FMT_H263;
858         s->h263_pred         = 1;
859         s->unrestricted_mv   = 1;
860         s->msmpeg4_version   = 5;
861         s->flipflop_rounding = 1;
862         avctx->delay         = 0;
863         s->low_delay         = 1;
864         break;
865     default:
866         return -1;
867     }
868
869     avctx->has_b_frames = !s->low_delay;
870
871     s->encoding = 1;
872
873     s->progressive_frame    =
874     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
875                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
876                                 s->alternate_scan);
877
878     /* init */
879     ff_mpv_idct_init(s);
880     if (ff_mpv_common_init(s) < 0)
881         return -1;
882
883     ff_fdctdsp_init(&s->fdsp, avctx);
884     ff_me_cmp_init(&s->mecc, avctx);
885     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
886     ff_pixblockdsp_init(&s->pdsp, avctx);
887     ff_qpeldsp_init(&s->qdsp);
888
889     if (s->msmpeg4_version) {
890         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
891                           2 * 2 * (MAX_LEVEL + 1) *
892                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
893     }
894     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
895
896     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
897     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
898     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
899     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
900     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
901     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
902     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
903                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
904     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
905                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
906
907     if (s->avctx->noise_reduction) {
908         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
909                           2 * 64 * sizeof(uint16_t), fail);
910     }
911
912     ff_dct_encode_init(s);
913
914     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
915         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
916
917     s->quant_precision = 5;
918
919     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
920     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
921
922     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
923         ff_h261_encode_init(s);
924     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
925         ff_h263_encode_init(s);
926     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
927         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
928             return ret;
929     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
930         && s->out_format == FMT_MPEG1)
931         ff_mpeg1_encode_init(s);
932
933     /* init q matrix */
934     for (i = 0; i < 64; i++) {
935         int j = s->idsp.idct_permutation[i];
936         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
937             s->mpeg_quant) {
938             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
939             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
940         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
941             s->intra_matrix[j] =
942             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
943         } else {
944             /* mpeg1/2 */
945             s->chroma_intra_matrix[j] =
946             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
947             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
948         }
949         if (s->avctx->intra_matrix)
950             s->intra_matrix[j] = s->avctx->intra_matrix[i];
951         if (s->avctx->inter_matrix)
952             s->inter_matrix[j] = s->avctx->inter_matrix[i];
953     }
954
955     /* precompute matrix */
956     /* for mjpeg, we do include qscale in the matrix */
957     if (s->out_format != FMT_MJPEG) {
958         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
959                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
960                           31, 1);
961         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
962                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
963                           31, 0);
964     }
965
966     if (ff_rate_control_init(s) < 0)
967         return -1;
968
969 #if FF_API_ERROR_RATE
970     FF_DISABLE_DEPRECATION_WARNINGS
971     if (avctx->error_rate)
972         s->error_rate = avctx->error_rate;
973     FF_ENABLE_DEPRECATION_WARNINGS;
974 #endif
975
976 #if FF_API_NORMALIZE_AQP
977     FF_DISABLE_DEPRECATION_WARNINGS
978     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
979         s->mpv_flags |= FF_MPV_FLAG_NAQ;
980     FF_ENABLE_DEPRECATION_WARNINGS;
981 #endif
982
983 #if FF_API_MV0
984     FF_DISABLE_DEPRECATION_WARNINGS
985     if (avctx->flags & CODEC_FLAG_MV0)
986         s->mpv_flags |= FF_MPV_FLAG_MV0;
987     FF_ENABLE_DEPRECATION_WARNINGS
988 #endif
989
990 #if FF_API_MPV_OPT
991     FF_DISABLE_DEPRECATION_WARNINGS
992     if (avctx->rc_qsquish != 0.0)
993         s->rc_qsquish = avctx->rc_qsquish;
994     if (avctx->rc_qmod_amp != 0.0)
995         s->rc_qmod_amp = avctx->rc_qmod_amp;
996     if (avctx->rc_qmod_freq)
997         s->rc_qmod_freq = avctx->rc_qmod_freq;
998     if (avctx->rc_buffer_aggressivity != 1.0)
999         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
1000     if (avctx->rc_initial_cplx != 0.0)
1001         s->rc_initial_cplx = avctx->rc_initial_cplx;
1002     if (avctx->lmin)
1003         s->lmin = avctx->lmin;
1004     if (avctx->lmax)
1005         s->lmax = avctx->lmax;
1006
1007     if (avctx->rc_eq) {
1008         av_freep(&s->rc_eq);
1009         s->rc_eq = av_strdup(avctx->rc_eq);
1010         if (!s->rc_eq)
1011             return AVERROR(ENOMEM);
1012     }
1013     FF_ENABLE_DEPRECATION_WARNINGS
1014 #endif
1015
1016     if (avctx->b_frame_strategy == 2) {
1017         for (i = 0; i < s->max_b_frames + 2; i++) {
1018             s->tmp_frames[i] = av_frame_alloc();
1019             if (!s->tmp_frames[i])
1020                 return AVERROR(ENOMEM);
1021
1022             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
1023             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
1024             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
1025
1026             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
1027             if (ret < 0)
1028                 return ret;
1029         }
1030     }
1031
1032     return 0;
1033 fail:
1034     ff_mpv_encode_end(avctx);
1035     return AVERROR_UNKNOWN;
1036 }
1037
1038 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1039 {
1040     MpegEncContext *s = avctx->priv_data;
1041     int i;
1042
1043     ff_rate_control_uninit(s);
1044
1045     ff_mpv_common_end(s);
1046     if (CONFIG_MJPEG_ENCODER &&
1047         s->out_format == FMT_MJPEG)
1048         ff_mjpeg_encode_close(s);
1049
1050     av_freep(&avctx->extradata);
1051
1052     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1053         av_frame_free(&s->tmp_frames[i]);
1054
1055     ff_free_picture_tables(&s->new_picture);
1056     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1057
1058     av_freep(&s->avctx->stats_out);
1059     av_freep(&s->ac_stats);
1060
1061     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1062     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1063     s->q_chroma_intra_matrix=   NULL;
1064     s->q_chroma_intra_matrix16= NULL;
1065     av_freep(&s->q_intra_matrix);
1066     av_freep(&s->q_inter_matrix);
1067     av_freep(&s->q_intra_matrix16);
1068     av_freep(&s->q_inter_matrix16);
1069     av_freep(&s->input_picture);
1070     av_freep(&s->reordered_input_picture);
1071     av_freep(&s->dct_offset);
1072
1073     return 0;
1074 }
1075
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance between the starts of two consecutive rows of src
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
1089
1090 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1091                            uint8_t *ref, int stride)
1092 {
1093     int x, y, w, h;
1094     int acc = 0;
1095
1096     w = s->width  & ~15;
1097     h = s->height & ~15;
1098
1099     for (y = 0; y < h; y += 16) {
1100         for (x = 0; x < w; x += 16) {
1101             int offset = x + y * stride;
1102             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1103                                       stride, 16);
1104             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1105             int sae  = get_sae(src + offset, mean, stride);
1106
1107             acc += sae + 500 < sad;
1108         }
1109     }
1110     return acc;
1111 }
1112
1113 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1114 {
1115     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1116                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1117                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1118                             &s->linesize, &s->uvlinesize);
1119 }
1120
1121 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1122 {
1123     Picture *pic = NULL;
1124     int64_t pts;
1125     int i, display_picture_number = 0, ret;
1126     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1127                                                  (s->low_delay ? 0 : 1);
1128     int direct = 1;
1129
1130     if (pic_arg) {
1131         pts = pic_arg->pts;
1132         display_picture_number = s->input_picture_number++;
1133
1134         if (pts != AV_NOPTS_VALUE) {
1135             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1136                 int64_t last = s->user_specified_pts;
1137
1138                 if (pts <= last) {
1139                     av_log(s->avctx, AV_LOG_ERROR,
1140                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1141                            pts, last);
1142                     return AVERROR(EINVAL);
1143                 }
1144
1145                 if (!s->low_delay && display_picture_number == 1)
1146                     s->dts_delta = pts - last;
1147             }
1148             s->user_specified_pts = pts;
1149         } else {
1150             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1151                 s->user_specified_pts =
1152                 pts = s->user_specified_pts + 1;
1153                 av_log(s->avctx, AV_LOG_INFO,
1154                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1155                        pts);
1156             } else {
1157                 pts = display_picture_number;
1158             }
1159         }
1160     }
1161
1162     if (pic_arg) {
1163         if (!pic_arg->buf[0] ||
1164             pic_arg->linesize[0] != s->linesize ||
1165             pic_arg->linesize[1] != s->uvlinesize ||
1166             pic_arg->linesize[2] != s->uvlinesize)
1167             direct = 0;
1168         if ((s->width & 15) || (s->height & 15))
1169             direct = 0;
1170         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1171             direct = 0;
1172         if (s->linesize & (STRIDE_ALIGN-1))
1173             direct = 0;
1174
1175         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1176                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1177
1178         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1179         if (i < 0)
1180             return i;
1181
1182         pic = &s->picture[i];
1183         pic->reference = 3;
1184
1185         if (direct) {
1186             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1187                 return ret;
1188         }
1189         ret = alloc_picture(s, pic, direct);
1190         if (ret < 0)
1191             return ret;
1192
1193         if (!direct) {
1194             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1195                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1196                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1197                 // empty
1198             } else {
1199                 int h_chroma_shift, v_chroma_shift;
1200                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1201                                                  &h_chroma_shift,
1202                                                  &v_chroma_shift);
1203
1204                 for (i = 0; i < 3; i++) {
1205                     int src_stride = pic_arg->linesize[i];
1206                     int dst_stride = i ? s->uvlinesize : s->linesize;
1207                     int h_shift = i ? h_chroma_shift : 0;
1208                     int v_shift = i ? v_chroma_shift : 0;
1209                     int w = s->width  >> h_shift;
1210                     int h = s->height >> v_shift;
1211                     uint8_t *src = pic_arg->data[i];
1212                     uint8_t *dst = pic->f->data[i];
1213                     int vpad = 16;
1214
1215                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1216                         && !s->progressive_sequence
1217                         && FFALIGN(s->height, 32) - s->height > 16)
1218                         vpad = 32;
1219
1220                     if (!s->avctx->rc_buffer_size)
1221                         dst += INPLACE_OFFSET;
1222
1223                     if (src_stride == dst_stride)
1224                         memcpy(dst, src, src_stride * h);
1225                     else {
1226                         int h2 = h;
1227                         uint8_t *dst2 = dst;
1228                         while (h2--) {
1229                             memcpy(dst2, src, w);
1230                             dst2 += dst_stride;
1231                             src += src_stride;
1232                         }
1233                     }
1234                     if ((s->width & 15) || (s->height & (vpad-1))) {
1235                         s->mpvencdsp.draw_edges(dst, dst_stride,
1236                                                 w, h,
1237                                                 16 >> h_shift,
1238                                                 vpad >> v_shift,
1239                                                 EDGE_BOTTOM);
1240                     }
1241                 }
1242             }
1243         }
1244         ret = av_frame_copy_props(pic->f, pic_arg);
1245         if (ret < 0)
1246             return ret;
1247
1248         pic->f->display_picture_number = display_picture_number;
1249         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1250     }
1251
1252     /* shift buffer entries */
1253     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1254         s->input_picture[i - 1] = s->input_picture[i];
1255
1256     s->input_picture[encoding_delay] = (Picture*) pic;
1257
1258     return 0;
1259 }
1260
1261 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1262 {
1263     int x, y, plane;
1264     int score = 0;
1265     int64_t score64 = 0;
1266
1267     for (plane = 0; plane < 3; plane++) {
1268         const int stride = p->f->linesize[plane];
1269         const int bw = plane ? 1 : 2;
1270         for (y = 0; y < s->mb_height * bw; y++) {
1271             for (x = 0; x < s->mb_width * bw; x++) {
1272                 int off = p->shared ? 0 : 16;
1273                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1274                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1275                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1276
1277                 switch (FFABS(s->avctx->frame_skip_exp)) {
1278                 case 0: score    =  FFMAX(score, v);          break;
1279                 case 1: score   += FFABS(v);                  break;
1280                 case 2: score64 += v * (int64_t)v;                       break;
1281                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1282                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1283                 }
1284             }
1285         }
1286     }
1287     emms_c();
1288
1289     if (score)
1290         score64 = score;
1291     if (s->avctx->frame_skip_exp < 0)
1292         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1293                       -1.0/s->avctx->frame_skip_exp);
1294
1295     if (score64 < s->avctx->frame_skip_threshold)
1296         return 1;
1297     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1298         return 1;
1299     return 0;
1300 }
1301
1302 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1303 {
1304     AVPacket pkt = { 0 };
1305     int ret, got_output;
1306
1307     av_init_packet(&pkt);
1308     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1309     if (ret < 0)
1310         return ret;
1311
1312     ret = pkt.size;
1313     av_free_packet(&pkt);
1314     return ret;
1315 }
1316
1317 static int estimate_best_b_count(MpegEncContext *s)
1318 {
1319     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1320     AVCodecContext *c = avcodec_alloc_context3(NULL);
1321     const int scale = s->avctx->brd_scale;
1322     int i, j, out_size, p_lambda, b_lambda, lambda2;
1323     int64_t best_rd  = INT64_MAX;
1324     int best_b_count = -1;
1325
1326     if (!c)
1327         return AVERROR(ENOMEM);
1328     av_assert0(scale >= 0 && scale <= 3);
1329
1330     //emms_c();
1331     //s->next_picture_ptr->quality;
1332     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1333     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1334     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1335     if (!b_lambda) // FIXME we should do this somewhere else
1336         b_lambda = p_lambda;
1337     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1338                FF_LAMBDA_SHIFT;
1339
1340     c->width        = s->width  >> scale;
1341     c->height       = s->height >> scale;
1342     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1343     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1344     c->mb_decision  = s->avctx->mb_decision;
1345     c->me_cmp       = s->avctx->me_cmp;
1346     c->mb_cmp       = s->avctx->mb_cmp;
1347     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1348     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1349     c->time_base    = s->avctx->time_base;
1350     c->max_b_frames = s->max_b_frames;
1351
1352     if (avcodec_open2(c, codec, NULL) < 0)
1353         return -1;
1354
1355     for (i = 0; i < s->max_b_frames + 2; i++) {
1356         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1357                                                 s->next_picture_ptr;
1358         uint8_t *data[4];
1359
1360         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1361             pre_input = *pre_input_ptr;
1362             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1363
1364             if (!pre_input.shared && i) {
1365                 data[0] += INPLACE_OFFSET;
1366                 data[1] += INPLACE_OFFSET;
1367                 data[2] += INPLACE_OFFSET;
1368             }
1369
1370             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1371                                        s->tmp_frames[i]->linesize[0],
1372                                        data[0],
1373                                        pre_input.f->linesize[0],
1374                                        c->width, c->height);
1375             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1376                                        s->tmp_frames[i]->linesize[1],
1377                                        data[1],
1378                                        pre_input.f->linesize[1],
1379                                        c->width >> 1, c->height >> 1);
1380             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1381                                        s->tmp_frames[i]->linesize[2],
1382                                        data[2],
1383                                        pre_input.f->linesize[2],
1384                                        c->width >> 1, c->height >> 1);
1385         }
1386     }
1387
1388     for (j = 0; j < s->max_b_frames + 1; j++) {
1389         int64_t rd = 0;
1390
1391         if (!s->input_picture[j])
1392             break;
1393
1394         c->error[0] = c->error[1] = c->error[2] = 0;
1395
1396         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1397         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1398
1399         out_size = encode_frame(c, s->tmp_frames[0]);
1400
1401         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1402
1403         for (i = 0; i < s->max_b_frames + 1; i++) {
1404             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1405
1406             s->tmp_frames[i + 1]->pict_type = is_p ?
1407                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1408             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1409
1410             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1411
1412             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1413         }
1414
1415         /* get the delayed frames */
1416         while (out_size) {
1417             out_size = encode_frame(c, NULL);
1418             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1419         }
1420
1421         rd += c->error[0] + c->error[1] + c->error[2];
1422
1423         if (rd < best_rd) {
1424             best_rd = rd;
1425             best_b_count = j;
1426         }
1427     }
1428
1429     avcodec_close(c);
1430     av_freep(&c);
1431
1432     return best_b_count;
1433 }
1434
1435 static int select_input_picture(MpegEncContext *s)
1436 {
1437     int i, ret;
1438
1439     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1440         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1441     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1442
1443     /* set next picture type & ordering */
1444     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1445         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1446             if (s->picture_in_gop_number < s->gop_size &&
1447                 s->next_picture_ptr &&
1448                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1449                 // FIXME check that te gop check above is +-1 correct
1450                 av_frame_unref(s->input_picture[0]->f);
1451
1452                 ff_vbv_update(s, 0);
1453
1454                 goto no_output_pic;
1455             }
1456         }
1457
1458         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1459             !s->next_picture_ptr || s->intra_only) {
1460             s->reordered_input_picture[0] = s->input_picture[0];
1461             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1462             s->reordered_input_picture[0]->f->coded_picture_number =
1463                 s->coded_picture_number++;
1464         } else {
1465             int b_frames;
1466
1467             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1468                 for (i = 0; i < s->max_b_frames + 1; i++) {
1469                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1470
1471                     if (pict_num >= s->rc_context.num_entries)
1472                         break;
1473                     if (!s->input_picture[i]) {
1474                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1475                         break;
1476                     }
1477
1478                     s->input_picture[i]->f->pict_type =
1479                         s->rc_context.entry[pict_num].new_pict_type;
1480                 }
1481             }
1482
1483             if (s->avctx->b_frame_strategy == 0) {
1484                 b_frames = s->max_b_frames;
1485                 while (b_frames && !s->input_picture[b_frames])
1486                     b_frames--;
1487             } else if (s->avctx->b_frame_strategy == 1) {
1488                 for (i = 1; i < s->max_b_frames + 1; i++) {
1489                     if (s->input_picture[i] &&
1490                         s->input_picture[i]->b_frame_score == 0) {
1491                         s->input_picture[i]->b_frame_score =
1492                             get_intra_count(s,
1493                                             s->input_picture[i    ]->f->data[0],
1494                                             s->input_picture[i - 1]->f->data[0],
1495                                             s->linesize) + 1;
1496                     }
1497                 }
1498                 for (i = 0; i < s->max_b_frames + 1; i++) {
1499                     if (!s->input_picture[i] ||
1500                         s->input_picture[i]->b_frame_score - 1 >
1501                             s->mb_num / s->avctx->b_sensitivity)
1502                         break;
1503                 }
1504
1505                 b_frames = FFMAX(0, i - 1);
1506
1507                 /* reset scores */
1508                 for (i = 0; i < b_frames + 1; i++) {
1509                     s->input_picture[i]->b_frame_score = 0;
1510                 }
1511             } else if (s->avctx->b_frame_strategy == 2) {
1512                 b_frames = estimate_best_b_count(s);
1513             } else {
1514                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1515                 b_frames = 0;
1516             }
1517
1518             emms_c();
1519
1520             for (i = b_frames - 1; i >= 0; i--) {
1521                 int type = s->input_picture[i]->f->pict_type;
1522                 if (type && type != AV_PICTURE_TYPE_B)
1523                     b_frames = i;
1524             }
1525             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1526                 b_frames == s->max_b_frames) {
1527                 av_log(s->avctx, AV_LOG_ERROR,
1528                        "warning, too many b frames in a row\n");
1529             }
1530
1531             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1532                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1533                     s->gop_size > s->picture_in_gop_number) {
1534                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1535                 } else {
1536                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1537                         b_frames = 0;
1538                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1539                 }
1540             }
1541
1542             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1543                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1544                 b_frames--;
1545
1546             s->reordered_input_picture[0] = s->input_picture[b_frames];
1547             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1548                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1549             s->reordered_input_picture[0]->f->coded_picture_number =
1550                 s->coded_picture_number++;
1551             for (i = 0; i < b_frames; i++) {
1552                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1553                 s->reordered_input_picture[i + 1]->f->pict_type =
1554                     AV_PICTURE_TYPE_B;
1555                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1556                     s->coded_picture_number++;
1557             }
1558         }
1559     }
1560 no_output_pic:
1561     if (s->reordered_input_picture[0]) {
1562         s->reordered_input_picture[0]->reference =
1563            s->reordered_input_picture[0]->f->pict_type !=
1564                AV_PICTURE_TYPE_B ? 3 : 0;
1565
1566         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1567         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1568             return ret;
1569
1570         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1571             // input is a shared pix, so we can't modifiy it -> alloc a new
1572             // one & ensure that the shared one is reuseable
1573
1574             Picture *pic;
1575             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1576             if (i < 0)
1577                 return i;
1578             pic = &s->picture[i];
1579
1580             pic->reference = s->reordered_input_picture[0]->reference;
1581             if (alloc_picture(s, pic, 0) < 0) {
1582                 return -1;
1583             }
1584
1585             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1586             if (ret < 0)
1587                 return ret;
1588
1589             /* mark us unused / free shared pic */
1590             av_frame_unref(s->reordered_input_picture[0]->f);
1591             s->reordered_input_picture[0]->shared = 0;
1592
1593             s->current_picture_ptr = pic;
1594         } else {
1595             // input is not a shared pix -> reuse buffer for current_pix
1596             s->current_picture_ptr = s->reordered_input_picture[0];
1597             for (i = 0; i < 4; i++) {
1598                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1599             }
1600         }
1601         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1602         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1603                                        s->current_picture_ptr)) < 0)
1604             return ret;
1605
1606         s->picture_number = s->new_picture.f->display_picture_number;
1607     } else {
1608         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1609     }
1610     return 0;
1611 }
1612
1613 static void frame_end(MpegEncContext *s)
1614 {
1615     if (s->unrestricted_mv &&
1616         s->current_picture.reference &&
1617         !s->intra_only) {
1618         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1619         int hshift = desc->log2_chroma_w;
1620         int vshift = desc->log2_chroma_h;
1621         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1622                                 s->current_picture.f->linesize[0],
1623                                 s->h_edge_pos, s->v_edge_pos,
1624                                 EDGE_WIDTH, EDGE_WIDTH,
1625                                 EDGE_TOP | EDGE_BOTTOM);
1626         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1627                                 s->current_picture.f->linesize[1],
1628                                 s->h_edge_pos >> hshift,
1629                                 s->v_edge_pos >> vshift,
1630                                 EDGE_WIDTH >> hshift,
1631                                 EDGE_WIDTH >> vshift,
1632                                 EDGE_TOP | EDGE_BOTTOM);
1633         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1634                                 s->current_picture.f->linesize[2],
1635                                 s->h_edge_pos >> hshift,
1636                                 s->v_edge_pos >> vshift,
1637                                 EDGE_WIDTH >> hshift,
1638                                 EDGE_WIDTH >> vshift,
1639                                 EDGE_TOP | EDGE_BOTTOM);
1640     }
1641
1642     emms_c();
1643
1644     s->last_pict_type                 = s->pict_type;
1645     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1646     if (s->pict_type!= AV_PICTURE_TYPE_B)
1647         s->last_non_b_pict_type = s->pict_type;
1648
1649 #if FF_API_CODED_FRAME
1650 FF_DISABLE_DEPRECATION_WARNINGS
1651     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1652 FF_ENABLE_DEPRECATION_WARNINGS
1653 #endif
1654 #if FF_API_ERROR_FRAME
1655 FF_DISABLE_DEPRECATION_WARNINGS
1656     memcpy(s->current_picture.f->error, s->current_picture.encoding_error,
1657            sizeof(s->current_picture.encoding_error));
1658 FF_ENABLE_DEPRECATION_WARNINGS
1659 #endif
1660 }
1661
1662 static void update_noise_reduction(MpegEncContext *s)
1663 {
1664     int intra, i;
1665
1666     for (intra = 0; intra < 2; intra++) {
1667         if (s->dct_count[intra] > (1 << 16)) {
1668             for (i = 0; i < 64; i++) {
1669                 s->dct_error_sum[intra][i] >>= 1;
1670             }
1671             s->dct_count[intra] >>= 1;
1672         }
1673
1674         for (i = 0; i < 64; i++) {
1675             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1676                                        s->dct_count[intra] +
1677                                        s->dct_error_sum[intra][i] / 2) /
1678                                       (s->dct_error_sum[intra][i] + 1);
1679         }
1680     }
1681 }
1682
1683 static int frame_start(MpegEncContext *s)
1684 {
1685     int ret;
1686
1687     /* mark & release old frames */
1688     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1689         s->last_picture_ptr != s->next_picture_ptr &&
1690         s->last_picture_ptr->f->buf[0]) {
1691         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1692     }
1693
1694     s->current_picture_ptr->f->pict_type = s->pict_type;
1695     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1696
1697     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1698     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1699                                    s->current_picture_ptr)) < 0)
1700         return ret;
1701
1702     if (s->pict_type != AV_PICTURE_TYPE_B) {
1703         s->last_picture_ptr = s->next_picture_ptr;
1704         if (!s->droppable)
1705             s->next_picture_ptr = s->current_picture_ptr;
1706     }
1707
1708     if (s->last_picture_ptr) {
1709         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1710         if (s->last_picture_ptr->f->buf[0] &&
1711             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1712                                        s->last_picture_ptr)) < 0)
1713             return ret;
1714     }
1715     if (s->next_picture_ptr) {
1716         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1717         if (s->next_picture_ptr->f->buf[0] &&
1718             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1719                                        s->next_picture_ptr)) < 0)
1720             return ret;
1721     }
1722
1723     if (s->picture_structure!= PICT_FRAME) {
1724         int i;
1725         for (i = 0; i < 4; i++) {
1726             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1727                 s->current_picture.f->data[i] +=
1728                     s->current_picture.f->linesize[i];
1729             }
1730             s->current_picture.f->linesize[i] *= 2;
1731             s->last_picture.f->linesize[i]    *= 2;
1732             s->next_picture.f->linesize[i]    *= 2;
1733         }
1734     }
1735
1736     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1737         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1738         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1739     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1740         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1741         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1742     } else {
1743         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1744         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1745     }
1746
1747     if (s->dct_error_sum) {
1748         av_assert2(s->avctx->noise_reduction && s->encoding);
1749         update_noise_reduction(s);
1750     }
1751
1752     return 0;
1753 }
1754
1755 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1756                           const AVFrame *pic_arg, int *got_packet)
1757 {
1758     MpegEncContext *s = avctx->priv_data;
1759     int i, stuffing_count, ret;
1760     int context_count = s->slice_context_count;
1761
1762     s->vbv_ignore_qmax = 0;
1763
1764     s->picture_in_gop_number++;
1765
1766     if (load_input_picture(s, pic_arg) < 0)
1767         return -1;
1768
1769     if (select_input_picture(s) < 0) {
1770         return -1;
1771     }
1772
1773     /* output? */
1774     if (s->new_picture.f->data[0]) {
1775         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1776         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1777                                               :
1778                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1779         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1780             return ret;
1781         if (s->mb_info) {
1782             s->mb_info_ptr = av_packet_new_side_data(pkt,
1783                                  AV_PKT_DATA_H263_MB_INFO,
1784                                  s->mb_width*s->mb_height*12);
1785             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1786         }
1787
1788         for (i = 0; i < context_count; i++) {
1789             int start_y = s->thread_context[i]->start_mb_y;
1790             int   end_y = s->thread_context[i]->  end_mb_y;
1791             int h       = s->mb_height;
1792             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1793             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1794
1795             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1796         }
1797
1798         s->pict_type = s->new_picture.f->pict_type;
1799         //emms_c();
1800         ret = frame_start(s);
1801         if (ret < 0)
1802             return ret;
1803 vbv_retry:
1804         ret = encode_picture(s, s->picture_number);
1805         if (growing_buffer) {
1806             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1807             pkt->data = s->pb.buf;
1808             pkt->size = avctx->internal->byte_buffer_size;
1809         }
1810         if (ret < 0)
1811             return -1;
1812
1813         avctx->header_bits = s->header_bits;
1814         avctx->mv_bits     = s->mv_bits;
1815         avctx->misc_bits   = s->misc_bits;
1816         avctx->i_tex_bits  = s->i_tex_bits;
1817         avctx->p_tex_bits  = s->p_tex_bits;
1818         avctx->i_count     = s->i_count;
1819         // FIXME f/b_count in avctx
1820         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1821         avctx->skip_count  = s->skip_count;
1822
1823         frame_end(s);
1824
1825         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1826             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1827
1828         if (avctx->rc_buffer_size) {
1829             RateControlContext *rcc = &s->rc_context;
1830             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1831             int hq = (s->avctx->mb_decision == FF_MB_DECISION_RD || s->avctx->trellis);
1832             int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1833
1834             if (put_bits_count(&s->pb) > max_size &&
1835                 s->lambda < s->lmax) {
1836                 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1837                                        (s->qscale + 1) / s->qscale);
1838                 if (s->adaptive_quant) {
1839                     int i;
1840                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1841                         s->lambda_table[i] =
1842                             FFMAX(s->lambda_table[i] + min_step,
1843                                   s->lambda_table[i] * (s->qscale + 1) /
1844                                   s->qscale);
1845                 }
1846                 s->mb_skipped = 0;        // done in frame_start()
1847                 // done in encode_picture() so we must undo it
1848                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1849                     if (s->flipflop_rounding          ||
1850                         s->codec_id == AV_CODEC_ID_H263P ||
1851                         s->codec_id == AV_CODEC_ID_MPEG4)
1852                         s->no_rounding ^= 1;
1853                 }
1854                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1855                     s->time_base       = s->last_time_base;
1856                     s->last_non_b_time = s->time - s->pp_time;
1857                 }
1858                 for (i = 0; i < context_count; i++) {
1859                     PutBitContext *pb = &s->thread_context[i]->pb;
1860                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1861                 }
1862                 s->vbv_ignore_qmax = 1;
1863                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1864                 goto vbv_retry;
1865             }
1866
1867             av_assert0(s->avctx->rc_max_rate);
1868         }
1869
1870         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1871             ff_write_pass1_stats(s);
1872
1873         for (i = 0; i < 4; i++) {
1874             s->current_picture_ptr->encoding_error[i] = s->current_picture.encoding_error[i];
1875             avctx->error[i] += s->current_picture_ptr->encoding_error[i];
1876         }
1877         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1878                                        s->current_picture_ptr->encoding_error,
1879                                        (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1880                                        s->pict_type);
1881
1882         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1883             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1884                    avctx->i_tex_bits + avctx->p_tex_bits ==
1885                        put_bits_count(&s->pb));
1886         flush_put_bits(&s->pb);
1887         s->frame_bits  = put_bits_count(&s->pb);
1888
1889         stuffing_count = ff_vbv_update(s, s->frame_bits);
1890         s->stuffing_bits = 8*stuffing_count;
1891         if (stuffing_count) {
1892             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1893                     stuffing_count + 50) {
1894                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1895                 return -1;
1896             }
1897
1898             switch (s->codec_id) {
1899             case AV_CODEC_ID_MPEG1VIDEO:
1900             case AV_CODEC_ID_MPEG2VIDEO:
1901                 while (stuffing_count--) {
1902                     put_bits(&s->pb, 8, 0);
1903                 }
1904             break;
1905             case AV_CODEC_ID_MPEG4:
1906                 put_bits(&s->pb, 16, 0);
1907                 put_bits(&s->pb, 16, 0x1C3);
1908                 stuffing_count -= 4;
1909                 while (stuffing_count--) {
1910                     put_bits(&s->pb, 8, 0xFF);
1911                 }
1912             break;
1913             default:
1914                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1915             }
1916             flush_put_bits(&s->pb);
1917             s->frame_bits  = put_bits_count(&s->pb);
1918         }
1919
1920         /* update mpeg1/2 vbv_delay for CBR */
1921         if (s->avctx->rc_max_rate                          &&
1922             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1923             s->out_format == FMT_MPEG1                     &&
1924             90000LL * (avctx->rc_buffer_size - 1) <=
1925                 s->avctx->rc_max_rate * 0xFFFFLL) {
1926             int vbv_delay, min_delay;
1927             double inbits  = s->avctx->rc_max_rate *
1928                              av_q2d(s->avctx->time_base);
1929             int    minbits = s->frame_bits - 8 *
1930                              (s->vbv_delay_ptr - s->pb.buf - 1);
1931             double bits    = s->rc_context.buffer_index + minbits - inbits;
1932
1933             if (bits < 0)
1934                 av_log(s->avctx, AV_LOG_ERROR,
1935                        "Internal error, negative bits\n");
1936
1937             assert(s->repeat_first_field == 0);
1938
1939             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1940             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1941                         s->avctx->rc_max_rate;
1942
1943             vbv_delay = FFMAX(vbv_delay, min_delay);
1944
1945             av_assert0(vbv_delay < 0xFFFF);
1946
1947             s->vbv_delay_ptr[0] &= 0xF8;
1948             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1949             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1950             s->vbv_delay_ptr[2] &= 0x07;
1951             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1952             avctx->vbv_delay     = vbv_delay * 300;
1953         }
1954         s->total_bits     += s->frame_bits;
1955         avctx->frame_bits  = s->frame_bits;
1956
1957         pkt->pts = s->current_picture.f->pts;
1958         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1959             if (!s->current_picture.f->coded_picture_number)
1960                 pkt->dts = pkt->pts - s->dts_delta;
1961             else
1962                 pkt->dts = s->reordered_pts;
1963             s->reordered_pts = pkt->pts;
1964         } else
1965             pkt->dts = pkt->pts;
1966         if (s->current_picture.f->key_frame)
1967             pkt->flags |= AV_PKT_FLAG_KEY;
1968         if (s->mb_info)
1969             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1970     } else {
1971         s->frame_bits = 0;
1972     }
1973
1974     /* release non-reference frames */
1975     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1976         if (!s->picture[i].reference)
1977             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1978     }
1979
1980     av_assert1((s->frame_bits & 7) == 0);
1981
1982     pkt->size = s->frame_bits / 8;
1983     *got_packet = !!pkt->size;
1984     return 0;
1985 }
1986
1987 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1988                                                 int n, int threshold)
1989 {
1990     static const char tab[64] = {
1991         3, 2, 2, 1, 1, 1, 1, 1,
1992         1, 1, 1, 1, 1, 1, 1, 1,
1993         1, 1, 1, 1, 1, 1, 1, 1,
1994         0, 0, 0, 0, 0, 0, 0, 0,
1995         0, 0, 0, 0, 0, 0, 0, 0,
1996         0, 0, 0, 0, 0, 0, 0, 0,
1997         0, 0, 0, 0, 0, 0, 0, 0,
1998         0, 0, 0, 0, 0, 0, 0, 0
1999     };
2000     int score = 0;
2001     int run = 0;
2002     int i;
2003     int16_t *block = s->block[n];
2004     const int last_index = s->block_last_index[n];
2005     int skip_dc;
2006
2007     if (threshold < 0) {
2008         skip_dc = 0;
2009         threshold = -threshold;
2010     } else
2011         skip_dc = 1;
2012
2013     /* Are all we could set to zero already zero? */
2014     if (last_index <= skip_dc - 1)
2015         return;
2016
2017     for (i = 0; i <= last_index; i++) {
2018         const int j = s->intra_scantable.permutated[i];
2019         const int level = FFABS(block[j]);
2020         if (level == 1) {
2021             if (skip_dc && i == 0)
2022                 continue;
2023             score += tab[run];
2024             run = 0;
2025         } else if (level > 1) {
2026             return;
2027         } else {
2028             run++;
2029         }
2030     }
2031     if (score >= threshold)
2032         return;
2033     for (i = skip_dc; i <= last_index; i++) {
2034         const int j = s->intra_scantable.permutated[i];
2035         block[j] = 0;
2036     }
2037     if (block[0])
2038         s->block_last_index[n] = 0;
2039     else
2040         s->block_last_index[n] = -1;
2041 }
2042
2043 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2044                                int last_index)
2045 {
2046     int i;
2047     const int maxlevel = s->max_qcoeff;
2048     const int minlevel = s->min_qcoeff;
2049     int overflow = 0;
2050
2051     if (s->mb_intra) {
2052         i = 1; // skip clipping of intra dc
2053     } else
2054         i = 0;
2055
2056     for (; i <= last_index; i++) {
2057         const int j = s->intra_scantable.permutated[i];
2058         int level = block[j];
2059
2060         if (level > maxlevel) {
2061             level = maxlevel;
2062             overflow++;
2063         } else if (level < minlevel) {
2064             level = minlevel;
2065             overflow++;
2066         }
2067
2068         block[j] = level;
2069     }
2070
2071     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2072         av_log(s->avctx, AV_LOG_INFO,
2073                "warning, clipping %d dct coefficients to %d..%d\n",
2074                overflow, minlevel, maxlevel);
2075 }
2076
/**
 * Compute a weight for each sample of an 8x8 block.
 *
 * For every position the samples of the surrounding 3x3 window (clipped to
 * the 8x8 block) are accumulated; the weight is
 * 36 * sqrt(count * sum_of_squares - sum^2) / count.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int row_first = FFMAX(row - 1, 0);
        const int row_end   = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int col_first = FFMAX(col - 1, 0);
            const int col_end   = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int ny, nx;

            for (ny = row_first; ny < row_end; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = col_first; nx < col_end; nx++) {
                    const int v = line[nx];

                    sum   += v;
                    sqr   += v * v;
                    count += 1;
                }
            }
            weight[col + 8 * row] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2100
2101 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2102                                                 int motion_x, int motion_y,
2103                                                 int mb_block_height,
2104                                                 int mb_block_width,
2105                                                 int mb_block_count)
2106 {
2107     int16_t weight[12][64];
2108     int16_t orig[12][64];
2109     const int mb_x = s->mb_x;
2110     const int mb_y = s->mb_y;
2111     int i;
2112     int skip_dct[12];
2113     int dct_offset = s->linesize * 8; // default for progressive frames
2114     int uv_dct_offset = s->uvlinesize * 8;
2115     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2116     ptrdiff_t wrap_y, wrap_c;
2117
2118     for (i = 0; i < mb_block_count; i++)
2119         skip_dct[i] = s->skipdct;
2120
2121     if (s->adaptive_quant) {
2122         const int last_qp = s->qscale;
2123         const int mb_xy = mb_x + mb_y * s->mb_stride;
2124
2125         s->lambda = s->lambda_table[mb_xy];
2126         update_qscale(s);
2127
2128         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2129             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2130             s->dquant = s->qscale - last_qp;
2131
2132             if (s->out_format == FMT_H263) {
2133                 s->dquant = av_clip(s->dquant, -2, 2);
2134
2135                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2136                     if (!s->mb_intra) {
2137                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2138                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2139                                 s->dquant = 0;
2140                         }
2141                         if (s->mv_type == MV_TYPE_8X8)
2142                             s->dquant = 0;
2143                     }
2144                 }
2145             }
2146         }
2147         ff_set_qscale(s, last_qp + s->dquant);
2148     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2149         ff_set_qscale(s, s->qscale + s->dquant);
2150
2151     wrap_y = s->linesize;
2152     wrap_c = s->uvlinesize;
2153     ptr_y  = s->new_picture.f->data[0] +
2154              (mb_y * 16 * wrap_y)              + mb_x * 16;
2155     ptr_cb = s->new_picture.f->data[1] +
2156              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2157     ptr_cr = s->new_picture.f->data[2] +
2158              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2159
2160     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2161         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2162         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2163         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2164         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2165                                  wrap_y, wrap_y,
2166                                  16, 16, mb_x * 16, mb_y * 16,
2167                                  s->width, s->height);
2168         ptr_y = ebuf;
2169         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2170                                  wrap_c, wrap_c,
2171                                  mb_block_width, mb_block_height,
2172                                  mb_x * mb_block_width, mb_y * mb_block_height,
2173                                  cw, ch);
2174         ptr_cb = ebuf + 16 * wrap_y;
2175         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2176                                  wrap_c, wrap_c,
2177                                  mb_block_width, mb_block_height,
2178                                  mb_x * mb_block_width, mb_y * mb_block_height,
2179                                  cw, ch);
2180         ptr_cr = ebuf + 16 * wrap_y + 16;
2181     }
2182
2183     if (s->mb_intra) {
2184         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2185             int progressive_score, interlaced_score;
2186
2187             s->interlaced_dct = 0;
2188             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2189                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2190                                                      NULL, wrap_y, 8) - 400;
2191
2192             if (progressive_score > 0) {
2193                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2194                                                         NULL, wrap_y * 2, 8) +
2195                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2196                                                         NULL, wrap_y * 2, 8);
2197                 if (progressive_score > interlaced_score) {
2198                     s->interlaced_dct = 1;
2199
2200                     dct_offset = wrap_y;
2201                     uv_dct_offset = wrap_c;
2202                     wrap_y <<= 1;
2203                     if (s->chroma_format == CHROMA_422 ||
2204                         s->chroma_format == CHROMA_444)
2205                         wrap_c <<= 1;
2206                 }
2207             }
2208         }
2209
2210         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2211         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2212         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2213         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2214
2215         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2216             skip_dct[4] = 1;
2217             skip_dct[5] = 1;
2218         } else {
2219             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2220             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2221             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2222                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2223                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2224             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2225                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2226                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2227                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2228                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2229                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2230                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2231             }
2232         }
2233     } else {
2234         op_pixels_func (*op_pix)[4];
2235         qpel_mc_func (*op_qpix)[16];
2236         uint8_t *dest_y, *dest_cb, *dest_cr;
2237
2238         dest_y  = s->dest[0];
2239         dest_cb = s->dest[1];
2240         dest_cr = s->dest[2];
2241
2242         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2243             op_pix  = s->hdsp.put_pixels_tab;
2244             op_qpix = s->qdsp.put_qpel_pixels_tab;
2245         } else {
2246             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2247             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2248         }
2249
2250         if (s->mv_dir & MV_DIR_FORWARD) {
2251             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2252                           s->last_picture.f->data,
2253                           op_pix, op_qpix);
2254             op_pix  = s->hdsp.avg_pixels_tab;
2255             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2256         }
2257         if (s->mv_dir & MV_DIR_BACKWARD) {
2258             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2259                           s->next_picture.f->data,
2260                           op_pix, op_qpix);
2261         }
2262
2263         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2264             int progressive_score, interlaced_score;
2265
2266             s->interlaced_dct = 0;
2267             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2268                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2269                                                      ptr_y + wrap_y * 8,
2270                                                      wrap_y, 8) - 400;
2271
2272             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2273                 progressive_score -= 400;
2274
2275             if (progressive_score > 0) {
2276                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2277                                                         wrap_y * 2, 8) +
2278                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2279                                                         ptr_y + wrap_y,
2280                                                         wrap_y * 2, 8);
2281
2282                 if (progressive_score > interlaced_score) {
2283                     s->interlaced_dct = 1;
2284
2285                     dct_offset = wrap_y;
2286                     uv_dct_offset = wrap_c;
2287                     wrap_y <<= 1;
2288                     if (s->chroma_format == CHROMA_422)
2289                         wrap_c <<= 1;
2290                 }
2291             }
2292         }
2293
2294         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2295         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2296         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2297                             dest_y + dct_offset, wrap_y);
2298         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2299                             dest_y + dct_offset + 8, wrap_y);
2300
2301         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2302             skip_dct[4] = 1;
2303             skip_dct[5] = 1;
2304         } else {
2305             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2306             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2307             if (!s->chroma_y_shift) { /* 422 */
2308                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2309                                     dest_cb + uv_dct_offset, wrap_c);
2310                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2311                                     dest_cr + uv_dct_offset, wrap_c);
2312             }
2313         }
2314         /* pre quantization */
2315         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2316                 2 * s->qscale * s->qscale) {
2317             // FIXME optimize
2318             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2319                 skip_dct[0] = 1;
2320             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2321                 skip_dct[1] = 1;
2322             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2323                                wrap_y, 8) < 20 * s->qscale)
2324                 skip_dct[2] = 1;
2325             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2326                                wrap_y, 8) < 20 * s->qscale)
2327                 skip_dct[3] = 1;
2328             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2329                 skip_dct[4] = 1;
2330             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2331                 skip_dct[5] = 1;
2332             if (!s->chroma_y_shift) { /* 422 */
2333                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2334                                    dest_cb + uv_dct_offset,
2335                                    wrap_c, 8) < 20 * s->qscale)
2336                     skip_dct[6] = 1;
2337                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2338                                    dest_cr + uv_dct_offset,
2339                                    wrap_c, 8) < 20 * s->qscale)
2340                     skip_dct[7] = 1;
2341             }
2342         }
2343     }
2344
2345     if (s->quantizer_noise_shaping) {
2346         if (!skip_dct[0])
2347             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2348         if (!skip_dct[1])
2349             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2350         if (!skip_dct[2])
2351             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2352         if (!skip_dct[3])
2353             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2354         if (!skip_dct[4])
2355             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2356         if (!skip_dct[5])
2357             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2358         if (!s->chroma_y_shift) { /* 422 */
2359             if (!skip_dct[6])
2360                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2361                                   wrap_c);
2362             if (!skip_dct[7])
2363                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2364                                   wrap_c);
2365         }
2366         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2367     }
2368
2369     /* DCT & quantize */
2370     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2371     {
2372         for (i = 0; i < mb_block_count; i++) {
2373             if (!skip_dct[i]) {
2374                 int overflow;
2375                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2376                 // FIXME we could decide to change to quantizer instead of
2377                 // clipping
2378                 // JS: I don't think that would be a good idea it could lower
2379                 //     quality instead of improve it. Just INTRADC clipping
2380                 //     deserves changes in quantizer
2381                 if (overflow)
2382                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2383             } else
2384                 s->block_last_index[i] = -1;
2385         }
2386         if (s->quantizer_noise_shaping) {
2387             for (i = 0; i < mb_block_count; i++) {
2388                 if (!skip_dct[i]) {
2389                     s->block_last_index[i] =
2390                         dct_quantize_refine(s, s->block[i], weight[i],
2391                                             orig[i], i, s->qscale);
2392                 }
2393             }
2394         }
2395
2396         if (s->luma_elim_threshold && !s->mb_intra)
2397             for (i = 0; i < 4; i++)
2398                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2399         if (s->chroma_elim_threshold && !s->mb_intra)
2400             for (i = 4; i < mb_block_count; i++)
2401                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2402
2403         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2404             for (i = 0; i < mb_block_count; i++) {
2405                 if (s->block_last_index[i] == -1)
2406                     s->coded_score[i] = INT_MAX / 256;
2407             }
2408         }
2409     }
2410
2411     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2412         s->block_last_index[4] =
2413         s->block_last_index[5] = 0;
2414         s->block[4][0] =
2415         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2416         if (!s->chroma_y_shift) { /* 422 / 444 */
2417             for (i=6; i<12; i++) {
2418                 s->block_last_index[i] = 0;
2419                 s->block[i][0] = s->block[4][0];
2420             }
2421         }
2422     }
2423
2424     // non c quantize code returns incorrect block_last_index FIXME
2425     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2426         for (i = 0; i < mb_block_count; i++) {
2427             int j;
2428             if (s->block_last_index[i] > 0) {
2429                 for (j = 63; j > 0; j--) {
2430                     if (s->block[i][s->intra_scantable.permutated[j]])
2431                         break;
2432                 }
2433                 s->block_last_index[i] = j;
2434             }
2435         }
2436     }
2437
2438     /* huffman encode */
2439     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2440     case AV_CODEC_ID_MPEG1VIDEO:
2441     case AV_CODEC_ID_MPEG2VIDEO:
2442         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2443             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2444         break;
2445     case AV_CODEC_ID_MPEG4:
2446         if (CONFIG_MPEG4_ENCODER)
2447             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2448         break;
2449     case AV_CODEC_ID_MSMPEG4V2:
2450     case AV_CODEC_ID_MSMPEG4V3:
2451     case AV_CODEC_ID_WMV1:
2452         if (CONFIG_MSMPEG4_ENCODER)
2453             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2454         break;
2455     case AV_CODEC_ID_WMV2:
2456         if (CONFIG_WMV2_ENCODER)
2457             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2458         break;
2459     case AV_CODEC_ID_H261:
2460         if (CONFIG_H261_ENCODER)
2461             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2462         break;
2463     case AV_CODEC_ID_H263:
2464     case AV_CODEC_ID_H263P:
2465     case AV_CODEC_ID_FLV1:
2466     case AV_CODEC_ID_RV10:
2467     case AV_CODEC_ID_RV20:
2468         if (CONFIG_H263_ENCODER)
2469             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2470         break;
2471     case AV_CODEC_ID_MJPEG:
2472     case AV_CODEC_ID_AMV:
2473         if (CONFIG_MJPEG_ENCODER)
2474             ff_mjpeg_encode_mb(s, s->block);
2475         break;
2476     default:
2477         av_assert1(0);
2478     }
2479 }
2480
2481 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2482 {
2483     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2484     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2485     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2486 }
2487
2488 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2489     int i;
2490
2491     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2492
2493     /* mpeg1 */
2494     d->mb_skip_run= s->mb_skip_run;
2495     for(i=0; i<3; i++)
2496         d->last_dc[i] = s->last_dc[i];
2497
2498     /* statistics */
2499     d->mv_bits= s->mv_bits;
2500     d->i_tex_bits= s->i_tex_bits;
2501     d->p_tex_bits= s->p_tex_bits;
2502     d->i_count= s->i_count;
2503     d->f_count= s->f_count;
2504     d->b_count= s->b_count;
2505     d->skip_count= s->skip_count;
2506     d->misc_bits= s->misc_bits;
2507     d->last_bits= 0;
2508
2509     d->mb_skipped= 0;
2510     d->qscale= s->qscale;
2511     d->dquant= s->dquant;
2512
2513     d->esc3_level_length= s->esc3_level_length;
2514 }
2515
2516 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2517     int i;
2518
2519     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2520     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2521
2522     /* mpeg1 */
2523     d->mb_skip_run= s->mb_skip_run;
2524     for(i=0; i<3; i++)
2525         d->last_dc[i] = s->last_dc[i];
2526
2527     /* statistics */
2528     d->mv_bits= s->mv_bits;
2529     d->i_tex_bits= s->i_tex_bits;
2530     d->p_tex_bits= s->p_tex_bits;
2531     d->i_count= s->i_count;
2532     d->f_count= s->f_count;
2533     d->b_count= s->b_count;
2534     d->skip_count= s->skip_count;
2535     d->misc_bits= s->misc_bits;
2536
2537     d->mb_intra= s->mb_intra;
2538     d->mb_skipped= s->mb_skipped;
2539     d->mv_type= s->mv_type;
2540     d->mv_dir= s->mv_dir;
2541     d->pb= s->pb;
2542     if(s->data_partitioning){
2543         d->pb2= s->pb2;
2544         d->tex_pb= s->tex_pb;
2545     }
2546     d->block= s->block;
2547     for(i=0; i<8; i++)
2548         d->block_last_index[i]= s->block_last_index[i];
2549     d->interlaced_dct= s->interlaced_dct;
2550     d->qscale= s->qscale;
2551
2552     d->esc3_level_length= s->esc3_level_length;
2553 }
2554
2555 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2556                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2557                            int *dmin, int *next_block, int motion_x, int motion_y)
2558 {
2559     int score;
2560     uint8_t *dest_backup[3];
2561
2562     copy_context_before_encode(s, backup, type);
2563
2564     s->block= s->blocks[*next_block];
2565     s->pb= pb[*next_block];
2566     if(s->data_partitioning){
2567         s->pb2   = pb2   [*next_block];
2568         s->tex_pb= tex_pb[*next_block];
2569     }
2570
2571     if(*next_block){
2572         memcpy(dest_backup, s->dest, sizeof(s->dest));
2573         s->dest[0] = s->sc.rd_scratchpad;
2574         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2575         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2576         av_assert0(s->linesize >= 32); //FIXME
2577     }
2578
2579     encode_mb(s, motion_x, motion_y);
2580
2581     score= put_bits_count(&s->pb);
2582     if(s->data_partitioning){
2583         score+= put_bits_count(&s->pb2);
2584         score+= put_bits_count(&s->tex_pb);
2585     }
2586
2587     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2588         ff_mpv_decode_mb(s, s->block);
2589
2590         score *= s->lambda2;
2591         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2592     }
2593
2594     if(*next_block){
2595         memcpy(s->dest, dest_backup, sizeof(s->dest));
2596     }
2597
2598     if(score<*dmin){
2599         *dmin= score;
2600         *next_block^=1;
2601
2602         copy_context_after_encode(best, s, type);
2603     }
2604 }
2605
2606 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2607     uint32_t *sq = ff_square_tab + 256;
2608     int acc=0;
2609     int x,y;
2610
2611     if(w==16 && h==16)
2612         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2613     else if(w==8 && h==8)
2614         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2615
2616     for(y=0; y<h; y++){
2617         for(x=0; x<w; x++){
2618             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2619         }
2620     }
2621
2622     av_assert2(acc>=0);
2623
2624     return acc;
2625 }
2626
2627 static int sse_mb(MpegEncContext *s){
2628     int w= 16;
2629     int h= 16;
2630
2631     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2632     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2633
2634     if(w==16 && h==16)
2635       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2636         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2637                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2638                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2639       }else{
2640         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2641                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2642                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2643       }
2644     else
2645         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2646                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2647                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2648 }
2649
2650 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2651     MpegEncContext *s= *(void**)arg;
2652
2653
2654     s->me.pre_pass=1;
2655     s->me.dia_size= s->avctx->pre_dia_size;
2656     s->first_slice_line=1;
2657     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2658         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2659             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2660         }
2661         s->first_slice_line=0;
2662     }
2663
2664     s->me.pre_pass=0;
2665
2666     return 0;
2667 }
2668
2669 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2670     MpegEncContext *s= *(void**)arg;
2671
2672     ff_check_alignment();
2673
2674     s->me.dia_size= s->avctx->dia_size;
2675     s->first_slice_line=1;
2676     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2677         s->mb_x=0; //for block init below
2678         ff_init_block_index(s);
2679         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2680             s->block_index[0]+=2;
2681             s->block_index[1]+=2;
2682             s->block_index[2]+=2;
2683             s->block_index[3]+=2;
2684
2685             /* compute motion vector & mb_type and store in context */
2686             if(s->pict_type==AV_PICTURE_TYPE_B)
2687                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2688             else
2689                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2690         }
2691         s->first_slice_line=0;
2692     }
2693     return 0;
2694 }
2695
2696 static int mb_var_thread(AVCodecContext *c, void *arg){
2697     MpegEncContext *s= *(void**)arg;
2698     int mb_x, mb_y;
2699
2700     ff_check_alignment();
2701
2702     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2703         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2704             int xx = mb_x * 16;
2705             int yy = mb_y * 16;
2706             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2707             int varc;
2708             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2709
2710             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2711                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2712
2713             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2714             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2715             s->me.mb_var_sum_temp    += varc;
2716         }
2717     }
2718     return 0;
2719 }
2720
2721 static void write_slice_end(MpegEncContext *s){
2722     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2723         if(s->partitioned_frame){
2724             ff_mpeg4_merge_partitions(s);
2725         }
2726
2727         ff_mpeg4_stuffing(&s->pb);
2728     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2729         ff_mjpeg_encode_stuffing(s);
2730     }
2731
2732     avpriv_align_put_bits(&s->pb);
2733     flush_put_bits(&s->pb);
2734
2735     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2736         s->misc_bits+= get_bits_diff(s);
2737 }
2738
2739 static void write_mb_info(MpegEncContext *s)
2740 {
2741     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2742     int offset = put_bits_count(&s->pb);
2743     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2744     int gobn = s->mb_y / s->gob_index;
2745     int pred_x, pred_y;
2746     if (CONFIG_H263_ENCODER)
2747         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2748     bytestream_put_le32(&ptr, offset);
2749     bytestream_put_byte(&ptr, s->qscale);
2750     bytestream_put_byte(&ptr, gobn);
2751     bytestream_put_le16(&ptr, mba);
2752     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2753     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2754     /* 4MV not implemented */
2755     bytestream_put_byte(&ptr, 0); /* hmv2 */
2756     bytestream_put_byte(&ptr, 0); /* vmv2 */
2757 }
2758
2759 static void update_mb_info(MpegEncContext *s, int startcode)
2760 {
2761     if (!s->mb_info)
2762         return;
2763     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2764         s->mb_info_size += 12;
2765         s->prev_mb_info = s->last_mb_info;
2766     }
2767     if (startcode) {
2768         s->prev_mb_info = put_bits_count(&s->pb)/8;
2769         /* This might have incremented mb_info_size above, and we return without
2770          * actually writing any info into that slot yet. But in that case,
2771          * this will be called again at the start of the after writing the
2772          * start code, actually writing the mb info. */
2773         return;
2774     }
2775
2776     s->last_mb_info = put_bits_count(&s->pb)/8;
2777     if (!s->mb_info_size)
2778         s->mb_info_size += 12;
2779     write_mb_info(s);
2780 }
2781
2782 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2783 {
2784     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2785         && s->slice_context_count == 1
2786         && s->pb.buf == s->avctx->internal->byte_buffer) {
2787         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2788         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2789
2790         uint8_t *new_buffer = NULL;
2791         int new_buffer_size = 0;
2792
2793         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2794                               s->avctx->internal->byte_buffer_size + size_increase);
2795         if (!new_buffer)
2796             return AVERROR(ENOMEM);
2797
2798         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2799         av_free(s->avctx->internal->byte_buffer);
2800         s->avctx->internal->byte_buffer      = new_buffer;
2801         s->avctx->internal->byte_buffer_size = new_buffer_size;
2802         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2803         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2804         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2805     }
2806     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2807         return AVERROR(EINVAL);
2808     return 0;
2809 }
2810
2811 static int encode_thread(AVCodecContext *c, void *arg){
2812     MpegEncContext *s= *(void**)arg;
2813     int mb_x, mb_y, pdif = 0;
2814     int chr_h= 16>>s->chroma_y_shift;
2815     int i, j;
2816     MpegEncContext best_s = { 0 }, backup_s;
2817     uint8_t bit_buf[2][MAX_MB_BYTES];
2818     uint8_t bit_buf2[2][MAX_MB_BYTES];
2819     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2820     PutBitContext pb[2], pb2[2], tex_pb[2];
2821
2822     ff_check_alignment();
2823
2824     for(i=0; i<2; i++){
2825         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2826         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2827         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2828     }
2829
2830     s->last_bits= put_bits_count(&s->pb);
2831     s->mv_bits=0;
2832     s->misc_bits=0;
2833     s->i_tex_bits=0;
2834     s->p_tex_bits=0;
2835     s->i_count=0;
2836     s->f_count=0;
2837     s->b_count=0;
2838     s->skip_count=0;
2839
2840     for(i=0; i<3; i++){
2841         /* init last dc values */
2842         /* note: quant matrix value (8) is implied here */
2843         s->last_dc[i] = 128 << s->intra_dc_precision;
2844
2845         s->current_picture.encoding_error[i] = 0;
2846     }
2847     if(s->codec_id==AV_CODEC_ID_AMV){
2848         s->last_dc[0] = 128*8/13;
2849         s->last_dc[1] = 128*8/14;
2850         s->last_dc[2] = 128*8/14;
2851     }
2852     s->mb_skip_run = 0;
2853     memset(s->last_mv, 0, sizeof(s->last_mv));
2854
2855     s->last_mv_dir = 0;
2856
2857     switch(s->codec_id){
2858     case AV_CODEC_ID_H263:
2859     case AV_CODEC_ID_H263P:
2860     case AV_CODEC_ID_FLV1:
2861         if (CONFIG_H263_ENCODER)
2862             s->gob_index = H263_GOB_HEIGHT(s->height);
2863         break;
2864     case AV_CODEC_ID_MPEG4:
2865         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2866             ff_mpeg4_init_partitions(s);
2867         break;
2868     }
2869
2870     s->resync_mb_x=0;
2871     s->resync_mb_y=0;
2872     s->first_slice_line = 1;
2873     s->ptr_lastgob = s->pb.buf;
2874     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2875         s->mb_x=0;
2876         s->mb_y= mb_y;
2877
2878         ff_set_qscale(s, s->qscale);
2879         ff_init_block_index(s);
2880
2881         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2882             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2883             int mb_type= s->mb_type[xy];
2884 //            int d;
2885             int dmin= INT_MAX;
2886             int dir;
2887             int size_increase =  s->avctx->internal->byte_buffer_size/4
2888                                + s->mb_width*MAX_MB_BYTES;
2889
2890             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2891             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2892                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2893                 return -1;
2894             }
2895             if(s->data_partitioning){
2896                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2897                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2898                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2899                     return -1;
2900                 }
2901             }
2902
2903             s->mb_x = mb_x;
2904             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2905             ff_update_block_index(s);
2906
2907             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2908                 ff_h261_reorder_mb_index(s);
2909                 xy= s->mb_y*s->mb_stride + s->mb_x;
2910                 mb_type= s->mb_type[xy];
2911             }
2912
2913             /* write gob / video packet header  */
2914             if(s->rtp_mode){
2915                 int current_packet_size, is_gob_start;
2916
2917                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2918
2919                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2920
2921                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2922
2923                 switch(s->codec_id){
2924                 case AV_CODEC_ID_H263:
2925                 case AV_CODEC_ID_H263P:
2926                     if(!s->h263_slice_structured)
2927                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2928                     break;
2929                 case AV_CODEC_ID_MPEG2VIDEO:
2930                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2931                 case AV_CODEC_ID_MPEG1VIDEO:
2932                     if(s->mb_skip_run) is_gob_start=0;
2933                     break;
2934                 case AV_CODEC_ID_MJPEG:
2935                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2936                     break;
2937                 }
2938
2939                 if(is_gob_start){
2940                     if(s->start_mb_y != mb_y || mb_x!=0){
2941                         write_slice_end(s);
2942
2943                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2944                             ff_mpeg4_init_partitions(s);
2945                         }
2946                     }
2947
2948                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2949                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2950
2951                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2952                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2953                         int d = 100 / s->error_rate;
2954                         if(r % d == 0){
2955                             current_packet_size=0;
2956                             s->pb.buf_ptr= s->ptr_lastgob;
2957                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2958                         }
2959                     }
2960
2961                     if (s->avctx->rtp_callback){
2962                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2963                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2964                     }
2965                     update_mb_info(s, 1);
2966
2967                     switch(s->codec_id){
2968                     case AV_CODEC_ID_MPEG4:
2969                         if (CONFIG_MPEG4_ENCODER) {
2970                             ff_mpeg4_encode_video_packet_header(s);
2971                             ff_mpeg4_clean_buffers(s);
2972                         }
2973                     break;
2974                     case AV_CODEC_ID_MPEG1VIDEO:
2975                     case AV_CODEC_ID_MPEG2VIDEO:
2976                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2977                             ff_mpeg1_encode_slice_header(s);
2978                             ff_mpeg1_clean_buffers(s);
2979                         }
2980                     break;
2981                     case AV_CODEC_ID_H263:
2982                     case AV_CODEC_ID_H263P:
2983                         if (CONFIG_H263_ENCODER)
2984                             ff_h263_encode_gob_header(s, mb_y);
2985                     break;
2986                     }
2987
2988                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2989                         int bits= put_bits_count(&s->pb);
2990                         s->misc_bits+= bits - s->last_bits;
2991                         s->last_bits= bits;
2992                     }
2993
2994                     s->ptr_lastgob += current_packet_size;
2995                     s->first_slice_line=1;
2996                     s->resync_mb_x=mb_x;
2997                     s->resync_mb_y=mb_y;
2998                 }
2999             }
3000
3001             if(  (s->resync_mb_x   == s->mb_x)
3002                && s->resync_mb_y+1 == s->mb_y){
3003                 s->first_slice_line=0;
3004             }
3005
3006             s->mb_skipped=0;
3007             s->dquant=0; //only for QP_RD
3008
3009             update_mb_info(s, 0);
3010
3011             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
3012                 int next_block=0;
3013                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3014
3015                 copy_context_before_encode(&backup_s, s, -1);
3016                 backup_s.pb= s->pb;
3017                 best_s.data_partitioning= s->data_partitioning;
3018                 best_s.partitioned_frame= s->partitioned_frame;
3019                 if(s->data_partitioning){
3020                     backup_s.pb2= s->pb2;
3021                     backup_s.tex_pb= s->tex_pb;
3022                 }
3023
3024                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3025                     s->mv_dir = MV_DIR_FORWARD;
3026                     s->mv_type = MV_TYPE_16X16;
3027                     s->mb_intra= 0;
3028                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3029                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3030                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3031                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3032                 }
3033                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3034                     s->mv_dir = MV_DIR_FORWARD;
3035                     s->mv_type = MV_TYPE_FIELD;
3036                     s->mb_intra= 0;
3037                     for(i=0; i<2; i++){
3038                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3039                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3040                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3041                     }
3042                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3043                                  &dmin, &next_block, 0, 0);
3044                 }
3045                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3046                     s->mv_dir = MV_DIR_FORWARD;
3047                     s->mv_type = MV_TYPE_16X16;
3048                     s->mb_intra= 0;
3049                     s->mv[0][0][0] = 0;
3050                     s->mv[0][0][1] = 0;
3051                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3052                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3053                 }
3054                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3055                     s->mv_dir = MV_DIR_FORWARD;
3056                     s->mv_type = MV_TYPE_8X8;
3057                     s->mb_intra= 0;
3058                     for(i=0; i<4; i++){
3059                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3060                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3061                     }
3062                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3063                                  &dmin, &next_block, 0, 0);
3064                 }
3065                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3066                     s->mv_dir = MV_DIR_FORWARD;
3067                     s->mv_type = MV_TYPE_16X16;
3068                     s->mb_intra= 0;
3069                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3070                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3071                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3072                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3073                 }
3074                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3075                     s->mv_dir = MV_DIR_BACKWARD;
3076                     s->mv_type = MV_TYPE_16X16;
3077                     s->mb_intra= 0;
3078                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3079                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3080                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3081                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3082                 }
3083                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3084                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3085                     s->mv_type = MV_TYPE_16X16;
3086                     s->mb_intra= 0;
3087                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3088                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3089                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3090                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3091                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3092                                  &dmin, &next_block, 0, 0);
3093                 }
3094                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3095                     s->mv_dir = MV_DIR_FORWARD;
3096                     s->mv_type = MV_TYPE_FIELD;
3097                     s->mb_intra= 0;
3098                     for(i=0; i<2; i++){
3099                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3100                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3101                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3102                     }
3103                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3104                                  &dmin, &next_block, 0, 0);
3105                 }
3106                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3107                     s->mv_dir = MV_DIR_BACKWARD;
3108                     s->mv_type = MV_TYPE_FIELD;
3109                     s->mb_intra= 0;
3110                     for(i=0; i<2; i++){
3111                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3112                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3113                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3114                     }
3115                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3116                                  &dmin, &next_block, 0, 0);
3117                 }
3118                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3119                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3120                     s->mv_type = MV_TYPE_FIELD;
3121                     s->mb_intra= 0;
3122                     for(dir=0; dir<2; dir++){
3123                         for(i=0; i<2; i++){
3124                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3125                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3126                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3127                         }
3128                     }
3129                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3130                                  &dmin, &next_block, 0, 0);
3131                 }
3132                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3133                     s->mv_dir = 0;
3134                     s->mv_type = MV_TYPE_16X16;
3135                     s->mb_intra= 1;
3136                     s->mv[0][0][0] = 0;
3137                     s->mv[0][0][1] = 0;
3138                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3139                                  &dmin, &next_block, 0, 0);
3140                     if(s->h263_pred || s->h263_aic){
3141                         if(best_s.mb_intra)
3142                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3143                         else
3144                             ff_clean_intra_table_entries(s); //old mode?
3145                     }
3146                 }
3147
3148                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3149                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3150                         const int last_qp= backup_s.qscale;
3151                         int qpi, qp, dc[6];
3152                         int16_t ac[6][16];
3153                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3154                         static const int dquant_tab[4]={-1,1,-2,2};
3155                         int storecoefs = s->mb_intra && s->dc_val[0];
3156
3157                         av_assert2(backup_s.dquant == 0);
3158
3159                         //FIXME intra
3160                         s->mv_dir= best_s.mv_dir;
3161                         s->mv_type = MV_TYPE_16X16;
3162                         s->mb_intra= best_s.mb_intra;
3163                         s->mv[0][0][0] = best_s.mv[0][0][0];
3164                         s->mv[0][0][1] = best_s.mv[0][0][1];
3165                         s->mv[1][0][0] = best_s.mv[1][0][0];
3166                         s->mv[1][0][1] = best_s.mv[1][0][1];
3167
3168                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3169                         for(; qpi<4; qpi++){
3170                             int dquant= dquant_tab[qpi];
3171                             qp= last_qp + dquant;
3172                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3173                                 continue;
3174                             backup_s.dquant= dquant;
3175                             if(storecoefs){
3176                                 for(i=0; i<6; i++){
3177                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3178                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3179                                 }
3180                             }
3181
3182                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3183                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3184                             if(best_s.qscale != qp){
3185                                 if(storecoefs){
3186                                     for(i=0; i<6; i++){
3187                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3188                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3189                                     }
3190                                 }
3191                             }
3192                         }
3193                     }
3194                 }
3195                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3196                     int mx= s->b_direct_mv_table[xy][0];
3197                     int my= s->b_direct_mv_table[xy][1];
3198
3199                     backup_s.dquant = 0;
3200                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3201                     s->mb_intra= 0;
3202                     ff_mpeg4_set_direct_mv(s, mx, my);
3203                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3204                                  &dmin, &next_block, mx, my);
3205                 }
3206                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3207                     backup_s.dquant = 0;
3208                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3209                     s->mb_intra= 0;
3210                     ff_mpeg4_set_direct_mv(s, 0, 0);
3211                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3212                                  &dmin, &next_block, 0, 0);
3213                 }
3214                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3215                     int coded=0;
3216                     for(i=0; i<6; i++)
3217                         coded |= s->block_last_index[i];
3218                     if(coded){
3219                         int mx,my;
3220                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3221                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3222                             mx=my=0; //FIXME find the one we actually used
3223                             ff_mpeg4_set_direct_mv(s, mx, my);
3224                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3225                             mx= s->mv[1][0][0];
3226                             my= s->mv[1][0][1];
3227                         }else{
3228                             mx= s->mv[0][0][0];
3229                             my= s->mv[0][0][1];
3230                         }
3231
3232                         s->mv_dir= best_s.mv_dir;
3233                         s->mv_type = best_s.mv_type;
3234                         s->mb_intra= 0;
3235 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3236                         s->mv[0][0][1] = best_s.mv[0][0][1];
3237                         s->mv[1][0][0] = best_s.mv[1][0][0];
3238                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3239                         backup_s.dquant= 0;
3240                         s->skipdct=1;
3241                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3242                                         &dmin, &next_block, mx, my);
3243                         s->skipdct=0;
3244                     }
3245                 }
3246
3247                 s->current_picture.qscale_table[xy] = best_s.qscale;
3248
3249                 copy_context_after_encode(s, &best_s, -1);
3250
3251                 pb_bits_count= put_bits_count(&s->pb);
3252                 flush_put_bits(&s->pb);
3253                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3254                 s->pb= backup_s.pb;
3255
3256                 if(s->data_partitioning){
3257                     pb2_bits_count= put_bits_count(&s->pb2);
3258                     flush_put_bits(&s->pb2);
3259                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3260                     s->pb2= backup_s.pb2;
3261
3262                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3263                     flush_put_bits(&s->tex_pb);
3264                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3265                     s->tex_pb= backup_s.tex_pb;
3266                 }
3267                 s->last_bits= put_bits_count(&s->pb);
3268
3269                 if (CONFIG_H263_ENCODER &&
3270                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3271                     ff_h263_update_motion_val(s);
3272
3273                 if(next_block==0){ //FIXME 16 vs linesize16
3274                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3275                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3276                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3277                 }
3278
3279                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3280                     ff_mpv_decode_mb(s, s->block);
3281             } else {
3282                 int motion_x = 0, motion_y = 0;
3283                 s->mv_type=MV_TYPE_16X16;
3284                 // only one MB-Type possible
3285
3286                 switch(mb_type){
3287                 case CANDIDATE_MB_TYPE_INTRA:
3288                     s->mv_dir = 0;
3289                     s->mb_intra= 1;
3290                     motion_x= s->mv[0][0][0] = 0;
3291                     motion_y= s->mv[0][0][1] = 0;
3292                     break;
3293                 case CANDIDATE_MB_TYPE_INTER:
3294                     s->mv_dir = MV_DIR_FORWARD;
3295                     s->mb_intra= 0;
3296                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3297                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3298                     break;
3299                 case CANDIDATE_MB_TYPE_INTER_I:
3300                     s->mv_dir = MV_DIR_FORWARD;
3301                     s->mv_type = MV_TYPE_FIELD;
3302                     s->mb_intra= 0;
3303                     for(i=0; i<2; i++){
3304                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3305                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3306                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3307                     }
3308                     break;
3309                 case CANDIDATE_MB_TYPE_INTER4V:
3310                     s->mv_dir = MV_DIR_FORWARD;
3311                     s->mv_type = MV_TYPE_8X8;
3312                     s->mb_intra= 0;
3313                     for(i=0; i<4; i++){
3314                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3315                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3316                     }
3317                     break;
3318                 case CANDIDATE_MB_TYPE_DIRECT:
3319                     if (CONFIG_MPEG4_ENCODER) {
3320                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3321                         s->mb_intra= 0;
3322                         motion_x=s->b_direct_mv_table[xy][0];
3323                         motion_y=s->b_direct_mv_table[xy][1];
3324                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3325                     }
3326                     break;
3327                 case CANDIDATE_MB_TYPE_DIRECT0:
3328                     if (CONFIG_MPEG4_ENCODER) {
3329                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3330                         s->mb_intra= 0;
3331                         ff_mpeg4_set_direct_mv(s, 0, 0);
3332                     }
3333                     break;
3334                 case CANDIDATE_MB_TYPE_BIDIR:
3335                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3336                     s->mb_intra= 0;
3337                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3338                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3339                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3340                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3341                     break;
3342                 case CANDIDATE_MB_TYPE_BACKWARD:
3343                     s->mv_dir = MV_DIR_BACKWARD;
3344                     s->mb_intra= 0;
3345                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3346                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3347                     break;
3348                 case CANDIDATE_MB_TYPE_FORWARD:
3349                     s->mv_dir = MV_DIR_FORWARD;
3350                     s->mb_intra= 0;
3351                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3352                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3353                     break;
3354                 case CANDIDATE_MB_TYPE_FORWARD_I:
3355                     s->mv_dir = MV_DIR_FORWARD;
3356                     s->mv_type = MV_TYPE_FIELD;
3357                     s->mb_intra= 0;
3358                     for(i=0; i<2; i++){
3359                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3360                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3361                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3362                     }
3363                     break;
3364                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3365                     s->mv_dir = MV_DIR_BACKWARD;
3366                     s->mv_type = MV_TYPE_FIELD;
3367                     s->mb_intra= 0;
3368                     for(i=0; i<2; i++){
3369                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3370                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3371                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3372                     }
3373                     break;
3374                 case CANDIDATE_MB_TYPE_BIDIR_I:
3375                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3376                     s->mv_type = MV_TYPE_FIELD;
3377                     s->mb_intra= 0;
3378                     for(dir=0; dir<2; dir++){
3379                         for(i=0; i<2; i++){
3380                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3381                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3382                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3383                         }
3384                     }
3385                     break;
3386                 default:
3387                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3388                 }
3389
3390                 encode_mb(s, motion_x, motion_y);
3391
3392                 // RAL: Update last macroblock type
3393                 s->last_mv_dir = s->mv_dir;
3394
3395                 if (CONFIG_H263_ENCODER &&
3396                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3397                     ff_h263_update_motion_val(s);
3398
3399                 ff_mpv_decode_mb(s, s->block);
3400             }
3401
3402             /* clean the MV table in IPS frames for direct mode in B frames */
3403             if(s->mb_intra /* && I,P,S_TYPE */){
3404                 s->p_mv_table[xy][0]=0;
3405                 s->p_mv_table[xy][1]=0;
3406             }
3407
3408             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3409                 int w= 16;
3410                 int h= 16;
3411
3412                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3413                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3414
3415                 s->current_picture.encoding_error[0] += sse(
3416                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3417                     s->dest[0], w, h, s->linesize);
3418                 s->current_picture.encoding_error[1] += sse(
3419                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3420                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3421                 s->current_picture.encoding_error[2] += sse(
3422                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3423                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3424             }
3425             if(s->loop_filter){
3426                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3427                     ff_h263_loop_filter(s);
3428             }
3429             ff_dlog(s->avctx, "MB %d %d bits\n",
3430                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3431         }
3432     }
3433
3434     //not beautiful here but we must write it before flushing so it has to be here
3435     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3436         ff_msmpeg4_encode_ext_header(s);
3437
3438     write_slice_end(s);
3439
3440     /* Send the last GOB if RTP */
3441     if (s->avctx->rtp_callback) {
3442         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3443         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3444         /* Call the RTP callback to send the last GOB */
3445         emms_c();
3446         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3447     }
3448
3449     return 0;
3450 }
3451
3452 #define MERGE(field) dst->field += src->field; src->field=0
3453 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3454     MERGE(me.scene_change_score);
3455     MERGE(me.mc_mb_var_sum_temp);
3456     MERGE(me.mb_var_sum_temp);
3457 }
3458
3459 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3460     int i;
3461
3462     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3463     MERGE(dct_count[1]);
3464     MERGE(mv_bits);
3465     MERGE(i_tex_bits);
3466     MERGE(p_tex_bits);
3467     MERGE(i_count);
3468     MERGE(f_count);
3469     MERGE(b_count);
3470     MERGE(skip_count);
3471     MERGE(misc_bits);
3472     MERGE(er.error_count);
3473     MERGE(padding_bug_score);
3474     MERGE(current_picture.encoding_error[0]);
3475     MERGE(current_picture.encoding_error[1]);
3476     MERGE(current_picture.encoding_error[2]);
3477
3478     if(dst->avctx->noise_reduction){
3479         for(i=0; i<64; i++){
3480             MERGE(dct_error_sum[0][i]);
3481             MERGE(dct_error_sum[1][i]);
3482         }
3483     }
3484
3485     assert(put_bits_count(&src->pb) % 8 ==0);
3486     assert(put_bits_count(&dst->pb) % 8 ==0);
3487     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3488     flush_put_bits(&dst->pb);
3489 }
3490
3491 static int estimate_qp(MpegEncContext *s, int dry_run){
3492     if (s->next_lambda){
3493         s->current_picture_ptr->f->quality =
3494         s->current_picture.f->quality = s->next_lambda;
3495         if(!dry_run) s->next_lambda= 0;
3496     } else if (!s->fixed_qscale) {
3497         s->current_picture_ptr->f->quality =
3498         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3499         if (s->current_picture.f->quality < 0)
3500             return -1;
3501     }
3502
3503     if(s->adaptive_quant){
3504         switch(s->codec_id){
3505         case AV_CODEC_ID_MPEG4:
3506             if (CONFIG_MPEG4_ENCODER)
3507                 ff_clean_mpeg4_qscales(s);
3508             break;
3509         case AV_CODEC_ID_H263:
3510         case AV_CODEC_ID_H263P:
3511         case AV_CODEC_ID_FLV1:
3512             if (CONFIG_H263_ENCODER)
3513                 ff_clean_h263_qscales(s);
3514             break;
3515         default:
3516             ff_init_qscale_tab(s);
3517         }
3518
3519         s->lambda= s->lambda_table[0];
3520         //FIXME broken
3521     }else
3522         s->lambda = s->current_picture.f->quality;
3523     update_qscale(s);
3524     return 0;
3525 }
3526
3527 /* must be called before writing the header */
3528 static void set_frame_distances(MpegEncContext * s){
3529     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3530     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3531
3532     if(s->pict_type==AV_PICTURE_TYPE_B){
3533         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3534         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3535     }else{
3536         s->pp_time= s->time - s->last_non_b_time;
3537         s->last_non_b_time= s->time;
3538         assert(s->picture_number==0 || s->pp_time > 0);
3539     }
3540 }
3541
3542 static int encode_picture(MpegEncContext *s, int picture_number)
3543 {
3544     int i, ret;
3545     int bits;
3546     int context_count = s->slice_context_count;
3547
3548     s->picture_number = picture_number;
3549
3550     /* Reset the average MB variance */
3551     s->me.mb_var_sum_temp    =
3552     s->me.mc_mb_var_sum_temp = 0;
3553
3554     /* we need to initialize some time vars before we can encode b-frames */
3555     // RAL: Condition added for MPEG1VIDEO
3556     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3557         set_frame_distances(s);
3558     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3559         ff_set_mpeg4_time(s);
3560
3561     s->me.scene_change_score=0;
3562
3563 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3564
3565     if(s->pict_type==AV_PICTURE_TYPE_I){
3566         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3567         else                        s->no_rounding=0;
3568     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3569         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3570             s->no_rounding ^= 1;
3571     }
3572
3573     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3574         if (estimate_qp(s,1) < 0)
3575             return -1;
3576         ff_get_2pass_fcode(s);
3577     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3578         if(s->pict_type==AV_PICTURE_TYPE_B)
3579             s->lambda= s->last_lambda_for[s->pict_type];
3580         else
3581             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3582         update_qscale(s);
3583     }
3584
3585     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3586         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3587         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3588         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3589         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3590     }
3591
3592     s->mb_intra=0; //for the rate distortion & bit compare functions
3593     for(i=1; i<context_count; i++){
3594         ret = ff_update_duplicate_context(s->thread_context[i], s);
3595         if (ret < 0)
3596             return ret;
3597     }
3598
3599     if(ff_init_me(s)<0)
3600         return -1;
3601
3602     /* Estimate motion for every MB */
3603     if(s->pict_type != AV_PICTURE_TYPE_I){
3604         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3605         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3606         if (s->pict_type != AV_PICTURE_TYPE_B) {
3607             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3608                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3609             }
3610         }
3611
3612         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3613     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3614         /* I-Frame */
3615         for(i=0; i<s->mb_stride*s->mb_height; i++)
3616             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3617
3618         if(!s->fixed_qscale){
3619             /* finding spatial complexity for I-frame rate control */
3620             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3621         }
3622     }
3623     for(i=1; i<context_count; i++){
3624         merge_context_after_me(s, s->thread_context[i]);
3625     }
3626     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3627     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3628     emms_c();
3629
3630     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3631         s->pict_type= AV_PICTURE_TYPE_I;
3632         for(i=0; i<s->mb_stride*s->mb_height; i++)
3633             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3634         if(s->msmpeg4_version >= 3)
3635             s->no_rounding=1;
3636         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3637                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3638     }
3639
3640     if(!s->umvplus){
3641         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3642             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3643
3644             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3645                 int a,b;
3646                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3647                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3648                 s->f_code= FFMAX3(s->f_code, a, b);
3649             }
3650
3651             ff_fix_long_p_mvs(s);
3652             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3653             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3654                 int j;
3655                 for(i=0; i<2; i++){
3656                     for(j=0; j<2; j++)
3657                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3658                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3659                 }
3660             }
3661         }
3662
3663         if(s->pict_type==AV_PICTURE_TYPE_B){
3664             int a, b;
3665
3666             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3667             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3668             s->f_code = FFMAX(a, b);
3669
3670             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3671             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3672             s->b_code = FFMAX(a, b);
3673
3674             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3675             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3676             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3677             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3678             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3679                 int dir, j;
3680                 for(dir=0; dir<2; dir++){
3681                     for(i=0; i<2; i++){
3682                         for(j=0; j<2; j++){
3683                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3684                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3685                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3686                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3687                         }
3688                     }
3689                 }
3690             }
3691         }
3692     }
3693
3694     if (estimate_qp(s, 0) < 0)
3695         return -1;
3696
3697     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3698         s->pict_type == AV_PICTURE_TYPE_I &&
3699         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3700         s->qscale= 3; //reduce clipping problems
3701
3702     if (s->out_format == FMT_MJPEG) {
3703         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3704         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3705
3706         if (s->avctx->intra_matrix) {
3707             chroma_matrix =
3708             luma_matrix = s->avctx->intra_matrix;
3709         }
3710         if (s->avctx->chroma_intra_matrix)
3711             chroma_matrix = s->avctx->chroma_intra_matrix;
3712
3713         /* for mjpeg, we do include qscale in the matrix */
3714         for(i=1;i<64;i++){
3715             int j = s->idsp.idct_permutation[i];
3716
3717             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3718             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3719         }
3720         s->y_dc_scale_table=
3721         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3722         s->chroma_intra_matrix[0] =
3723         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3724         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3725                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3726         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3727                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3728         s->qscale= 8;
3729     }
3730     if(s->codec_id == AV_CODEC_ID_AMV){
3731         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3732         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3733         for(i=1;i<64;i++){
3734             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3735
3736             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3737             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3738         }
3739         s->y_dc_scale_table= y;
3740         s->c_dc_scale_table= c;
3741         s->intra_matrix[0] = 13;
3742         s->chroma_intra_matrix[0] = 14;
3743         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3744                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3745         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3746                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3747         s->qscale= 8;
3748     }
3749
3750     //FIXME var duplication
3751     s->current_picture_ptr->f->key_frame =
3752     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3753     s->current_picture_ptr->f->pict_type =
3754     s->current_picture.f->pict_type = s->pict_type;
3755
3756     if (s->current_picture.f->key_frame)
3757         s->picture_in_gop_number=0;
3758
3759     s->mb_x = s->mb_y = 0;
3760     s->last_bits= put_bits_count(&s->pb);
3761     switch(s->out_format) {
3762     case FMT_MJPEG:
3763         if (CONFIG_MJPEG_ENCODER)
3764             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3765                                            s->intra_matrix, s->chroma_intra_matrix);
3766         break;
3767     case FMT_H261:
3768         if (CONFIG_H261_ENCODER)
3769             ff_h261_encode_picture_header(s, picture_number);
3770         break;
3771     case FMT_H263:
3772         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3773             ff_wmv2_encode_picture_header(s, picture_number);
3774         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3775             ff_msmpeg4_encode_picture_header(s, picture_number);
3776         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3777             ff_mpeg4_encode_picture_header(s, picture_number);
3778         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3779             ret = ff_rv10_encode_picture_header(s, picture_number);
3780             if (ret < 0)
3781                 return ret;
3782         }
3783         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3784             ff_rv20_encode_picture_header(s, picture_number);
3785         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3786             ff_flv_encode_picture_header(s, picture_number);
3787         else if (CONFIG_H263_ENCODER)
3788             ff_h263_encode_picture_header(s, picture_number);
3789         break;
3790     case FMT_MPEG1:
3791         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3792             ff_mpeg1_encode_picture_header(s, picture_number);
3793         break;
3794     default:
3795         av_assert0(0);
3796     }
3797     bits= put_bits_count(&s->pb);
3798     s->header_bits= bits - s->last_bits;
3799
3800     for(i=1; i<context_count; i++){
3801         update_duplicate_context_after_me(s->thread_context[i], s);
3802     }
3803     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3804     for(i=1; i<context_count; i++){
3805         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3806             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3807         merge_context_after_encode(s, s->thread_context[i]);
3808     }
3809     emms_c();
3810     return 0;
3811 }
3812
3813 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3814     const int intra= s->mb_intra;
3815     int i;
3816
3817     s->dct_count[intra]++;
3818
3819     for(i=0; i<64; i++){
3820         int level= block[i];
3821
3822         if(level){
3823             if(level>0){
3824                 s->dct_error_sum[intra][i] += level;
3825                 level -= s->dct_offset[intra][i];
3826                 if(level<0) level=0;
3827             }else{
3828                 s->dct_error_sum[intra][i] -= level;
3829                 level += s->dct_offset[intra][i];
3830                 if(level>0) level=0;
3831             }
3832             block[i]= level;
3833         }
3834     }
3835 }
3836
3837 static int dct_quantize_trellis_c(MpegEncContext *s,
3838                                   int16_t *block, int n,
3839                                   int qscale, int *overflow){
3840     const int *qmat;
3841     const uint16_t *matrix;
3842     const uint8_t *scantable= s->intra_scantable.scantable;
3843     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3844     int max=0;
3845     unsigned int threshold1, threshold2;
3846     int bias=0;
3847     int run_tab[65];
3848     int level_tab[65];
3849     int score_tab[65];
3850     int survivor[65];
3851     int survivor_count;
3852     int last_run=0;
3853     int last_level=0;
3854     int last_score= 0;
3855     int last_i;
3856     int coeff[2][64];
3857     int coeff_count[64];
3858     int qmul, qadd, start_i, last_non_zero, i, dc;
3859     const int esc_length= s->ac_esc_length;
3860     uint8_t * length;
3861     uint8_t * last_length;
3862     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3863     int mpeg2_qscale;
3864
3865     s->fdsp.fdct(block);
3866
3867     if(s->dct_error_sum)
3868         s->denoise_dct(s, block);
3869     qmul= qscale*16;
3870     qadd= ((qscale-1)|1)*8;
3871
3872     if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3873     else                 mpeg2_qscale = qscale << 1;
3874
3875     if (s->mb_intra) {
3876         int q;
3877         if (!s->h263_aic) {
3878             if (n < 4)
3879                 q = s->y_dc_scale;
3880             else
3881                 q = s->c_dc_scale;
3882             q = q << 3;
3883         } else{
3884             /* For AIC we skip quant/dequant of INTRADC */
3885             q = 1 << 3;
3886             qadd=0;
3887         }
3888
3889         /* note: block[0] is assumed to be positive */
3890         block[0] = (block[0] + (q >> 1)) / q;
3891         start_i = 1;
3892         last_non_zero = 0;
3893         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3894         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3895         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3896             bias= 1<<(QMAT_SHIFT-1);
3897
3898         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3899             length     = s->intra_chroma_ac_vlc_length;
3900             last_length= s->intra_chroma_ac_vlc_last_length;
3901         } else {
3902             length     = s->intra_ac_vlc_length;
3903             last_length= s->intra_ac_vlc_last_length;
3904         }
3905     } else {
3906         start_i = 0;
3907         last_non_zero = -1;
3908         qmat = s->q_inter_matrix[qscale];
3909         matrix = s->inter_matrix;
3910         length     = s->inter_ac_vlc_length;
3911         last_length= s->inter_ac_vlc_last_length;
3912     }
3913     last_i= start_i;
3914
3915     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3916     threshold2= (threshold1<<1);
3917
3918     for(i=63; i>=start_i; i--) {
3919         const int j = scantable[i];
3920         int level = block[j] * qmat[j];
3921
3922         if(((unsigned)(level+threshold1))>threshold2){
3923             last_non_zero = i;
3924             break;
3925         }
3926     }
3927
3928     for(i=start_i; i<=last_non_zero; i++) {
3929         const int j = scantable[i];
3930         int level = block[j] * qmat[j];
3931
3932 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3933 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3934         if(((unsigned)(level+threshold1))>threshold2){
3935             if(level>0){
3936                 level= (bias + level)>>QMAT_SHIFT;
3937                 coeff[0][i]= level;
3938                 coeff[1][i]= level-1;
3939 //                coeff[2][k]= level-2;
3940             }else{
3941                 level= (bias - level)>>QMAT_SHIFT;
3942                 coeff[0][i]= -level;
3943                 coeff[1][i]= -level+1;
3944 //                coeff[2][k]= -level+2;
3945             }
3946             coeff_count[i]= FFMIN(level, 2);
3947             av_assert2(coeff_count[i]);
3948             max |=level;
3949         }else{
3950             coeff[0][i]= (level>>31)|1;
3951             coeff_count[i]= 1;
3952         }
3953     }
3954
3955     *overflow= s->max_qcoeff < max; //overflow might have happened
3956
3957     if(last_non_zero < start_i){
3958         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3959         return last_non_zero;
3960     }
3961
3962     score_tab[start_i]= 0;
3963     survivor[0]= start_i;
3964     survivor_count= 1;
3965
3966     for(i=start_i; i<=last_non_zero; i++){
3967         int level_index, j, zero_distortion;
3968         int dct_coeff= FFABS(block[ scantable[i] ]);
3969         int best_score=256*256*256*120;
3970
3971         if (s->fdsp.fdct == ff_fdct_ifast)
3972             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3973         zero_distortion= dct_coeff*dct_coeff;
3974
3975         for(level_index=0; level_index < coeff_count[i]; level_index++){
3976             int distortion;
3977             int level= coeff[level_index][i];
3978             const int alevel= FFABS(level);
3979             int unquant_coeff;
3980
3981             av_assert2(level);
3982
3983             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3984                 unquant_coeff= alevel*qmul + qadd;
3985             } else if(s->out_format == FMT_MJPEG) {
3986                 j = s->idsp.idct_permutation[scantable[i]];
3987                 unquant_coeff = alevel * matrix[j] * 8;
3988             }else{ //MPEG1
3989                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3990                 if(s->mb_intra){
3991                         unquant_coeff = (int)(  alevel  * mpeg2_qscale * matrix[j]) >> 4;
3992                         unquant_coeff =   (unquant_coeff - 1) | 1;
3993                 }else{
3994                         unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
3995                         unquant_coeff =   (unquant_coeff - 1) | 1;
3996                 }
3997                 unquant_coeff<<= 3;
3998             }
3999
4000             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
4001             level+=64;
4002             if((level&(~127)) == 0){
4003                 for(j=survivor_count-1; j>=0; j--){
4004                     int run= i - survivor[j];
4005                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4006                     score += score_tab[i-run];
4007
4008                     if(score < best_score){
4009                         best_score= score;
4010                         run_tab[i+1]= run;
4011                         level_tab[i+1]= level-64;
4012                     }
4013                 }
4014
4015                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4016                     for(j=survivor_count-1; j>=0; j--){
4017                         int run= i - survivor[j];
4018                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4019                         score += score_tab[i-run];
4020                         if(score < last_score){
4021                             last_score= score;
4022                             last_run= run;
4023                             last_level= level-64;
4024                             last_i= i+1;
4025                         }
4026                     }
4027                 }
4028             }else{
4029                 distortion += esc_length*lambda;
4030                 for(j=survivor_count-1; j>=0; j--){
4031                     int run= i - survivor[j];
4032                     int score= distortion + score_tab[i-run];
4033
4034                     if(score < best_score){
4035                         best_score= score;
4036                         run_tab[i+1]= run;
4037                         level_tab[i+1]= level-64;
4038                     }
4039                 }
4040
4041                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4042                   for(j=survivor_count-1; j>=0; j--){
4043                         int run= i - survivor[j];
4044                         int score= distortion + score_tab[i-run];
4045                         if(score < last_score){
4046                             last_score= score;
4047                             last_run= run;
4048                             last_level= level-64;
4049                             last_i= i+1;
4050                         }
4051                     }
4052                 }
4053             }
4054         }
4055
4056         score_tab[i+1]= best_score;
4057
4058         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
4059         if(last_non_zero <= 27){
4060             for(; survivor_count; survivor_count--){
4061                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4062                     break;
4063             }
4064         }else{
4065             for(; survivor_count; survivor_count--){
4066                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4067                     break;
4068             }
4069         }
4070
4071         survivor[ survivor_count++ ]= i+1;
4072     }
4073
4074     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4075         last_score= 256*256*256*120;
4076         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4077             int score= score_tab[i];
4078             if(i) score += lambda*2; //FIXME exacter?
4079
4080             if(score < last_score){
4081                 last_score= score;
4082                 last_i= i;
4083                 last_level= level_tab[i];
4084                 last_run= run_tab[i];
4085             }
4086         }
4087     }
4088
4089     s->coded_score[n] = last_score;
4090
4091     dc= FFABS(block[0]);
4092     last_non_zero= last_i - 1;
4093     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4094
4095     if(last_non_zero < start_i)
4096         return last_non_zero;
4097
4098     if(last_non_zero == 0 && start_i == 0){
4099         int best_level= 0;
4100         int best_score= dc * dc;
4101
4102         for(i=0; i<coeff_count[0]; i++){
4103             int level= coeff[i][0];
4104             int alevel= FFABS(level);
4105             int unquant_coeff, score, distortion;
4106
4107             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4108                     unquant_coeff= (alevel*qmul + qadd)>>3;
4109             }else{ //MPEG1
4110                     unquant_coeff = (((  alevel  << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4111                     unquant_coeff =   (unquant_coeff - 1) | 1;
4112             }
4113             unquant_coeff = (unquant_coeff + 4) >> 3;
4114             unquant_coeff<<= 3 + 3;
4115
4116             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4117             level+=64;
4118             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4119             else                    score= distortion + esc_length*lambda;
4120
4121             if(score < best_score){
4122                 best_score= score;
4123                 best_level= level - 64;
4124             }
4125         }
4126         block[0]= best_level;
4127         s->coded_score[n] = best_score - dc*dc;
4128         if(best_level == 0) return -1;
4129         else                return last_non_zero;
4130     }
4131
4132     i= last_i;
4133     av_assert2(last_level);
4134
4135     block[ perm_scantable[last_non_zero] ]= last_level;
4136     i -= last_run + 1;
4137
4138     for(; i>start_i; i -= run_tab[i] + 1){
4139         block[ perm_scantable[i-1] ]= level_tab[i];
4140     }
4141
4142     return last_non_zero;
4143 }
4144
4145 //#define REFINE_STATS 1
4146 static int16_t basis[64][64];
4147
4148 static void build_basis(uint8_t *perm){
4149     int i, j, x, y;
4150     emms_c();
4151     for(i=0; i<8; i++){
4152         for(j=0; j<8; j++){
4153             for(y=0; y<8; y++){
4154                 for(x=0; x<8; x++){
4155                     double s= 0.25*(1<<BASIS_SHIFT);
4156                     int index= 8*i + j;
4157                     int perm_index= perm[index];
4158                     if(i==0) s*= sqrt(0.5);
4159                     if(j==0) s*= sqrt(0.5);
4160                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4161                 }
4162             }
4163         }
4164     }
4165 }
4166
4167 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4168                         int16_t *block, int16_t *weight, int16_t *orig,
4169                         int n, int qscale){
4170     int16_t rem[64];
4171     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4172     const uint8_t *scantable= s->intra_scantable.scantable;
4173     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4174 //    unsigned int threshold1, threshold2;
4175 //    int bias=0;
4176     int run_tab[65];
4177     int prev_run=0;
4178     int prev_level=0;
4179     int qmul, qadd, start_i, last_non_zero, i, dc;
4180     uint8_t * length;
4181     uint8_t * last_length;
4182     int lambda;
4183     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4184 #ifdef REFINE_STATS
4185 static int count=0;
4186 static int after_last=0;
4187 static int to_zero=0;
4188 static int from_zero=0;
4189 static int raise=0;
4190 static int lower=0;
4191 static int messed_sign=0;
4192 #endif
4193
4194     if(basis[0][0] == 0)
4195         build_basis(s->idsp.idct_permutation);
4196
4197     qmul= qscale*2;
4198     qadd= (qscale-1)|1;
4199     if (s->mb_intra) {
4200         if (!s->h263_aic) {
4201             if (n < 4)
4202                 q = s->y_dc_scale;
4203             else
4204                 q = s->c_dc_scale;
4205         } else{
4206             /* For AIC we skip quant/dequant of INTRADC */
4207             q = 1;
4208             qadd=0;
4209         }
4210         q <<= RECON_SHIFT-3;
4211         /* note: block[0] is assumed to be positive */
4212         dc= block[0]*q;
4213 //        block[0] = (block[0] + (q >> 1)) / q;
4214         start_i = 1;
4215 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4216 //            bias= 1<<(QMAT_SHIFT-1);
4217         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4218             length     = s->intra_chroma_ac_vlc_length;
4219             last_length= s->intra_chroma_ac_vlc_last_length;
4220         } else {
4221             length     = s->intra_ac_vlc_length;
4222             last_length= s->intra_ac_vlc_last_length;
4223         }
4224     } else {
4225         dc= 0;
4226         start_i = 0;
4227         length     = s->inter_ac_vlc_length;
4228         last_length= s->inter_ac_vlc_last_length;
4229     }
4230     last_non_zero = s->block_last_index[n];
4231
4232 #ifdef REFINE_STATS
4233 {START_TIMER
4234 #endif
4235     dc += (1<<(RECON_SHIFT-1));
4236     for(i=0; i<64; i++){
4237         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4238     }
4239 #ifdef REFINE_STATS
4240 STOP_TIMER("memset rem[]")}
4241 #endif
4242     sum=0;
4243     for(i=0; i<64; i++){
4244         int one= 36;
4245         int qns=4;
4246         int w;
4247
4248         w= FFABS(weight[i]) + qns*one;
4249         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4250
4251         weight[i] = w;
4252 //        w=weight[i] = (63*qns + (w/2)) / w;
4253
4254         av_assert2(w>0);
4255         av_assert2(w<(1<<6));
4256         sum += w*w;
4257     }
4258     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4259 #ifdef REFINE_STATS
4260 {START_TIMER
4261 #endif
4262     run=0;
4263     rle_index=0;
4264     for(i=start_i; i<=last_non_zero; i++){
4265         int j= perm_scantable[i];
4266         const int level= block[j];
4267         int coeff;
4268
4269         if(level){
4270             if(level<0) coeff= qmul*level - qadd;
4271             else        coeff= qmul*level + qadd;
4272             run_tab[rle_index++]=run;
4273             run=0;
4274
4275             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4276         }else{
4277             run++;
4278         }
4279     }
4280 #ifdef REFINE_STATS
4281 if(last_non_zero>0){
4282 STOP_TIMER("init rem[]")
4283 }
4284 }
4285
4286 {START_TIMER
4287 #endif
4288     for(;;){
4289         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4290         int best_coeff=0;
4291         int best_change=0;
4292         int run2, best_unquant_change=0, analyze_gradient;
4293 #ifdef REFINE_STATS
4294 {START_TIMER
4295 #endif
4296         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4297
4298         if(analyze_gradient){
4299 #ifdef REFINE_STATS
4300 {START_TIMER
4301 #endif
4302             for(i=0; i<64; i++){
4303                 int w= weight[i];
4304
4305                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4306             }
4307 #ifdef REFINE_STATS
4308 STOP_TIMER("rem*w*w")}
4309 {START_TIMER
4310 #endif
4311             s->fdsp.fdct(d1);
4312 #ifdef REFINE_STATS
4313 STOP_TIMER("dct")}
4314 #endif
4315         }
4316
4317         if(start_i){
4318             const int level= block[0];
4319             int change, old_coeff;
4320
4321             av_assert2(s->mb_intra);
4322
4323             old_coeff= q*level;
4324
4325             for(change=-1; change<=1; change+=2){
4326                 int new_level= level + change;
4327                 int score, new_coeff;
4328
4329                 new_coeff= q*new_level;
4330                 if(new_coeff >= 2048 || new_coeff < 0)
4331                     continue;
4332
4333                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4334                                                   new_coeff - old_coeff);
4335                 if(score<best_score){
4336                     best_score= score;
4337                     best_coeff= 0;
4338                     best_change= change;
4339                     best_unquant_change= new_coeff - old_coeff;
4340                 }
4341             }
4342         }
4343
4344         run=0;
4345         rle_index=0;
4346         run2= run_tab[rle_index++];
4347         prev_level=0;
4348         prev_run=0;
4349
4350         for(i=start_i; i<64; i++){
4351             int j= perm_scantable[i];
4352             const int level= block[j];
4353             int change, old_coeff;
4354
4355             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4356                 break;
4357
4358             if(level){
4359                 if(level<0) old_coeff= qmul*level - qadd;
4360                 else        old_coeff= qmul*level + qadd;
4361                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4362             }else{
4363                 old_coeff=0;
4364                 run2--;
4365                 av_assert2(run2>=0 || i >= last_non_zero );
4366             }
4367
4368             for(change=-1; change<=1; change+=2){
4369                 int new_level= level + change;
4370                 int score, new_coeff, unquant_change;
4371
4372                 score=0;
4373                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4374                    continue;
4375
4376                 if(new_level){
4377                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4378                     else            new_coeff= qmul*new_level + qadd;
4379                     if(new_coeff >= 2048 || new_coeff <= -2048)
4380                         continue;
4381                     //FIXME check for overflow
4382
4383                     if(level){
4384                         if(level < 63 && level > -63){
4385                             if(i < last_non_zero)
4386                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4387                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4388                             else
4389                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4390                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4391                         }
4392                     }else{
4393                         av_assert2(FFABS(new_level)==1);
4394
4395                         if(analyze_gradient){
4396                             int g= d1[ scantable[i] ];
4397                             if(g && (g^new_level) >= 0)
4398                                 continue;
4399                         }
4400
4401                         if(i < last_non_zero){
4402                             int next_i= i + run2 + 1;
4403                             int next_level= block[ perm_scantable[next_i] ] + 64;
4404
4405                             if(next_level&(~127))
4406                                 next_level= 0;
4407
4408                             if(next_i < last_non_zero)
4409                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4410                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4411                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4412                             else
4413                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4414                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4415                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4416                         }else{
4417                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4418                             if(prev_level){
4419                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4420                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4421                             }
4422                         }
4423                     }
4424                 }else{
4425                     new_coeff=0;
4426                     av_assert2(FFABS(level)==1);
4427
4428                     if(i < last_non_zero){
4429                         int next_i= i + run2 + 1;
4430                         int next_level= block[ perm_scantable[next_i] ] + 64;
4431
4432                         if(next_level&(~127))
4433                             next_level= 0;
4434
4435                         if(next_i < last_non_zero)
4436                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4437                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4438                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4439                         else
4440                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4441                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4442                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4443                     }else{
4444                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4445                         if(prev_level){
4446                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4447                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4448                         }
4449                     }
4450                 }
4451
4452                 score *= lambda;
4453
4454                 unquant_change= new_coeff - old_coeff;
4455                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4456
4457                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4458                                                    unquant_change);
4459                 if(score<best_score){
4460                     best_score= score;
4461                     best_coeff= i;
4462                     best_change= change;
4463                     best_unquant_change= unquant_change;
4464                 }
4465             }
4466             if(level){
4467                 prev_level= level + 64;
4468                 if(prev_level&(~127))
4469                     prev_level= 0;
4470                 prev_run= run;
4471                 run=0;
4472             }else{
4473                 run++;
4474             }
4475         }
4476 #ifdef REFINE_STATS
4477 STOP_TIMER("iterative step")}
4478 #endif
4479
4480         if(best_change){
4481             int j= perm_scantable[ best_coeff ];
4482
4483             block[j] += best_change;
4484
4485             if(best_coeff > last_non_zero){
4486                 last_non_zero= best_coeff;
4487                 av_assert2(block[j]);
4488 #ifdef REFINE_STATS
4489 after_last++;
4490 #endif
4491             }else{
4492 #ifdef REFINE_STATS
4493 if(block[j]){
4494     if(block[j] - best_change){
4495         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4496             raise++;
4497         }else{
4498             lower++;
4499         }
4500     }else{
4501         from_zero++;
4502     }
4503 }else{
4504     to_zero++;
4505 }
4506 #endif
4507                 for(; last_non_zero>=start_i; last_non_zero--){
4508                     if(block[perm_scantable[last_non_zero]])
4509                         break;
4510                 }
4511             }
4512 #ifdef REFINE_STATS
4513 count++;
4514 if(256*256*256*64 % count == 0){
4515     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4516 }
4517 #endif
4518             run=0;
4519             rle_index=0;
4520             for(i=start_i; i<=last_non_zero; i++){
4521                 int j= perm_scantable[i];
4522                 const int level= block[j];
4523
4524                  if(level){
4525                      run_tab[rle_index++]=run;
4526                      run=0;
4527                  }else{
4528                      run++;
4529                  }
4530             }
4531
4532             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4533         }else{
4534             break;
4535         }
4536     }
4537 #ifdef REFINE_STATS
4538 if(last_non_zero>0){
4539 STOP_TIMER("iterative search")
4540 }
4541 }
4542 #endif
4543
4544     return last_non_zero;
4545 }
4546
/**
 * Permute the coefficients of an 8x8 block.
 *
 * Only the positions scantable[0] .. scantable[last] are touched: each of
 * them is cleared and its old value is stored at permutation[position].
 * Nothing is done when last <= 0.
 *
 * @param block       the block to permute, modified in place
 * @param permutation the permutation vector (old index -> new index)
 * @param scantable   the scan order in use; it only limits the work to the
 *                    first last + 1 scanned positions, the block is not
 *                    (inverse) permuted into scan order
 * @param last        index, in scan order, of the last non-zero coefficient
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    if (last <= 0)
        return;

    /* FIXME: a shortcut returning early when permutation[1] == 1 is left
     * out on purpose; it is not clean and might fail for some permutations. */

    /* First pass: remember and clear every source position. This has to be
     * finished before anything is written, because a destination may itself
     * be a source that has not been read yet. */
    for (pos = 0; pos <= last; pos++) {
        const int src = scantable[pos];

        saved[src] = block[src];
        block[src] = 0;
    }

    /* Second pass: drop each remembered value at its permuted position. */
    for (pos = 0; pos <= last; pos++) {
        const int src = scantable[pos];
        const int dst = permutation[src];

        block[dst] = saved[src];
    }
}
4582
4583 int ff_dct_quantize_c(MpegEncContext *s,
4584                         int16_t *block, int n,
4585                         int qscale, int *overflow)
4586 {
4587     int i, j, level, last_non_zero, q, start_i;
4588     const int *qmat;
4589     const uint8_t *scantable= s->intra_scantable.scantable;
4590     int bias;
4591     int max=0;
4592     unsigned int threshold1, threshold2;
4593
4594     s->fdsp.fdct(block);
4595
4596     if(s->dct_error_sum)
4597         s->denoise_dct(s, block);
4598
4599     if (s->mb_intra) {
4600         if (!s->h263_aic) {
4601             if (n < 4)
4602                 q = s->y_dc_scale;
4603             else
4604                 q = s->c_dc_scale;
4605             q = q << 3;
4606         } else
4607             /* For AIC we skip quant/dequant of INTRADC */
4608             q = 1 << 3;
4609
4610         /* note: block[0] is assumed to be positive */
4611         block[0] = (block[0] + (q >> 1)) / q;
4612         start_i = 1;
4613         last_non_zero = 0;
4614         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4615         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4616     } else {
4617         start_i = 0;
4618         last_non_zero = -1;
4619         qmat = s->q_inter_matrix[qscale];
4620         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4621     }
4622     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4623     threshold2= (threshold1<<1);
4624     for(i=63;i>=start_i;i--) {
4625         j = scantable[i];
4626         level = block[j] * qmat[j];
4627
4628         if(((unsigned)(level+threshold1))>threshold2){
4629             last_non_zero = i;
4630             break;
4631         }else{
4632             block[j]=0;
4633         }
4634     }
4635     for(i=start_i; i<=last_non_zero; i++) {
4636         j = scantable[i];
4637         level = block[j] * qmat[j];
4638
4639 //        if(   bias+level >= (1<<QMAT_SHIFT)
4640 //           || bias-level >= (1<<QMAT_SHIFT)){
4641         if(((unsigned)(level+threshold1))>threshold2){
4642             if(level>0){
4643                 level= (bias + level)>>QMAT_SHIFT;
4644                 block[j]= level;
4645             }else{
4646                 level= (bias - level)>>QMAT_SHIFT;
4647                 block[j]= -level;
4648             }
4649             max |=level;
4650         }else{
4651             block[j]=0;
4652         }
4653     }
4654     *overflow= s->max_qcoeff < max; //overflow might have happened
4655
4656     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4657     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4658         block_permute(block, s->idsp.idct_permutation,
4659                       scantable, last_non_zero);
4660
4661     return last_non_zero;
4662 }
4663
4664 #define OFFSET(x) offsetof(MpegEncContext, x)
4665 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4666 static const AVOption h263_options[] = {
4667     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4668     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4669     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4670     FF_MPV_COMMON_OPTS
4671     { NULL },
4672 };
4673
4674 static const AVClass h263_class = {
4675     .class_name = "H.263 encoder",
4676     .item_name  = av_default_item_name,
4677     .option     = h263_options,
4678     .version    = LIBAVUTIL_VERSION_INT,
4679 };
4680
4681 AVCodec ff_h263_encoder = {
4682     .name           = "h263",
4683     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4684     .type           = AVMEDIA_TYPE_VIDEO,
4685     .id             = AV_CODEC_ID_H263,
4686     .priv_data_size = sizeof(MpegEncContext),
4687     .init           = ff_mpv_encode_init,
4688     .encode2        = ff_mpv_encode_picture,
4689     .close          = ff_mpv_encode_end,
4690     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4691     .priv_class     = &h263_class,
4692 };
4693
4694 static const AVOption h263p_options[] = {
4695     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4696     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4697     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4698     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4699     FF_MPV_COMMON_OPTS
4700     { NULL },
4701 };
4702 static const AVClass h263p_class = {
4703     .class_name = "H.263p encoder",
4704     .item_name  = av_default_item_name,
4705     .option     = h263p_options,
4706     .version    = LIBAVUTIL_VERSION_INT,
4707 };
4708
4709 AVCodec ff_h263p_encoder = {
4710     .name           = "h263p",
4711     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4712     .type           = AVMEDIA_TYPE_VIDEO,
4713     .id             = AV_CODEC_ID_H263P,
4714     .priv_data_size = sizeof(MpegEncContext),
4715     .init           = ff_mpv_encode_init,
4716     .encode2        = ff_mpv_encode_picture,
4717     .close          = ff_mpv_encode_end,
4718     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4719     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4720     .priv_class     = &h263p_class,
4721 };
4722
4723 static const AVClass msmpeg4v2_class = {
4724     .class_name = "msmpeg4v2 encoder",
4725     .item_name  = av_default_item_name,
4726     .option     = ff_mpv_generic_options,
4727     .version    = LIBAVUTIL_VERSION_INT,
4728 };
4729
4730 AVCodec ff_msmpeg4v2_encoder = {
4731     .name           = "msmpeg4v2",
4732     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4733     .type           = AVMEDIA_TYPE_VIDEO,
4734     .id             = AV_CODEC_ID_MSMPEG4V2,
4735     .priv_data_size = sizeof(MpegEncContext),
4736     .init           = ff_mpv_encode_init,
4737     .encode2        = ff_mpv_encode_picture,
4738     .close          = ff_mpv_encode_end,
4739     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4740     .priv_class     = &msmpeg4v2_class,
4741 };
4742
4743 static const AVClass msmpeg4v3_class = {
4744     .class_name = "msmpeg4v3 encoder",
4745     .item_name  = av_default_item_name,
4746     .option     = ff_mpv_generic_options,
4747     .version    = LIBAVUTIL_VERSION_INT,
4748 };
4749
4750 AVCodec ff_msmpeg4v3_encoder = {
4751     .name           = "msmpeg4",
4752     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4753     .type           = AVMEDIA_TYPE_VIDEO,
4754     .id             = AV_CODEC_ID_MSMPEG4V3,
4755     .priv_data_size = sizeof(MpegEncContext),
4756     .init           = ff_mpv_encode_init,
4757     .encode2        = ff_mpv_encode_picture,
4758     .close          = ff_mpv_encode_end,
4759     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4760     .priv_class     = &msmpeg4v3_class,
4761 };
4762
4763 static const AVClass wmv1_class = {
4764     .class_name = "wmv1 encoder",
4765     .item_name  = av_default_item_name,
4766     .option     = ff_mpv_generic_options,
4767     .version    = LIBAVUTIL_VERSION_INT,
4768 };
4769
4770 AVCodec ff_wmv1_encoder = {
4771     .name           = "wmv1",
4772     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4773     .type           = AVMEDIA_TYPE_VIDEO,
4774     .id             = AV_CODEC_ID_WMV1,
4775     .priv_data_size = sizeof(MpegEncContext),
4776     .init           = ff_mpv_encode_init,
4777     .encode2        = ff_mpv_encode_picture,
4778     .close          = ff_mpv_encode_end,
4779     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4780     .priv_class     = &wmv1_class,
4781 };