]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit '40cf1bbacc6220a0aa6bed5c331871d43f9ce370'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "mpegvideodata.h"
44 #include "h261.h"
45 #include "h263.h"
46 #include "h263data.h"
47 #include "mjpegenc_common.h"
48 #include "mathops.h"
49 #include "mpegutils.h"
50 #include "mjpegenc.h"
51 #include "msmpeg4.h"
52 #include "pixblockdsp.h"
53 #include "qpeldsp.h"
54 #include "faandct.h"
55 #include "thread.h"
56 #include "aandcttab.h"
57 #include "flv.h"
58 #include "mpeg4video.h"
59 #include "internal.h"
60 #include "bytestream.h"
61 #include "wmv2.h"
62 #include "rv10.h"
63 #include <limits.h>
64 #include "sp5x.h"
65
/* Number of fractional bits in the quantizer bias values; e.g. the default
 * intra bias of 3/8 is written as 3 << (QUANT_BIAS_SHIFT - 3). */
66 #define QUANT_BIAS_SHIFT 8
67
/* Fixed-point precision of the reciprocal tables built by ff_convert_matrix():
 * QMAT_SHIFT_MMX for the 16-bit qmat16 table, QMAT_SHIFT for the 32-bit qmat
 * table. */
68 #define QMAT_SHIFT_MMX 16
69 #define QMAT_SHIFT 21
70
/* Forward declarations of file-local helpers referenced before their
 * definitions. */
71 static int encode_picture(MpegEncContext *s, int picture_number);
72 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
73 static int sse_mb(MpegEncContext *s);
74 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
75 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
76
/* Shared default tables; mpv_encode_defaults() installs them as
 * s->me.mv_penalty and s->fcode_tab. */
77 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
78 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
79
/* Generic option table: the common mpegvideo encoder options followed by the
 * terminating NULL entry. */
80 const AVOption ff_mpv_generic_options[] = {
81     FF_MPV_COMMON_OPTS
82     { NULL },
83 };
84
85 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
86                        uint16_t (*qmat16)[2][64],
87                        const uint16_t *quant_matrix,
88                        int bias, int qmin, int qmax, int intra)
89 {
90     FDCTDSPContext *fdsp = &s->fdsp;
91     int qscale;
92     int shift = 0;
93
94     for (qscale = qmin; qscale <= qmax; qscale++) {
95         int i;
96         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
97 #if CONFIG_FAANDCT
98             fdsp->fdct == ff_faandct            ||
99 #endif /* CONFIG_FAANDCT */
100             fdsp->fdct == ff_jpeg_fdct_islow_10) {
101             for (i = 0; i < 64; i++) {
102                 const int j = s->idsp.idct_permutation[i];
103                 int64_t den = (int64_t) qscale * quant_matrix[j];
104                 /* 16 <= qscale * quant_matrix[i] <= 7905
105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
106                  *             19952 <=              x  <= 249205026
107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
108                  *           3444240 >= (1 << 36) / (x) >= 275 */
109
110                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
111             }
112         } else if (fdsp->fdct == ff_fdct_ifast) {
113             for (i = 0; i < 64; i++) {
114                 const int j = s->idsp.idct_permutation[i];
115                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
116                 /* 16 <= qscale * quant_matrix[i] <= 7905
117                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
118                  *             19952 <=              x  <= 249205026
119                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
120                  *           3444240 >= (1 << 36) / (x) >= 275 */
121
122                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
123             }
124         } else {
125             for (i = 0; i < 64; i++) {
126                 const int j = s->idsp.idct_permutation[i];
127                 int64_t den = (int64_t) qscale * quant_matrix[j];
128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
129                  * Assume x = qscale * quant_matrix[i]
130                  * So             16 <=              x  <= 7905
131                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
132                  * so          32768 >= (1 << 19) / (x) >= 67 */
133                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
134                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
135                 //                    (qscale * quant_matrix[i]);
136                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
137
138                 if (qmat16[qscale][0][i] == 0 ||
139                     qmat16[qscale][0][i] == 128 * 256)
140                     qmat16[qscale][0][i] = 128 * 256 - 1;
141                 qmat16[qscale][1][i] =
142                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
143                                 qmat16[qscale][0][i]);
144             }
145         }
146
147         for (i = intra; i < 64; i++) {
148             int64_t max = 8191;
149             if (fdsp->fdct == ff_fdct_ifast) {
150                 max = (8191LL * ff_aanscales[i]) >> 14;
151             }
152             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
153                 shift++;
154             }
155         }
156     }
157     if (shift) {
158         av_log(NULL, AV_LOG_INFO,
159                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
160                QMAT_SHIFT - shift);
161     }
162 }
163
164 static inline void update_qscale(MpegEncContext *s)
165 {
166     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
167                 (FF_LAMBDA_SHIFT + 7);
168     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
169
170     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
171                  FF_LAMBDA_SHIFT;
172 }
173
174 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
175 {
176     int i;
177
178     if (matrix) {
179         put_bits(pb, 1, 1);
180         for (i = 0; i < 64; i++) {
181             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
182         }
183     } else
184         put_bits(pb, 1, 0);
185 }
186
187 /**
188  * init s->current_picture.qscale_table from s->lambda_table
189  */
190 void ff_init_qscale_tab(MpegEncContext *s)
191 {
192     int8_t * const qscale_table = s->current_picture.qscale_table;
193     int i;
194
195     for (i = 0; i < s->mb_num; i++) {
196         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
197         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
198         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
199                                                   s->avctx->qmax);
200     }
201 }
202
203 static void update_duplicate_context_after_me(MpegEncContext *dst,
204                                               MpegEncContext *src)
205 {
206 #define COPY(a) dst->a= src->a
207     COPY(pict_type);
208     COPY(current_picture);
209     COPY(f_code);
210     COPY(b_code);
211     COPY(qscale);
212     COPY(lambda);
213     COPY(lambda2);
214     COPY(picture_in_gop_number);
215     COPY(gop_picture_number);
216     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
217     COPY(progressive_frame);    // FIXME don't set in encode_header
218     COPY(partitioned_frame);    // FIXME don't set in encode_header
219 #undef COPY
220 }
221
222 /**
223  * Set the given MpegEncContext to defaults for encoding.
224  * the changed fields will not depend upon the prior state of the MpegEncContext.
225  */
226 static void mpv_encode_defaults(MpegEncContext *s)
227 {
228     int i;
229     ff_mpv_common_defaults(s);
230
231     for (i = -16; i < 16; i++) {
232         default_fcode_tab[i + MAX_MV] = 1;
233     }
234     s->me.mv_penalty = default_mv_penalty;
235     s->fcode_tab     = default_fcode_tab;
236
237     s->input_picture_number  = 0;
238     s->picture_in_gop_number = 0;
239 }
240
241 av_cold int ff_dct_encode_init(MpegEncContext *s) {
242     if (ARCH_X86)
243         ff_dct_encode_init_x86(s);
244
245     if (CONFIG_H263_ENCODER)
246         ff_h263dsp_init(&s->h263dsp);
247     if (!s->dct_quantize)
248         s->dct_quantize = ff_dct_quantize_c;
249     if (!s->denoise_dct)
250         s->denoise_dct  = denoise_dct_c;
251     s->fast_dct_quantize = s->dct_quantize;
252     if (s->avctx->trellis)
253         s->dct_quantize  = dct_quantize_trellis_c;
254
255     return 0;
256 }
257
258 /* init video encoder */
259 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
260 {
261     MpegEncContext *s = avctx->priv_data;
262     int i, ret, format_supported;
263
264     mpv_encode_defaults(s);
265
266     switch (avctx->codec_id) {
267     case AV_CODEC_ID_MPEG2VIDEO:
268         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
269             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
270             av_log(avctx, AV_LOG_ERROR,
271                    "only YUV420 and YUV422 are supported\n");
272             return -1;
273         }
274         break;
275     case AV_CODEC_ID_MJPEG:
276     case AV_CODEC_ID_AMV:
277         format_supported = 0;
278         /* JPEG color space */
279         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
280             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
281             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
282             (avctx->color_range == AVCOL_RANGE_JPEG &&
283              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
284               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
285               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
286             format_supported = 1;
287         /* MPEG color space */
288         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
289                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
290                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
291                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
292             format_supported = 1;
293
294         if (!format_supported) {
295             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
296             return -1;
297         }
298         break;
299     default:
300         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
301             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
302             return -1;
303         }
304     }
305
306     switch (avctx->pix_fmt) {
307     case AV_PIX_FMT_YUVJ444P:
308     case AV_PIX_FMT_YUV444P:
309         s->chroma_format = CHROMA_444;
310         break;
311     case AV_PIX_FMT_YUVJ422P:
312     case AV_PIX_FMT_YUV422P:
313         s->chroma_format = CHROMA_422;
314         break;
315     case AV_PIX_FMT_YUVJ420P:
316     case AV_PIX_FMT_YUV420P:
317     default:
318         s->chroma_format = CHROMA_420;
319         break;
320     }
321
322     s->bit_rate = avctx->bit_rate;
323     s->width    = avctx->width;
324     s->height   = avctx->height;
325     if (avctx->gop_size > 600 &&
326         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
327         av_log(avctx, AV_LOG_WARNING,
328                "keyframe interval too large!, reducing it from %d to %d\n",
329                avctx->gop_size, 600);
330         avctx->gop_size = 600;
331     }
332     s->gop_size     = avctx->gop_size;
333     s->avctx        = avctx;
334     if (avctx->max_b_frames > MAX_B_FRAMES) {
335         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
336                "is %d.\n", MAX_B_FRAMES);
337         avctx->max_b_frames = MAX_B_FRAMES;
338     }
339     s->max_b_frames = avctx->max_b_frames;
340     s->codec_id     = avctx->codec->id;
341     s->strict_std_compliance = avctx->strict_std_compliance;
342     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
343     s->mpeg_quant         = avctx->mpeg_quant;
344     s->rtp_mode           = !!avctx->rtp_payload_size;
345     s->intra_dc_precision = avctx->intra_dc_precision;
346
347     // workaround some differences between how applications specify dc precision
348     if (s->intra_dc_precision < 0) {
349         s->intra_dc_precision += 8;
350     } else if (s->intra_dc_precision >= 8)
351         s->intra_dc_precision -= 8;
352
353     if (s->intra_dc_precision < 0) {
354         av_log(avctx, AV_LOG_ERROR,
355                 "intra dc precision must be positive, note some applications use"
356                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
357         return AVERROR(EINVAL);
358     }
359
360     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
361         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
362         return AVERROR(EINVAL);
363     }
364     s->user_specified_pts = AV_NOPTS_VALUE;
365
366     if (s->gop_size <= 1) {
367         s->intra_only = 1;
368         s->gop_size   = 12;
369     } else {
370         s->intra_only = 0;
371     }
372
373     s->me_method = avctx->me_method;
374
375     /* Fixed QSCALE */
376     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
377
378 #if FF_API_MPV_OPT
379     FF_DISABLE_DEPRECATION_WARNINGS
380     if (avctx->border_masking != 0.0)
381         s->border_masking = avctx->border_masking;
382     FF_ENABLE_DEPRECATION_WARNINGS
383 #endif
384
385     s->adaptive_quant = (s->avctx->lumi_masking ||
386                          s->avctx->dark_masking ||
387                          s->avctx->temporal_cplx_masking ||
388                          s->avctx->spatial_cplx_masking  ||
389                          s->avctx->p_masking      ||
390                          s->border_masking ||
391                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
392                         !s->fixed_qscale;
393
394     s->loop_filter = !!(s->avctx->flags & CODEC_FLAG_LOOP_FILTER);
395
396     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
397         switch(avctx->codec_id) {
398         case AV_CODEC_ID_MPEG1VIDEO:
399         case AV_CODEC_ID_MPEG2VIDEO:
400             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
401             break;
402         case AV_CODEC_ID_MPEG4:
403         case AV_CODEC_ID_MSMPEG4V1:
404         case AV_CODEC_ID_MSMPEG4V2:
405         case AV_CODEC_ID_MSMPEG4V3:
406             if       (avctx->rc_max_rate >= 15000000) {
407                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
408             } else if(avctx->rc_max_rate >=  2000000) {
409                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
410             } else if(avctx->rc_max_rate >=   384000) {
411                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
412             } else
413                 avctx->rc_buffer_size = 40;
414             avctx->rc_buffer_size *= 16384;
415             break;
416         }
417         if (avctx->rc_buffer_size) {
418             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
419         }
420     }
421
422     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
423         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
424         return -1;
425     }
426
427     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
428         av_log(avctx, AV_LOG_INFO,
429                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
430     }
431
432     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
433         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
434         return -1;
435     }
436
437     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
438         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
439         return -1;
440     }
441
442     if (avctx->rc_max_rate &&
443         avctx->rc_max_rate == avctx->bit_rate &&
444         avctx->rc_max_rate != avctx->rc_min_rate) {
445         av_log(avctx, AV_LOG_INFO,
446                "impossible bitrate constraints, this will fail\n");
447     }
448
449     if (avctx->rc_buffer_size &&
450         avctx->bit_rate * (int64_t)avctx->time_base.num >
451             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
452         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
453         return -1;
454     }
455
456     if (!s->fixed_qscale &&
457         avctx->bit_rate * av_q2d(avctx->time_base) >
458             avctx->bit_rate_tolerance) {
459         av_log(avctx, AV_LOG_WARNING,
460                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
461         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
462     }
463
464     if (s->avctx->rc_max_rate &&
465         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
466         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
467          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
468         90000LL * (avctx->rc_buffer_size - 1) >
469             s->avctx->rc_max_rate * 0xFFFFLL) {
470         av_log(avctx, AV_LOG_INFO,
471                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
472                "specified vbv buffer is too large for the given bitrate!\n");
473     }
474
475     if ((s->avctx->flags & CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
476         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
477         s->codec_id != AV_CODEC_ID_FLV1) {
478         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
479         return -1;
480     }
481
482     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
483         av_log(avctx, AV_LOG_ERROR,
484                "OBMC is only supported with simple mb decision\n");
485         return -1;
486     }
487
488     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
489         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
490         return -1;
491     }
492
493     if (s->max_b_frames                    &&
494         s->codec_id != AV_CODEC_ID_MPEG4      &&
495         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
496         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
497         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
498         return -1;
499     }
500     if (s->max_b_frames < 0) {
501         av_log(avctx, AV_LOG_ERROR,
502                "max b frames must be 0 or positive for mpegvideo based encoders\n");
503         return -1;
504     }
505
506     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
507          s->codec_id == AV_CODEC_ID_H263  ||
508          s->codec_id == AV_CODEC_ID_H263P) &&
509         (avctx->sample_aspect_ratio.num > 255 ||
510          avctx->sample_aspect_ratio.den > 255)) {
511         av_log(avctx, AV_LOG_WARNING,
512                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
513                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
514         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
515                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
516     }
517
518     if ((s->codec_id == AV_CODEC_ID_H263  ||
519          s->codec_id == AV_CODEC_ID_H263P) &&
520         (avctx->width  > 2048 ||
521          avctx->height > 1152 )) {
522         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
523         return -1;
524     }
525     if ((s->codec_id == AV_CODEC_ID_H263  ||
526          s->codec_id == AV_CODEC_ID_H263P) &&
527         ((avctx->width &3) ||
528          (avctx->height&3) )) {
529         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
530         return -1;
531     }
532
533     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
534         (avctx->width  > 4095 ||
535          avctx->height > 4095 )) {
536         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
537         return -1;
538     }
539
540     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
541         (avctx->width  > 16383 ||
542          avctx->height > 16383 )) {
543         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
544         return -1;
545     }
546
547     if (s->codec_id == AV_CODEC_ID_RV10 &&
548         (avctx->width &15 ||
549          avctx->height&15 )) {
550         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
551         return AVERROR(EINVAL);
552     }
553
554     if (s->codec_id == AV_CODEC_ID_RV20 &&
555         (avctx->width &3 ||
556          avctx->height&3 )) {
557         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
558         return AVERROR(EINVAL);
559     }
560
561     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
562          s->codec_id == AV_CODEC_ID_WMV2) &&
563          avctx->width & 1) {
564          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
565          return -1;
566     }
567
568     if ((s->avctx->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
569         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
570         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
571         return -1;
572     }
573
574     // FIXME mpeg2 uses that too
575     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
576                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
577         av_log(avctx, AV_LOG_ERROR,
578                "mpeg2 style quantization not supported by codec\n");
579         return -1;
580     }
581
582     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
583         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
584         return -1;
585     }
586
587     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
588         s->avctx->mb_decision != FF_MB_DECISION_RD) {
589         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
590         return -1;
591     }
592
593     if (s->avctx->scenechange_threshold < 1000000000 &&
594         (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)) {
595         av_log(avctx, AV_LOG_ERROR,
596                "closed gop with scene change detection are not supported yet, "
597                "set threshold to 1000000000\n");
598         return -1;
599     }
600
601     if (s->avctx->flags & CODEC_FLAG_LOW_DELAY) {
602         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
603             av_log(avctx, AV_LOG_ERROR,
604                   "low delay forcing is only available for mpeg2\n");
605             return -1;
606         }
607         if (s->max_b_frames != 0) {
608             av_log(avctx, AV_LOG_ERROR,
609                    "b frames cannot be used with low delay\n");
610             return -1;
611         }
612     }
613
614     if (s->q_scale_type == 1) {
615         if (avctx->qmax > 12) {
616             av_log(avctx, AV_LOG_ERROR,
617                    "non linear quant only supports qmax <= 12 currently\n");
618             return -1;
619         }
620     }
621
622     if (s->avctx->thread_count > 1         &&
623         s->codec_id != AV_CODEC_ID_MPEG4      &&
624         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
625         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
626         s->codec_id != AV_CODEC_ID_MJPEG      &&
627         (s->codec_id != AV_CODEC_ID_H263P)) {
628         av_log(avctx, AV_LOG_ERROR,
629                "multi threaded encoding not supported by codec\n");
630         return -1;
631     }
632
633     if (s->avctx->thread_count < 1) {
634         av_log(avctx, AV_LOG_ERROR,
635                "automatic thread number detection not supported by codec, "
636                "patch welcome\n");
637         return -1;
638     }
639
640     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
641         s->rtp_mode = 1;
642
643     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
644         s->h263_slice_structured = 1;
645
646     if (!avctx->time_base.den || !avctx->time_base.num) {
647         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
648         return -1;
649     }
650
651     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
652         av_log(avctx, AV_LOG_INFO,
653                "notice: b_frame_strategy only affects the first pass\n");
654         avctx->b_frame_strategy = 0;
655     }
656
657     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
658     if (i > 1) {
659         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
660         avctx->time_base.den /= i;
661         avctx->time_base.num /= i;
662         //return -1;
663     }
664
665     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
666         // (a + x * 3 / 8) / x
667         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
668         s->inter_quant_bias = 0;
669     } else {
670         s->intra_quant_bias = 0;
671         // (a - x / 4) / x
672         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
673     }
674
675     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
676         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
677         return AVERROR(EINVAL);
678     }
679
680 #if FF_API_QUANT_BIAS
681 FF_DISABLE_DEPRECATION_WARNINGS
682     if (s->intra_quant_bias == FF_DEFAULT_QUANT_BIAS &&
683         avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
684         s->intra_quant_bias = avctx->intra_quant_bias;
685     if (s->inter_quant_bias == FF_DEFAULT_QUANT_BIAS &&
686         avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
687         s->inter_quant_bias = avctx->inter_quant_bias;
688 FF_ENABLE_DEPRECATION_WARNINGS
689 #endif
690
691     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
692
693     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
694         s->avctx->time_base.den > (1 << 16) - 1) {
695         av_log(avctx, AV_LOG_ERROR,
696                "timebase %d/%d not supported by MPEG 4 standard, "
697                "the maximum admitted value for the timebase denominator "
698                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
699                (1 << 16) - 1);
700         return -1;
701     }
702     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
703
704     switch (avctx->codec->id) {
705     case AV_CODEC_ID_MPEG1VIDEO:
706         s->out_format = FMT_MPEG1;
707         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
708         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
709         break;
710     case AV_CODEC_ID_MPEG2VIDEO:
711         s->out_format = FMT_MPEG1;
712         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
713         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
714         s->rtp_mode   = 1;
715         break;
716     case AV_CODEC_ID_MJPEG:
717     case AV_CODEC_ID_AMV:
718         s->out_format = FMT_MJPEG;
719         s->intra_only = 1; /* force intra only for jpeg */
720         if (!CONFIG_MJPEG_ENCODER ||
721             ff_mjpeg_encode_init(s) < 0)
722             return -1;
723         avctx->delay = 0;
724         s->low_delay = 1;
725         break;
726     case AV_CODEC_ID_H261:
727         if (!CONFIG_H261_ENCODER)
728             return -1;
729         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
730             av_log(avctx, AV_LOG_ERROR,
731                    "The specified picture size of %dx%d is not valid for the "
732                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
733                     s->width, s->height);
734             return -1;
735         }
736         s->out_format = FMT_H261;
737         avctx->delay  = 0;
738         s->low_delay  = 1;
739         s->rtp_mode   = 0; /* Sliced encoding not supported */
740         break;
741     case AV_CODEC_ID_H263:
742         if (!CONFIG_H263_ENCODER)
743             return -1;
744         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
745                              s->width, s->height) == 8) {
746             av_log(avctx, AV_LOG_ERROR,
747                    "The specified picture size of %dx%d is not valid for "
748                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
749                    "352x288, 704x576, and 1408x1152. "
750                    "Try H.263+.\n", s->width, s->height);
751             return -1;
752         }
753         s->out_format = FMT_H263;
754         avctx->delay  = 0;
755         s->low_delay  = 1;
756         break;
757     case AV_CODEC_ID_H263P:
758         s->out_format = FMT_H263;
759         s->h263_plus  = 1;
760         /* Fx */
761         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
762         s->modified_quant  = s->h263_aic;
763         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
764         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
765
766         /* /Fx */
767         /* These are just to be sure */
768         avctx->delay = 0;
769         s->low_delay = 1;
770         break;
771     case AV_CODEC_ID_FLV1:
772         s->out_format      = FMT_H263;
773         s->h263_flv        = 2; /* format = 1; 11-bit codes */
774         s->unrestricted_mv = 1;
775         s->rtp_mode  = 0; /* don't allow GOB */
776         avctx->delay = 0;
777         s->low_delay = 1;
778         break;
779     case AV_CODEC_ID_RV10:
780         s->out_format = FMT_H263;
781         avctx->delay  = 0;
782         s->low_delay  = 1;
783         break;
784     case AV_CODEC_ID_RV20:
785         s->out_format      = FMT_H263;
786         avctx->delay       = 0;
787         s->low_delay       = 1;
788         s->modified_quant  = 1;
789         s->h263_aic        = 1;
790         s->h263_plus       = 1;
791         s->loop_filter     = 1;
792         s->unrestricted_mv = 0;
793         break;
794     case AV_CODEC_ID_MPEG4:
795         s->out_format      = FMT_H263;
796         s->h263_pred       = 1;
797         s->unrestricted_mv = 1;
798         s->low_delay       = s->max_b_frames ? 0 : 1;
799         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
800         break;
801     case AV_CODEC_ID_MSMPEG4V2:
802         s->out_format      = FMT_H263;
803         s->h263_pred       = 1;
804         s->unrestricted_mv = 1;
805         s->msmpeg4_version = 2;
806         avctx->delay       = 0;
807         s->low_delay       = 1;
808         break;
809     case AV_CODEC_ID_MSMPEG4V3:
810         s->out_format        = FMT_H263;
811         s->h263_pred         = 1;
812         s->unrestricted_mv   = 1;
813         s->msmpeg4_version   = 3;
814         s->flipflop_rounding = 1;
815         avctx->delay         = 0;
816         s->low_delay         = 1;
817         break;
818     case AV_CODEC_ID_WMV1:
819         s->out_format        = FMT_H263;
820         s->h263_pred         = 1;
821         s->unrestricted_mv   = 1;
822         s->msmpeg4_version   = 4;
823         s->flipflop_rounding = 1;
824         avctx->delay         = 0;
825         s->low_delay         = 1;
826         break;
827     case AV_CODEC_ID_WMV2:
828         s->out_format        = FMT_H263;
829         s->h263_pred         = 1;
830         s->unrestricted_mv   = 1;
831         s->msmpeg4_version   = 5;
832         s->flipflop_rounding = 1;
833         avctx->delay         = 0;
834         s->low_delay         = 1;
835         break;
836     default:
837         return -1;
838     }
839
840     avctx->has_b_frames = !s->low_delay;
841
842     s->encoding = 1;
843
844     s->progressive_frame    =
845     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
846                                                 CODEC_FLAG_INTERLACED_ME) ||
847                                 s->alternate_scan);
848
849     /* init */
850     ff_mpv_idct_init(s);
851     if (ff_mpv_common_init(s) < 0)
852         return -1;
853
854     ff_fdctdsp_init(&s->fdsp, avctx);
855     ff_me_cmp_init(&s->mecc, avctx);
856     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
857     ff_pixblockdsp_init(&s->pdsp, avctx);
858     ff_qpeldsp_init(&s->qdsp);
859
860     if (s->msmpeg4_version) {
861         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
862                           2 * 2 * (MAX_LEVEL + 1) *
863                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
864     }
865     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
866
867     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
868     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
869     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
870     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
871     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
872     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
873     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
874                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
875     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
876                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
877
878     if (s->avctx->noise_reduction) {
879         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
880                           2 * 64 * sizeof(uint16_t), fail);
881     }
882
883     ff_dct_encode_init(s);
884
885     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
886         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
887
888     s->quant_precision = 5;
889
890     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
891     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
892
893     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
894         ff_h261_encode_init(s);
895     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
896         ff_h263_encode_init(s);
897     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
898         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
899             return ret;
900     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
901         && s->out_format == FMT_MPEG1)
902         ff_mpeg1_encode_init(s);
903
904     /* init q matrix */
905     for (i = 0; i < 64; i++) {
906         int j = s->idsp.idct_permutation[i];
907         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
908             s->mpeg_quant) {
909             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
910             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
911         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
912             s->intra_matrix[j] =
913             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
914         } else {
915             /* mpeg1/2 */
916             s->chroma_intra_matrix[j] =
917             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
918             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
919         }
920         if (s->avctx->intra_matrix)
921             s->intra_matrix[j] = s->avctx->intra_matrix[i];
922         if (s->avctx->inter_matrix)
923             s->inter_matrix[j] = s->avctx->inter_matrix[i];
924     }
925
926     /* precompute matrix */
927     /* for mjpeg, we do include qscale in the matrix */
928     if (s->out_format != FMT_MJPEG) {
929         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
930                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
931                           31, 1);
932         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
933                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
934                           31, 0);
935     }
936
937     if (ff_rate_control_init(s) < 0)
938         return -1;
939
940 #if FF_API_ERROR_RATE
941     FF_DISABLE_DEPRECATION_WARNINGS
942     if (avctx->error_rate)
943         s->error_rate = avctx->error_rate;
944     FF_ENABLE_DEPRECATION_WARNINGS;
945 #endif
946
947 #if FF_API_NORMALIZE_AQP
948     FF_DISABLE_DEPRECATION_WARNINGS
949     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
950         s->mpv_flags |= FF_MPV_FLAG_NAQ;
951     FF_ENABLE_DEPRECATION_WARNINGS;
952 #endif
953
954 #if FF_API_MV0
955     FF_DISABLE_DEPRECATION_WARNINGS
956     if (avctx->flags & CODEC_FLAG_MV0)
957         s->mpv_flags |= FF_MPV_FLAG_MV0;
958     FF_ENABLE_DEPRECATION_WARNINGS
959 #endif
960
961 #if FF_API_MPV_OPT
962     FF_DISABLE_DEPRECATION_WARNINGS
963     if (avctx->rc_qsquish != 0.0)
964         s->rc_qsquish = avctx->rc_qsquish;
965     if (avctx->rc_qmod_amp != 0.0)
966         s->rc_qmod_amp = avctx->rc_qmod_amp;
967     if (avctx->rc_qmod_freq)
968         s->rc_qmod_freq = avctx->rc_qmod_freq;
969     if (avctx->rc_buffer_aggressivity != 1.0)
970         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
971     if (avctx->rc_initial_cplx != 0.0)
972         s->rc_initial_cplx = avctx->rc_initial_cplx;
973     if (avctx->lmin)
974         s->lmin = avctx->lmin;
975     if (avctx->lmax)
976         s->lmax = avctx->lmax;
977
978     if (avctx->rc_eq) {
979         av_freep(&s->rc_eq);
980         s->rc_eq = av_strdup(avctx->rc_eq);
981         if (!s->rc_eq)
982             return AVERROR(ENOMEM);
983     }
984     FF_ENABLE_DEPRECATION_WARNINGS
985 #endif
986
987     if (avctx->b_frame_strategy == 2) {
988         for (i = 0; i < s->max_b_frames + 2; i++) {
989             s->tmp_frames[i] = av_frame_alloc();
990             if (!s->tmp_frames[i])
991                 return AVERROR(ENOMEM);
992
993             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
994             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
995             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
996
997             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
998             if (ret < 0)
999                 return ret;
1000         }
1001     }
1002
1003     return 0;
1004 fail:
1005     ff_mpv_encode_end(avctx);
1006     return AVERROR_UNKNOWN;
1007 }
1008
1009 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1010 {
1011     MpegEncContext *s = avctx->priv_data;
1012     int i;
1013
1014     ff_rate_control_uninit(s);
1015
1016     ff_mpv_common_end(s);
1017     if (CONFIG_MJPEG_ENCODER &&
1018         s->out_format == FMT_MJPEG)
1019         ff_mjpeg_encode_close(s);
1020
1021     av_freep(&avctx->extradata);
1022
1023     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1024         av_frame_free(&s->tmp_frames[i]);
1025
1026     ff_free_picture_tables(&s->new_picture);
1027     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1028
1029     av_freep(&s->avctx->stats_out);
1030     av_freep(&s->ac_stats);
1031
1032     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1033     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1034     s->q_chroma_intra_matrix=   NULL;
1035     s->q_chroma_intra_matrix16= NULL;
1036     av_freep(&s->q_intra_matrix);
1037     av_freep(&s->q_inter_matrix);
1038     av_freep(&s->q_intra_matrix16);
1039     av_freep(&s->q_inter_matrix16);
1040     av_freep(&s->input_picture);
1041     av_freep(&s->reordered_input_picture);
1042     av_freep(&s->dct_offset);
1043
1044     return 0;
1045 }
1046
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared with
 * @param stride distance between the starts of two rows of src
 * @return sum over all 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += FFABS(src[col + row * stride] - ref);
    }

    return total;
}
1060
1061 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1062                            uint8_t *ref, int stride)
1063 {
1064     int x, y, w, h;
1065     int acc = 0;
1066
1067     w = s->width  & ~15;
1068     h = s->height & ~15;
1069
1070     for (y = 0; y < h; y += 16) {
1071         for (x = 0; x < w; x += 16) {
1072             int offset = x + y * stride;
1073             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1074                                       stride, 16);
1075             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1076             int sae  = get_sae(src + offset, mean, stride);
1077
1078             acc += sae + 500 < sad;
1079         }
1080     }
1081     return acc;
1082 }
1083
1084 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1085 {
1086     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1087                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1088                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1089                             &s->linesize, &s->uvlinesize);
1090 }
1091
1092 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1093 {
1094     Picture *pic = NULL;
1095     int64_t pts;
1096     int i, display_picture_number = 0, ret;
1097     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1098                                                  (s->low_delay ? 0 : 1);
1099     int direct = 1;
1100
1101     if (pic_arg) {
1102         pts = pic_arg->pts;
1103         display_picture_number = s->input_picture_number++;
1104
1105         if (pts != AV_NOPTS_VALUE) {
1106             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1107                 int64_t last = s->user_specified_pts;
1108
1109                 if (pts <= last) {
1110                     av_log(s->avctx, AV_LOG_ERROR,
1111                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1112                            pts, last);
1113                     return AVERROR(EINVAL);
1114                 }
1115
1116                 if (!s->low_delay && display_picture_number == 1)
1117                     s->dts_delta = pts - last;
1118             }
1119             s->user_specified_pts = pts;
1120         } else {
1121             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1122                 s->user_specified_pts =
1123                 pts = s->user_specified_pts + 1;
1124                 av_log(s->avctx, AV_LOG_INFO,
1125                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1126                        pts);
1127             } else {
1128                 pts = display_picture_number;
1129             }
1130         }
1131     }
1132
1133     if (pic_arg) {
1134         if (!pic_arg->buf[0] ||
1135             pic_arg->linesize[0] != s->linesize ||
1136             pic_arg->linesize[1] != s->uvlinesize ||
1137             pic_arg->linesize[2] != s->uvlinesize)
1138             direct = 0;
1139         if ((s->width & 15) || (s->height & 15))
1140             direct = 0;
1141         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1142             direct = 0;
1143         if (s->linesize & (STRIDE_ALIGN-1))
1144             direct = 0;
1145
1146         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1147                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1148
1149         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1150         if (i < 0)
1151             return i;
1152
1153         pic = &s->picture[i];
1154         pic->reference = 3;
1155
1156         if (direct) {
1157             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1158                 return ret;
1159         }
1160         ret = alloc_picture(s, pic, direct);
1161         if (ret < 0)
1162             return ret;
1163
1164         if (!direct) {
1165             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1166                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1167                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1168                 // empty
1169             } else {
1170                 int h_chroma_shift, v_chroma_shift;
1171                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1172                                                  &h_chroma_shift,
1173                                                  &v_chroma_shift);
1174
1175                 for (i = 0; i < 3; i++) {
1176                     int src_stride = pic_arg->linesize[i];
1177                     int dst_stride = i ? s->uvlinesize : s->linesize;
1178                     int h_shift = i ? h_chroma_shift : 0;
1179                     int v_shift = i ? v_chroma_shift : 0;
1180                     int w = s->width  >> h_shift;
1181                     int h = s->height >> v_shift;
1182                     uint8_t *src = pic_arg->data[i];
1183                     uint8_t *dst = pic->f->data[i];
1184                     int vpad = 16;
1185
1186                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1187                         && !s->progressive_sequence
1188                         && FFALIGN(s->height, 32) - s->height > 16)
1189                         vpad = 32;
1190
1191                     if (!s->avctx->rc_buffer_size)
1192                         dst += INPLACE_OFFSET;
1193
1194                     if (src_stride == dst_stride)
1195                         memcpy(dst, src, src_stride * h);
1196                     else {
1197                         int h2 = h;
1198                         uint8_t *dst2 = dst;
1199                         while (h2--) {
1200                             memcpy(dst2, src, w);
1201                             dst2 += dst_stride;
1202                             src += src_stride;
1203                         }
1204                     }
1205                     if ((s->width & 15) || (s->height & (vpad-1))) {
1206                         s->mpvencdsp.draw_edges(dst, dst_stride,
1207                                                 w, h,
1208                                                 16 >> h_shift,
1209                                                 vpad >> v_shift,
1210                                                 EDGE_BOTTOM);
1211                     }
1212                 }
1213             }
1214         }
1215         ret = av_frame_copy_props(pic->f, pic_arg);
1216         if (ret < 0)
1217             return ret;
1218
1219         pic->f->display_picture_number = display_picture_number;
1220         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1221     }
1222
1223     /* shift buffer entries */
1224     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1225         s->input_picture[i - 1] = s->input_picture[i];
1226
1227     s->input_picture[encoding_delay] = (Picture*) pic;
1228
1229     return 0;
1230 }
1231
1232 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1233 {
1234     int x, y, plane;
1235     int score = 0;
1236     int64_t score64 = 0;
1237
1238     for (plane = 0; plane < 3; plane++) {
1239         const int stride = p->f->linesize[plane];
1240         const int bw = plane ? 1 : 2;
1241         for (y = 0; y < s->mb_height * bw; y++) {
1242             for (x = 0; x < s->mb_width * bw; x++) {
1243                 int off = p->shared ? 0 : 16;
1244                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1245                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1246                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1247
1248                 switch (FFABS(s->avctx->frame_skip_exp)) {
1249                 case 0: score    =  FFMAX(score, v);          break;
1250                 case 1: score   += FFABS(v);                  break;
1251                 case 2: score64 += v * (int64_t)v;                       break;
1252                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1253                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1254                 }
1255             }
1256         }
1257     }
1258     emms_c();
1259
1260     if (score)
1261         score64 = score;
1262     if (s->avctx->frame_skip_exp < 0)
1263         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1264                       -1.0/s->avctx->frame_skip_exp);
1265
1266     if (score64 < s->avctx->frame_skip_threshold)
1267         return 1;
1268     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1269         return 1;
1270     return 0;
1271 }
1272
1273 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1274 {
1275     AVPacket pkt = { 0 };
1276     int ret, got_output;
1277
1278     av_init_packet(&pkt);
1279     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1280     if (ret < 0)
1281         return ret;
1282
1283     ret = pkt.size;
1284     av_free_packet(&pkt);
1285     return ret;
1286 }
1287
1288 static int estimate_best_b_count(MpegEncContext *s)
1289 {
1290     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1291     AVCodecContext *c = avcodec_alloc_context3(NULL);
1292     const int scale = s->avctx->brd_scale;
1293     int i, j, out_size, p_lambda, b_lambda, lambda2;
1294     int64_t best_rd  = INT64_MAX;
1295     int best_b_count = -1;
1296
1297     if (!c)
1298         return AVERROR(ENOMEM);
1299     av_assert0(scale >= 0 && scale <= 3);
1300
1301     //emms_c();
1302     //s->next_picture_ptr->quality;
1303     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1304     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1305     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1306     if (!b_lambda) // FIXME we should do this somewhere else
1307         b_lambda = p_lambda;
1308     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1309                FF_LAMBDA_SHIFT;
1310
1311     c->width        = s->width  >> scale;
1312     c->height       = s->height >> scale;
1313     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1314     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1315     c->mb_decision  = s->avctx->mb_decision;
1316     c->me_cmp       = s->avctx->me_cmp;
1317     c->mb_cmp       = s->avctx->mb_cmp;
1318     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1319     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1320     c->time_base    = s->avctx->time_base;
1321     c->max_b_frames = s->max_b_frames;
1322
1323     if (avcodec_open2(c, codec, NULL) < 0)
1324         return -1;
1325
1326     for (i = 0; i < s->max_b_frames + 2; i++) {
1327         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1328                                                 s->next_picture_ptr;
1329         uint8_t *data[4];
1330
1331         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1332             pre_input = *pre_input_ptr;
1333             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1334
1335             if (!pre_input.shared && i) {
1336                 data[0] += INPLACE_OFFSET;
1337                 data[1] += INPLACE_OFFSET;
1338                 data[2] += INPLACE_OFFSET;
1339             }
1340
1341             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1342                                        s->tmp_frames[i]->linesize[0],
1343                                        data[0],
1344                                        pre_input.f->linesize[0],
1345                                        c->width, c->height);
1346             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1347                                        s->tmp_frames[i]->linesize[1],
1348                                        data[1],
1349                                        pre_input.f->linesize[1],
1350                                        c->width >> 1, c->height >> 1);
1351             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1352                                        s->tmp_frames[i]->linesize[2],
1353                                        data[2],
1354                                        pre_input.f->linesize[2],
1355                                        c->width >> 1, c->height >> 1);
1356         }
1357     }
1358
1359     for (j = 0; j < s->max_b_frames + 1; j++) {
1360         int64_t rd = 0;
1361
1362         if (!s->input_picture[j])
1363             break;
1364
1365         c->error[0] = c->error[1] = c->error[2] = 0;
1366
1367         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1368         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1369
1370         out_size = encode_frame(c, s->tmp_frames[0]);
1371
1372         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1373
1374         for (i = 0; i < s->max_b_frames + 1; i++) {
1375             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1376
1377             s->tmp_frames[i + 1]->pict_type = is_p ?
1378                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1379             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1380
1381             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1382
1383             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1384         }
1385
1386         /* get the delayed frames */
1387         while (out_size) {
1388             out_size = encode_frame(c, NULL);
1389             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1390         }
1391
1392         rd += c->error[0] + c->error[1] + c->error[2];
1393
1394         if (rd < best_rd) {
1395             best_rd = rd;
1396             best_b_count = j;
1397         }
1398     }
1399
1400     avcodec_close(c);
1401     av_freep(&c);
1402
1403     return best_b_count;
1404 }
1405
1406 static int select_input_picture(MpegEncContext *s)
1407 {
1408     int i, ret;
1409
1410     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1411         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1412     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1413
1414     /* set next picture type & ordering */
1415     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1416         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1417             if (s->picture_in_gop_number < s->gop_size &&
1418                 s->next_picture_ptr &&
1419                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1420                 // FIXME check that te gop check above is +-1 correct
1421                 av_frame_unref(s->input_picture[0]->f);
1422
1423                 ff_vbv_update(s, 0);
1424
1425                 goto no_output_pic;
1426             }
1427         }
1428
1429         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1430             !s->next_picture_ptr || s->intra_only) {
1431             s->reordered_input_picture[0] = s->input_picture[0];
1432             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1433             s->reordered_input_picture[0]->f->coded_picture_number =
1434                 s->coded_picture_number++;
1435         } else {
1436             int b_frames;
1437
1438             if (s->avctx->flags & CODEC_FLAG_PASS2) {
1439                 for (i = 0; i < s->max_b_frames + 1; i++) {
1440                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1441
1442                     if (pict_num >= s->rc_context.num_entries)
1443                         break;
1444                     if (!s->input_picture[i]) {
1445                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1446                         break;
1447                     }
1448
1449                     s->input_picture[i]->f->pict_type =
1450                         s->rc_context.entry[pict_num].new_pict_type;
1451                 }
1452             }
1453
1454             if (s->avctx->b_frame_strategy == 0) {
1455                 b_frames = s->max_b_frames;
1456                 while (b_frames && !s->input_picture[b_frames])
1457                     b_frames--;
1458             } else if (s->avctx->b_frame_strategy == 1) {
1459                 for (i = 1; i < s->max_b_frames + 1; i++) {
1460                     if (s->input_picture[i] &&
1461                         s->input_picture[i]->b_frame_score == 0) {
1462                         s->input_picture[i]->b_frame_score =
1463                             get_intra_count(s,
1464                                             s->input_picture[i    ]->f->data[0],
1465                                             s->input_picture[i - 1]->f->data[0],
1466                                             s->linesize) + 1;
1467                     }
1468                 }
1469                 for (i = 0; i < s->max_b_frames + 1; i++) {
1470                     if (!s->input_picture[i] ||
1471                         s->input_picture[i]->b_frame_score - 1 >
1472                             s->mb_num / s->avctx->b_sensitivity)
1473                         break;
1474                 }
1475
1476                 b_frames = FFMAX(0, i - 1);
1477
1478                 /* reset scores */
1479                 for (i = 0; i < b_frames + 1; i++) {
1480                     s->input_picture[i]->b_frame_score = 0;
1481                 }
1482             } else if (s->avctx->b_frame_strategy == 2) {
1483                 b_frames = estimate_best_b_count(s);
1484             } else {
1485                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1486                 b_frames = 0;
1487             }
1488
1489             emms_c();
1490
1491             for (i = b_frames - 1; i >= 0; i--) {
1492                 int type = s->input_picture[i]->f->pict_type;
1493                 if (type && type != AV_PICTURE_TYPE_B)
1494                     b_frames = i;
1495             }
1496             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1497                 b_frames == s->max_b_frames) {
1498                 av_log(s->avctx, AV_LOG_ERROR,
1499                        "warning, too many b frames in a row\n");
1500             }
1501
1502             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1503                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1504                     s->gop_size > s->picture_in_gop_number) {
1505                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1506                 } else {
1507                     if (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)
1508                         b_frames = 0;
1509                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1510                 }
1511             }
1512
1513             if ((s->avctx->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1514                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1515                 b_frames--;
1516
1517             s->reordered_input_picture[0] = s->input_picture[b_frames];
1518             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1519                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1520             s->reordered_input_picture[0]->f->coded_picture_number =
1521                 s->coded_picture_number++;
1522             for (i = 0; i < b_frames; i++) {
1523                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1524                 s->reordered_input_picture[i + 1]->f->pict_type =
1525                     AV_PICTURE_TYPE_B;
1526                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1527                     s->coded_picture_number++;
1528             }
1529         }
1530     }
1531 no_output_pic:
1532     if (s->reordered_input_picture[0]) {
1533         s->reordered_input_picture[0]->reference =
1534            s->reordered_input_picture[0]->f->pict_type !=
1535                AV_PICTURE_TYPE_B ? 3 : 0;
1536
1537         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1538         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1539             return ret;
1540
1541         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1542             // input is a shared pix, so we can't modifiy it -> alloc a new
1543             // one & ensure that the shared one is reuseable
1544
1545             Picture *pic;
1546             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1547             if (i < 0)
1548                 return i;
1549             pic = &s->picture[i];
1550
1551             pic->reference = s->reordered_input_picture[0]->reference;
1552             if (alloc_picture(s, pic, 0) < 0) {
1553                 return -1;
1554             }
1555
1556             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1557             if (ret < 0)
1558                 return ret;
1559
1560             /* mark us unused / free shared pic */
1561             av_frame_unref(s->reordered_input_picture[0]->f);
1562             s->reordered_input_picture[0]->shared = 0;
1563
1564             s->current_picture_ptr = pic;
1565         } else {
1566             // input is not a shared pix -> reuse buffer for current_pix
1567             s->current_picture_ptr = s->reordered_input_picture[0];
1568             for (i = 0; i < 4; i++) {
1569                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1570             }
1571         }
1572         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1573         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1574                                        s->current_picture_ptr)) < 0)
1575             return ret;
1576
1577         s->picture_number = s->new_picture.f->display_picture_number;
1578     } else {
1579         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1580     }
1581     return 0;
1582 }
1583
1584 static void frame_end(MpegEncContext *s)
1585 {
1586     if (s->unrestricted_mv &&
1587         s->current_picture.reference &&
1588         !s->intra_only) {
1589         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1590         int hshift = desc->log2_chroma_w;
1591         int vshift = desc->log2_chroma_h;
1592         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1593                                 s->current_picture.f->linesize[0],
1594                                 s->h_edge_pos, s->v_edge_pos,
1595                                 EDGE_WIDTH, EDGE_WIDTH,
1596                                 EDGE_TOP | EDGE_BOTTOM);
1597         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1598                                 s->current_picture.f->linesize[1],
1599                                 s->h_edge_pos >> hshift,
1600                                 s->v_edge_pos >> vshift,
1601                                 EDGE_WIDTH >> hshift,
1602                                 EDGE_WIDTH >> vshift,
1603                                 EDGE_TOP | EDGE_BOTTOM);
1604         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1605                                 s->current_picture.f->linesize[2],
1606                                 s->h_edge_pos >> hshift,
1607                                 s->v_edge_pos >> vshift,
1608                                 EDGE_WIDTH >> hshift,
1609                                 EDGE_WIDTH >> vshift,
1610                                 EDGE_TOP | EDGE_BOTTOM);
1611     }
1612
1613     emms_c();
1614
1615     s->last_pict_type                 = s->pict_type;
1616     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1617     if (s->pict_type!= AV_PICTURE_TYPE_B)
1618         s->last_non_b_pict_type = s->pict_type;
1619
1620 #if FF_API_CODED_FRAME
1621 FF_DISABLE_DEPRECATION_WARNINGS
1622     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1623 FF_ENABLE_DEPRECATION_WARNINGS
1624 #endif
1625 }
1626
1627 static void update_noise_reduction(MpegEncContext *s)
1628 {
1629     int intra, i;
1630
1631     for (intra = 0; intra < 2; intra++) {
1632         if (s->dct_count[intra] > (1 << 16)) {
1633             for (i = 0; i < 64; i++) {
1634                 s->dct_error_sum[intra][i] >>= 1;
1635             }
1636             s->dct_count[intra] >>= 1;
1637         }
1638
1639         for (i = 0; i < 64; i++) {
1640             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1641                                        s->dct_count[intra] +
1642                                        s->dct_error_sum[intra][i] / 2) /
1643                                       (s->dct_error_sum[intra][i] + 1);
1644         }
1645     }
1646 }
1647
1648 static int frame_start(MpegEncContext *s)
1649 {
1650     int ret;
1651
1652     /* mark & release old frames */
1653     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1654         s->last_picture_ptr != s->next_picture_ptr &&
1655         s->last_picture_ptr->f->buf[0]) {
1656         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1657     }
1658
1659     s->current_picture_ptr->f->pict_type = s->pict_type;
1660     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1661
1662     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1663     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1664                                    s->current_picture_ptr)) < 0)
1665         return ret;
1666
1667     if (s->pict_type != AV_PICTURE_TYPE_B) {
1668         s->last_picture_ptr = s->next_picture_ptr;
1669         if (!s->droppable)
1670             s->next_picture_ptr = s->current_picture_ptr;
1671     }
1672
1673     if (s->last_picture_ptr) {
1674         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1675         if (s->last_picture_ptr->f->buf[0] &&
1676             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1677                                        s->last_picture_ptr)) < 0)
1678             return ret;
1679     }
1680     if (s->next_picture_ptr) {
1681         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1682         if (s->next_picture_ptr->f->buf[0] &&
1683             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1684                                        s->next_picture_ptr)) < 0)
1685             return ret;
1686     }
1687
1688     if (s->picture_structure!= PICT_FRAME) {
1689         int i;
1690         for (i = 0; i < 4; i++) {
1691             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1692                 s->current_picture.f->data[i] +=
1693                     s->current_picture.f->linesize[i];
1694             }
1695             s->current_picture.f->linesize[i] *= 2;
1696             s->last_picture.f->linesize[i]    *= 2;
1697             s->next_picture.f->linesize[i]    *= 2;
1698         }
1699     }
1700
1701     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1702         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1703         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1704     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1705         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1706         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1707     } else {
1708         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1709         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1710     }
1711
1712     if (s->dct_error_sum) {
1713         av_assert2(s->avctx->noise_reduction && s->encoding);
1714         update_noise_reduction(s);
1715     }
1716
1717     return 0;
1718 }
1719
1720 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1721                           const AVFrame *pic_arg, int *got_packet)
1722 {
1723     MpegEncContext *s = avctx->priv_data;
1724     int i, stuffing_count, ret;
1725     int context_count = s->slice_context_count;
1726
1727     s->picture_in_gop_number++;
1728
1729     if (load_input_picture(s, pic_arg) < 0)
1730         return -1;
1731
1732     if (select_input_picture(s) < 0) {
1733         return -1;
1734     }
1735
1736     /* output? */
1737     if (s->new_picture.f->data[0]) {
1738         uint8_t *sd;
1739         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1740         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1741                                               :
1742                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1743         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1744             return ret;
1745         if (s->mb_info) {
1746             s->mb_info_ptr = av_packet_new_side_data(pkt,
1747                                  AV_PKT_DATA_H263_MB_INFO,
1748                                  s->mb_width*s->mb_height*12);
1749             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1750         }
1751
1752         for (i = 0; i < context_count; i++) {
1753             int start_y = s->thread_context[i]->start_mb_y;
1754             int   end_y = s->thread_context[i]->  end_mb_y;
1755             int h       = s->mb_height;
1756             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1757             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1758
1759             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1760         }
1761
1762         s->pict_type = s->new_picture.f->pict_type;
1763         //emms_c();
1764         ret = frame_start(s);
1765         if (ret < 0)
1766             return ret;
1767 vbv_retry:
1768         ret = encode_picture(s, s->picture_number);
1769         if (growing_buffer) {
1770             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1771             pkt->data = s->pb.buf;
1772             pkt->size = avctx->internal->byte_buffer_size;
1773         }
1774         if (ret < 0)
1775             return -1;
1776
1777         avctx->header_bits = s->header_bits;
1778         avctx->mv_bits     = s->mv_bits;
1779         avctx->misc_bits   = s->misc_bits;
1780         avctx->i_tex_bits  = s->i_tex_bits;
1781         avctx->p_tex_bits  = s->p_tex_bits;
1782         avctx->i_count     = s->i_count;
1783         // FIXME f/b_count in avctx
1784         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1785         avctx->skip_count  = s->skip_count;
1786
1787         frame_end(s);
1788
1789         sd = av_packet_new_side_data(pkt, AV_PKT_DATA_QUALITY_FACTOR,
1790                                      sizeof(int));
1791         if (!sd)
1792             return AVERROR(ENOMEM);
1793         *(int *)sd = s->current_picture.f->quality;
1794
1795         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1796             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1797
1798         if (avctx->rc_buffer_size) {
1799             RateControlContext *rcc = &s->rc_context;
1800             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1801
1802             if (put_bits_count(&s->pb) > max_size &&
1803                 s->lambda < s->lmax) {
1804                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1805                                        (s->qscale + 1) / s->qscale);
1806                 if (s->adaptive_quant) {
1807                     int i;
1808                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1809                         s->lambda_table[i] =
1810                             FFMAX(s->lambda_table[i] + 1,
1811                                   s->lambda_table[i] * (s->qscale + 1) /
1812                                   s->qscale);
1813                 }
1814                 s->mb_skipped = 0;        // done in frame_start()
1815                 // done in encode_picture() so we must undo it
1816                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1817                     if (s->flipflop_rounding          ||
1818                         s->codec_id == AV_CODEC_ID_H263P ||
1819                         s->codec_id == AV_CODEC_ID_MPEG4)
1820                         s->no_rounding ^= 1;
1821                 }
1822                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1823                     s->time_base       = s->last_time_base;
1824                     s->last_non_b_time = s->time - s->pp_time;
1825                 }
1826                 for (i = 0; i < context_count; i++) {
1827                     PutBitContext *pb = &s->thread_context[i]->pb;
1828                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1829                 }
1830                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1831                 goto vbv_retry;
1832             }
1833
1834             av_assert0(s->avctx->rc_max_rate);
1835         }
1836
1837         if (s->avctx->flags & CODEC_FLAG_PASS1)
1838             ff_write_pass1_stats(s);
1839
1840         for (i = 0; i < 4; i++) {
1841             s->current_picture_ptr->f->error[i] =
1842             s->current_picture.f->error[i] =
1843                 s->current_picture.error[i];
1844             avctx->error[i] += s->current_picture_ptr->f->error[i];
1845         }
1846
1847         if (s->avctx->flags & CODEC_FLAG_PASS1)
1848             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1849                    avctx->i_tex_bits + avctx->p_tex_bits ==
1850                        put_bits_count(&s->pb));
1851         flush_put_bits(&s->pb);
1852         s->frame_bits  = put_bits_count(&s->pb);
1853
1854         stuffing_count = ff_vbv_update(s, s->frame_bits);
1855         s->stuffing_bits = 8*stuffing_count;
1856         if (stuffing_count) {
1857             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1858                     stuffing_count + 50) {
1859                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1860                 return -1;
1861             }
1862
1863             switch (s->codec_id) {
1864             case AV_CODEC_ID_MPEG1VIDEO:
1865             case AV_CODEC_ID_MPEG2VIDEO:
1866                 while (stuffing_count--) {
1867                     put_bits(&s->pb, 8, 0);
1868                 }
1869             break;
1870             case AV_CODEC_ID_MPEG4:
1871                 put_bits(&s->pb, 16, 0);
1872                 put_bits(&s->pb, 16, 0x1C3);
1873                 stuffing_count -= 4;
1874                 while (stuffing_count--) {
1875                     put_bits(&s->pb, 8, 0xFF);
1876                 }
1877             break;
1878             default:
1879                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1880             }
1881             flush_put_bits(&s->pb);
1882             s->frame_bits  = put_bits_count(&s->pb);
1883         }
1884
1885         /* update mpeg1/2 vbv_delay for CBR */
1886         if (s->avctx->rc_max_rate                          &&
1887             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1888             s->out_format == FMT_MPEG1                     &&
1889             90000LL * (avctx->rc_buffer_size - 1) <=
1890                 s->avctx->rc_max_rate * 0xFFFFLL) {
1891             int vbv_delay, min_delay;
1892             double inbits  = s->avctx->rc_max_rate *
1893                              av_q2d(s->avctx->time_base);
1894             int    minbits = s->frame_bits - 8 *
1895                              (s->vbv_delay_ptr - s->pb.buf - 1);
1896             double bits    = s->rc_context.buffer_index + minbits - inbits;
1897
1898             if (bits < 0)
1899                 av_log(s->avctx, AV_LOG_ERROR,
1900                        "Internal error, negative bits\n");
1901
1902             assert(s->repeat_first_field == 0);
1903
1904             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1905             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1906                         s->avctx->rc_max_rate;
1907
1908             vbv_delay = FFMAX(vbv_delay, min_delay);
1909
1910             av_assert0(vbv_delay < 0xFFFF);
1911
1912             s->vbv_delay_ptr[0] &= 0xF8;
1913             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1914             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1915             s->vbv_delay_ptr[2] &= 0x07;
1916             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1917             avctx->vbv_delay     = vbv_delay * 300;
1918         }
1919         s->total_bits     += s->frame_bits;
1920         avctx->frame_bits  = s->frame_bits;
1921
1922         pkt->pts = s->current_picture.f->pts;
1923         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1924             if (!s->current_picture.f->coded_picture_number)
1925                 pkt->dts = pkt->pts - s->dts_delta;
1926             else
1927                 pkt->dts = s->reordered_pts;
1928             s->reordered_pts = pkt->pts;
1929         } else
1930             pkt->dts = pkt->pts;
1931         if (s->current_picture.f->key_frame)
1932             pkt->flags |= AV_PKT_FLAG_KEY;
1933         if (s->mb_info)
1934             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1935     } else {
1936         s->frame_bits = 0;
1937     }
1938
1939     /* release non-reference frames */
1940     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1941         if (!s->picture[i].reference)
1942             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1943     }
1944
1945     av_assert1((s->frame_bits & 7) == 0);
1946
1947     pkt->size = s->frame_bits / 8;
1948     *got_packet = !!pkt->size;
1949     return 0;
1950 }
1951
1952 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1953                                                 int n, int threshold)
1954 {
1955     static const char tab[64] = {
1956         3, 2, 2, 1, 1, 1, 1, 1,
1957         1, 1, 1, 1, 1, 1, 1, 1,
1958         1, 1, 1, 1, 1, 1, 1, 1,
1959         0, 0, 0, 0, 0, 0, 0, 0,
1960         0, 0, 0, 0, 0, 0, 0, 0,
1961         0, 0, 0, 0, 0, 0, 0, 0,
1962         0, 0, 0, 0, 0, 0, 0, 0,
1963         0, 0, 0, 0, 0, 0, 0, 0
1964     };
1965     int score = 0;
1966     int run = 0;
1967     int i;
1968     int16_t *block = s->block[n];
1969     const int last_index = s->block_last_index[n];
1970     int skip_dc;
1971
1972     if (threshold < 0) {
1973         skip_dc = 0;
1974         threshold = -threshold;
1975     } else
1976         skip_dc = 1;
1977
1978     /* Are all we could set to zero already zero? */
1979     if (last_index <= skip_dc - 1)
1980         return;
1981
1982     for (i = 0; i <= last_index; i++) {
1983         const int j = s->intra_scantable.permutated[i];
1984         const int level = FFABS(block[j]);
1985         if (level == 1) {
1986             if (skip_dc && i == 0)
1987                 continue;
1988             score += tab[run];
1989             run = 0;
1990         } else if (level > 1) {
1991             return;
1992         } else {
1993             run++;
1994         }
1995     }
1996     if (score >= threshold)
1997         return;
1998     for (i = skip_dc; i <= last_index; i++) {
1999         const int j = s->intra_scantable.permutated[i];
2000         block[j] = 0;
2001     }
2002     if (block[0])
2003         s->block_last_index[n] = 0;
2004     else
2005         s->block_last_index[n] = -1;
2006 }
2007
2008 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2009                                int last_index)
2010 {
2011     int i;
2012     const int maxlevel = s->max_qcoeff;
2013     const int minlevel = s->min_qcoeff;
2014     int overflow = 0;
2015
2016     if (s->mb_intra) {
2017         i = 1; // skip clipping of intra dc
2018     } else
2019         i = 0;
2020
2021     for (; i <= last_index; i++) {
2022         const int j = s->intra_scantable.permutated[i];
2023         int level = block[j];
2024
2025         if (level > maxlevel) {
2026             level = maxlevel;
2027             overflow++;
2028         } else if (level < minlevel) {
2029             level = minlevel;
2030             overflow++;
2031         }
2032
2033         block[j] = level;
2034     }
2035
2036     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2037         av_log(s->avctx, AV_LOG_INFO,
2038                "warning, clipping %d dct coefficients to %d..%d\n",
2039                overflow, minlevel, maxlevel);
2040 }
2041
/**
 * Compute a weight for each pixel of an 8x8 block from its local activity.
 *
 * For every pixel the neighbourhood of up to 3x3 pixels (cropped at the
 * borders of the 8x8 block) is examined; with n pixels, sum S and sum of
 * squares Q the stored weight is 36 * sqrt(n * Q - S * S) / n.
 *
 * @param weight output, 64 entries stored row by row (index x + 8 * y)
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        /* vertical extent of the window, cropped to the block */
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            /* horizontal extent of the window, cropped to the block */
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    sum += pix;
                    sqr += pix * pix;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2065
2066 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2067                                                 int motion_x, int motion_y,
2068                                                 int mb_block_height,
2069                                                 int mb_block_width,
2070                                                 int mb_block_count)
2071 {
2072     int16_t weight[12][64];
2073     int16_t orig[12][64];
2074     const int mb_x = s->mb_x;
2075     const int mb_y = s->mb_y;
2076     int i;
2077     int skip_dct[12];
2078     int dct_offset = s->linesize * 8; // default for progressive frames
2079     int uv_dct_offset = s->uvlinesize * 8;
2080     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2081     ptrdiff_t wrap_y, wrap_c;
2082
2083     for (i = 0; i < mb_block_count; i++)
2084         skip_dct[i] = s->skipdct;
2085
2086     if (s->adaptive_quant) {
2087         const int last_qp = s->qscale;
2088         const int mb_xy = mb_x + mb_y * s->mb_stride;
2089
2090         s->lambda = s->lambda_table[mb_xy];
2091         update_qscale(s);
2092
2093         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2094             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2095             s->dquant = s->qscale - last_qp;
2096
2097             if (s->out_format == FMT_H263) {
2098                 s->dquant = av_clip(s->dquant, -2, 2);
2099
2100                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2101                     if (!s->mb_intra) {
2102                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2103                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2104                                 s->dquant = 0;
2105                         }
2106                         if (s->mv_type == MV_TYPE_8X8)
2107                             s->dquant = 0;
2108                     }
2109                 }
2110             }
2111         }
2112         ff_set_qscale(s, last_qp + s->dquant);
2113     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2114         ff_set_qscale(s, s->qscale + s->dquant);
2115
2116     wrap_y = s->linesize;
2117     wrap_c = s->uvlinesize;
2118     ptr_y  = s->new_picture.f->data[0] +
2119              (mb_y * 16 * wrap_y)              + mb_x * 16;
2120     ptr_cb = s->new_picture.f->data[1] +
2121              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2122     ptr_cr = s->new_picture.f->data[2] +
2123              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2124
2125     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2126         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2127         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2128         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2129         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2130                                  wrap_y, wrap_y,
2131                                  16, 16, mb_x * 16, mb_y * 16,
2132                                  s->width, s->height);
2133         ptr_y = ebuf;
2134         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2135                                  wrap_c, wrap_c,
2136                                  mb_block_width, mb_block_height,
2137                                  mb_x * mb_block_width, mb_y * mb_block_height,
2138                                  cw, ch);
2139         ptr_cb = ebuf + 16 * wrap_y;
2140         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2141                                  wrap_c, wrap_c,
2142                                  mb_block_width, mb_block_height,
2143                                  mb_x * mb_block_width, mb_y * mb_block_height,
2144                                  cw, ch);
2145         ptr_cr = ebuf + 16 * wrap_y + 16;
2146     }
2147
2148     if (s->mb_intra) {
2149         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2150             int progressive_score, interlaced_score;
2151
2152             s->interlaced_dct = 0;
2153             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2154                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2155                                                      NULL, wrap_y, 8) - 400;
2156
2157             if (progressive_score > 0) {
2158                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2159                                                         NULL, wrap_y * 2, 8) +
2160                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2161                                                         NULL, wrap_y * 2, 8);
2162                 if (progressive_score > interlaced_score) {
2163                     s->interlaced_dct = 1;
2164
2165                     dct_offset = wrap_y;
2166                     uv_dct_offset = wrap_c;
2167                     wrap_y <<= 1;
2168                     if (s->chroma_format == CHROMA_422 ||
2169                         s->chroma_format == CHROMA_444)
2170                         wrap_c <<= 1;
2171                 }
2172             }
2173         }
2174
2175         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2176         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2177         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2178         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2179
2180         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2181             skip_dct[4] = 1;
2182             skip_dct[5] = 1;
2183         } else {
2184             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2185             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2186             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2187                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2188                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2189             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2190                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2191                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2192                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2193                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2194                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2195                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2196             }
2197         }
2198     } else {
2199         op_pixels_func (*op_pix)[4];
2200         qpel_mc_func (*op_qpix)[16];
2201         uint8_t *dest_y, *dest_cb, *dest_cr;
2202
2203         dest_y  = s->dest[0];
2204         dest_cb = s->dest[1];
2205         dest_cr = s->dest[2];
2206
2207         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2208             op_pix  = s->hdsp.put_pixels_tab;
2209             op_qpix = s->qdsp.put_qpel_pixels_tab;
2210         } else {
2211             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2212             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2213         }
2214
2215         if (s->mv_dir & MV_DIR_FORWARD) {
2216             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2217                           s->last_picture.f->data,
2218                           op_pix, op_qpix);
2219             op_pix  = s->hdsp.avg_pixels_tab;
2220             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2221         }
2222         if (s->mv_dir & MV_DIR_BACKWARD) {
2223             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2224                           s->next_picture.f->data,
2225                           op_pix, op_qpix);
2226         }
2227
2228         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2229             int progressive_score, interlaced_score;
2230
2231             s->interlaced_dct = 0;
2232             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2233                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2234                                                      ptr_y + wrap_y * 8,
2235                                                      wrap_y, 8) - 400;
2236
2237             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2238                 progressive_score -= 400;
2239
2240             if (progressive_score > 0) {
2241                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2242                                                         wrap_y * 2, 8) +
2243                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2244                                                         ptr_y + wrap_y,
2245                                                         wrap_y * 2, 8);
2246
2247                 if (progressive_score > interlaced_score) {
2248                     s->interlaced_dct = 1;
2249
2250                     dct_offset = wrap_y;
2251                     uv_dct_offset = wrap_c;
2252                     wrap_y <<= 1;
2253                     if (s->chroma_format == CHROMA_422)
2254                         wrap_c <<= 1;
2255                 }
2256             }
2257         }
2258
2259         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2260         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2261         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2262                             dest_y + dct_offset, wrap_y);
2263         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2264                             dest_y + dct_offset + 8, wrap_y);
2265
2266         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2267             skip_dct[4] = 1;
2268             skip_dct[5] = 1;
2269         } else {
2270             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2271             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2272             if (!s->chroma_y_shift) { /* 422 */
2273                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2274                                     dest_cb + uv_dct_offset, wrap_c);
2275                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2276                                     dest_cr + uv_dct_offset, wrap_c);
2277             }
2278         }
2279         /* pre quantization */
2280         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2281                 2 * s->qscale * s->qscale) {
2282             // FIXME optimize
2283             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2284                 skip_dct[0] = 1;
2285             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2286                 skip_dct[1] = 1;
2287             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2288                                wrap_y, 8) < 20 * s->qscale)
2289                 skip_dct[2] = 1;
2290             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2291                                wrap_y, 8) < 20 * s->qscale)
2292                 skip_dct[3] = 1;
2293             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2294                 skip_dct[4] = 1;
2295             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2296                 skip_dct[5] = 1;
2297             if (!s->chroma_y_shift) { /* 422 */
2298                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2299                                    dest_cb + uv_dct_offset,
2300                                    wrap_c, 8) < 20 * s->qscale)
2301                     skip_dct[6] = 1;
2302                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2303                                    dest_cr + uv_dct_offset,
2304                                    wrap_c, 8) < 20 * s->qscale)
2305                     skip_dct[7] = 1;
2306             }
2307         }
2308     }
2309
2310     if (s->quantizer_noise_shaping) {
2311         if (!skip_dct[0])
2312             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2313         if (!skip_dct[1])
2314             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2315         if (!skip_dct[2])
2316             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2317         if (!skip_dct[3])
2318             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2319         if (!skip_dct[4])
2320             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2321         if (!skip_dct[5])
2322             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2323         if (!s->chroma_y_shift) { /* 422 */
2324             if (!skip_dct[6])
2325                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2326                                   wrap_c);
2327             if (!skip_dct[7])
2328                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2329                                   wrap_c);
2330         }
2331         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2332     }
2333
2334     /* DCT & quantize */
2335     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2336     {
2337         for (i = 0; i < mb_block_count; i++) {
2338             if (!skip_dct[i]) {
2339                 int overflow;
2340                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2341                 // FIXME we could decide to change to quantizer instead of
2342                 // clipping
2343                 // JS: I don't think that would be a good idea it could lower
2344                 //     quality instead of improve it. Just INTRADC clipping
2345                 //     deserves changes in quantizer
2346                 if (overflow)
2347                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2348             } else
2349                 s->block_last_index[i] = -1;
2350         }
2351         if (s->quantizer_noise_shaping) {
2352             for (i = 0; i < mb_block_count; i++) {
2353                 if (!skip_dct[i]) {
2354                     s->block_last_index[i] =
2355                         dct_quantize_refine(s, s->block[i], weight[i],
2356                                             orig[i], i, s->qscale);
2357                 }
2358             }
2359         }
2360
2361         if (s->luma_elim_threshold && !s->mb_intra)
2362             for (i = 0; i < 4; i++)
2363                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2364         if (s->chroma_elim_threshold && !s->mb_intra)
2365             for (i = 4; i < mb_block_count; i++)
2366                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2367
2368         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2369             for (i = 0; i < mb_block_count; i++) {
2370                 if (s->block_last_index[i] == -1)
2371                     s->coded_score[i] = INT_MAX / 256;
2372             }
2373         }
2374     }
2375
2376     if ((s->avctx->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2377         s->block_last_index[4] =
2378         s->block_last_index[5] = 0;
2379         s->block[4][0] =
2380         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2381         if (!s->chroma_y_shift) { /* 422 / 444 */
2382             for (i=6; i<12; i++) {
2383                 s->block_last_index[i] = 0;
2384                 s->block[i][0] = s->block[4][0];
2385             }
2386         }
2387     }
2388
2389     // non c quantize code returns incorrect block_last_index FIXME
2390     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2391         for (i = 0; i < mb_block_count; i++) {
2392             int j;
2393             if (s->block_last_index[i] > 0) {
2394                 for (j = 63; j > 0; j--) {
2395                     if (s->block[i][s->intra_scantable.permutated[j]])
2396                         break;
2397                 }
2398                 s->block_last_index[i] = j;
2399             }
2400         }
2401     }
2402
2403     /* huffman encode */
2404     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2405     case AV_CODEC_ID_MPEG1VIDEO:
2406     case AV_CODEC_ID_MPEG2VIDEO:
2407         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2408             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2409         break;
2410     case AV_CODEC_ID_MPEG4:
2411         if (CONFIG_MPEG4_ENCODER)
2412             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2413         break;
2414     case AV_CODEC_ID_MSMPEG4V2:
2415     case AV_CODEC_ID_MSMPEG4V3:
2416     case AV_CODEC_ID_WMV1:
2417         if (CONFIG_MSMPEG4_ENCODER)
2418             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2419         break;
2420     case AV_CODEC_ID_WMV2:
2421         if (CONFIG_WMV2_ENCODER)
2422             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2423         break;
2424     case AV_CODEC_ID_H261:
2425         if (CONFIG_H261_ENCODER)
2426             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2427         break;
2428     case AV_CODEC_ID_H263:
2429     case AV_CODEC_ID_H263P:
2430     case AV_CODEC_ID_FLV1:
2431     case AV_CODEC_ID_RV10:
2432     case AV_CODEC_ID_RV20:
2433         if (CONFIG_H263_ENCODER)
2434             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2435         break;
2436     case AV_CODEC_ID_MJPEG:
2437     case AV_CODEC_ID_AMV:
2438         if (CONFIG_MJPEG_ENCODER)
2439             ff_mjpeg_encode_mb(s, s->block);
2440         break;
2441     default:
2442         av_assert1(0);
2443     }
2444 }
2445
2446 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2447 {
2448     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2449     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2450     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2451 }
2452
2453 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2454     int i;
2455
2456     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2457
2458     /* mpeg1 */
2459     d->mb_skip_run= s->mb_skip_run;
2460     for(i=0; i<3; i++)
2461         d->last_dc[i] = s->last_dc[i];
2462
2463     /* statistics */
2464     d->mv_bits= s->mv_bits;
2465     d->i_tex_bits= s->i_tex_bits;
2466     d->p_tex_bits= s->p_tex_bits;
2467     d->i_count= s->i_count;
2468     d->f_count= s->f_count;
2469     d->b_count= s->b_count;
2470     d->skip_count= s->skip_count;
2471     d->misc_bits= s->misc_bits;
2472     d->last_bits= 0;
2473
2474     d->mb_skipped= 0;
2475     d->qscale= s->qscale;
2476     d->dquant= s->dquant;
2477
2478     d->esc3_level_length= s->esc3_level_length;
2479 }
2480
2481 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2482     int i;
2483
2484     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2485     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2486
2487     /* mpeg1 */
2488     d->mb_skip_run= s->mb_skip_run;
2489     for(i=0; i<3; i++)
2490         d->last_dc[i] = s->last_dc[i];
2491
2492     /* statistics */
2493     d->mv_bits= s->mv_bits;
2494     d->i_tex_bits= s->i_tex_bits;
2495     d->p_tex_bits= s->p_tex_bits;
2496     d->i_count= s->i_count;
2497     d->f_count= s->f_count;
2498     d->b_count= s->b_count;
2499     d->skip_count= s->skip_count;
2500     d->misc_bits= s->misc_bits;
2501
2502     d->mb_intra= s->mb_intra;
2503     d->mb_skipped= s->mb_skipped;
2504     d->mv_type= s->mv_type;
2505     d->mv_dir= s->mv_dir;
2506     d->pb= s->pb;
2507     if(s->data_partitioning){
2508         d->pb2= s->pb2;
2509         d->tex_pb= s->tex_pb;
2510     }
2511     d->block= s->block;
2512     for(i=0; i<8; i++)
2513         d->block_last_index[i]= s->block_last_index[i];
2514     d->interlaced_dct= s->interlaced_dct;
2515     d->qscale= s->qscale;
2516
2517     d->esc3_level_length= s->esc3_level_length;
2518 }
2519
2520 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2521                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2522                            int *dmin, int *next_block, int motion_x, int motion_y)
2523 {
2524     int score;
2525     uint8_t *dest_backup[3];
2526
2527     copy_context_before_encode(s, backup, type);
2528
2529     s->block= s->blocks[*next_block];
2530     s->pb= pb[*next_block];
2531     if(s->data_partitioning){
2532         s->pb2   = pb2   [*next_block];
2533         s->tex_pb= tex_pb[*next_block];
2534     }
2535
2536     if(*next_block){
2537         memcpy(dest_backup, s->dest, sizeof(s->dest));
2538         s->dest[0] = s->sc.rd_scratchpad;
2539         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2540         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2541         av_assert0(s->linesize >= 32); //FIXME
2542     }
2543
2544     encode_mb(s, motion_x, motion_y);
2545
2546     score= put_bits_count(&s->pb);
2547     if(s->data_partitioning){
2548         score+= put_bits_count(&s->pb2);
2549         score+= put_bits_count(&s->tex_pb);
2550     }
2551
2552     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2553         ff_mpv_decode_mb(s, s->block);
2554
2555         score *= s->lambda2;
2556         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2557     }
2558
2559     if(*next_block){
2560         memcpy(s->dest, dest_backup, sizeof(s->dest));
2561     }
2562
2563     if(score<*dmin){
2564         *dmin= score;
2565         *next_block^=1;
2566
2567         copy_context_after_encode(best, s, type);
2568     }
2569 }
2570
2571 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2572     uint32_t *sq = ff_square_tab + 256;
2573     int acc=0;
2574     int x,y;
2575
2576     if(w==16 && h==16)
2577         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2578     else if(w==8 && h==8)
2579         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2580
2581     for(y=0; y<h; y++){
2582         for(x=0; x<w; x++){
2583             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2584         }
2585     }
2586
2587     av_assert2(acc>=0);
2588
2589     return acc;
2590 }
2591
2592 static int sse_mb(MpegEncContext *s){
2593     int w= 16;
2594     int h= 16;
2595
2596     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2597     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2598
2599     if(w==16 && h==16)
2600       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2601         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2602                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2603                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2604       }else{
2605         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2606                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2607                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2608       }
2609     else
2610         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2611                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2612                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2613 }
2614
2615 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2616     MpegEncContext *s= *(void**)arg;
2617
2618
2619     s->me.pre_pass=1;
2620     s->me.dia_size= s->avctx->pre_dia_size;
2621     s->first_slice_line=1;
2622     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2623         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2624             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2625         }
2626         s->first_slice_line=0;
2627     }
2628
2629     s->me.pre_pass=0;
2630
2631     return 0;
2632 }
2633
2634 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2635     MpegEncContext *s= *(void**)arg;
2636
2637     ff_check_alignment();
2638
2639     s->me.dia_size= s->avctx->dia_size;
2640     s->first_slice_line=1;
2641     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2642         s->mb_x=0; //for block init below
2643         ff_init_block_index(s);
2644         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2645             s->block_index[0]+=2;
2646             s->block_index[1]+=2;
2647             s->block_index[2]+=2;
2648             s->block_index[3]+=2;
2649
2650             /* compute motion vector & mb_type and store in context */
2651             if(s->pict_type==AV_PICTURE_TYPE_B)
2652                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2653             else
2654                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2655         }
2656         s->first_slice_line=0;
2657     }
2658     return 0;
2659 }
2660
2661 static int mb_var_thread(AVCodecContext *c, void *arg){
2662     MpegEncContext *s= *(void**)arg;
2663     int mb_x, mb_y;
2664
2665     ff_check_alignment();
2666
2667     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2668         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2669             int xx = mb_x * 16;
2670             int yy = mb_y * 16;
2671             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2672             int varc;
2673             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2674
2675             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2676                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2677
2678             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2679             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2680             s->me.mb_var_sum_temp    += varc;
2681         }
2682     }
2683     return 0;
2684 }
2685
2686 static void write_slice_end(MpegEncContext *s){
2687     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2688         if(s->partitioned_frame){
2689             ff_mpeg4_merge_partitions(s);
2690         }
2691
2692         ff_mpeg4_stuffing(&s->pb);
2693     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2694         ff_mjpeg_encode_stuffing(s);
2695     }
2696
2697     avpriv_align_put_bits(&s->pb);
2698     flush_put_bits(&s->pb);
2699
2700     if ((s->avctx->flags & CODEC_FLAG_PASS1) && !s->partitioned_frame)
2701         s->misc_bits+= get_bits_diff(s);
2702 }
2703
2704 static void write_mb_info(MpegEncContext *s)
2705 {
2706     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2707     int offset = put_bits_count(&s->pb);
2708     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2709     int gobn = s->mb_y / s->gob_index;
2710     int pred_x, pred_y;
2711     if (CONFIG_H263_ENCODER)
2712         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2713     bytestream_put_le32(&ptr, offset);
2714     bytestream_put_byte(&ptr, s->qscale);
2715     bytestream_put_byte(&ptr, gobn);
2716     bytestream_put_le16(&ptr, mba);
2717     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2718     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2719     /* 4MV not implemented */
2720     bytestream_put_byte(&ptr, 0); /* hmv2 */
2721     bytestream_put_byte(&ptr, 0); /* vmv2 */
2722 }
2723
2724 static void update_mb_info(MpegEncContext *s, int startcode)
2725 {
2726     if (!s->mb_info)
2727         return;
2728     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2729         s->mb_info_size += 12;
2730         s->prev_mb_info = s->last_mb_info;
2731     }
2732     if (startcode) {
2733         s->prev_mb_info = put_bits_count(&s->pb)/8;
2734         /* This might have incremented mb_info_size above, and we return without
2735          * actually writing any info into that slot yet. But in that case,
2736          * this will be called again at the start of the after writing the
2737          * start code, actually writing the mb info. */
2738         return;
2739     }
2740
2741     s->last_mb_info = put_bits_count(&s->pb)/8;
2742     if (!s->mb_info_size)
2743         s->mb_info_size += 12;
2744     write_mb_info(s);
2745 }
2746
2747 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2748 {
2749     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2750         && s->slice_context_count == 1
2751         && s->pb.buf == s->avctx->internal->byte_buffer) {
2752         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2753         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2754
2755         uint8_t *new_buffer = NULL;
2756         int new_buffer_size = 0;
2757
2758         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2759                               s->avctx->internal->byte_buffer_size + size_increase);
2760         if (!new_buffer)
2761             return AVERROR(ENOMEM);
2762
2763         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2764         av_free(s->avctx->internal->byte_buffer);
2765         s->avctx->internal->byte_buffer      = new_buffer;
2766         s->avctx->internal->byte_buffer_size = new_buffer_size;
2767         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2768         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2769         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2770     }
2771     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2772         return AVERROR(EINVAL);
2773     return 0;
2774 }
2775
2776 static int encode_thread(AVCodecContext *c, void *arg){
2777     MpegEncContext *s= *(void**)arg;
2778     int mb_x, mb_y, pdif = 0;
2779     int chr_h= 16>>s->chroma_y_shift;
2780     int i, j;
2781     MpegEncContext best_s = { 0 }, backup_s;
2782     uint8_t bit_buf[2][MAX_MB_BYTES];
2783     uint8_t bit_buf2[2][MAX_MB_BYTES];
2784     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2785     PutBitContext pb[2], pb2[2], tex_pb[2];
2786
2787     ff_check_alignment();
2788
2789     for(i=0; i<2; i++){
2790         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2791         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2792         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2793     }
2794
2795     s->last_bits= put_bits_count(&s->pb);
2796     s->mv_bits=0;
2797     s->misc_bits=0;
2798     s->i_tex_bits=0;
2799     s->p_tex_bits=0;
2800     s->i_count=0;
2801     s->f_count=0;
2802     s->b_count=0;
2803     s->skip_count=0;
2804
2805     for(i=0; i<3; i++){
2806         /* init last dc values */
2807         /* note: quant matrix value (8) is implied here */
2808         s->last_dc[i] = 128 << s->intra_dc_precision;
2809
2810         s->current_picture.error[i] = 0;
2811     }
2812     if(s->codec_id==AV_CODEC_ID_AMV){
2813         s->last_dc[0] = 128*8/13;
2814         s->last_dc[1] = 128*8/14;
2815         s->last_dc[2] = 128*8/14;
2816     }
2817     s->mb_skip_run = 0;
2818     memset(s->last_mv, 0, sizeof(s->last_mv));
2819
2820     s->last_mv_dir = 0;
2821
2822     switch(s->codec_id){
2823     case AV_CODEC_ID_H263:
2824     case AV_CODEC_ID_H263P:
2825     case AV_CODEC_ID_FLV1:
2826         if (CONFIG_H263_ENCODER)
2827             s->gob_index = H263_GOB_HEIGHT(s->height);
2828         break;
2829     case AV_CODEC_ID_MPEG4:
2830         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2831             ff_mpeg4_init_partitions(s);
2832         break;
2833     }
2834
2835     s->resync_mb_x=0;
2836     s->resync_mb_y=0;
2837     s->first_slice_line = 1;
2838     s->ptr_lastgob = s->pb.buf;
2839     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2840         s->mb_x=0;
2841         s->mb_y= mb_y;
2842
2843         ff_set_qscale(s, s->qscale);
2844         ff_init_block_index(s);
2845
2846         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2847             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2848             int mb_type= s->mb_type[xy];
2849 //            int d;
2850             int dmin= INT_MAX;
2851             int dir;
2852             int size_increase =  s->avctx->internal->byte_buffer_size/4
2853                                + s->mb_width*MAX_MB_BYTES;
2854
2855             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2856             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2857                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2858                 return -1;
2859             }
2860             if(s->data_partitioning){
2861                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2862                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2863                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2864                     return -1;
2865                 }
2866             }
2867
2868             s->mb_x = mb_x;
2869             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2870             ff_update_block_index(s);
2871
2872             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2873                 ff_h261_reorder_mb_index(s);
2874                 xy= s->mb_y*s->mb_stride + s->mb_x;
2875                 mb_type= s->mb_type[xy];
2876             }
2877
2878             /* write gob / video packet header  */
2879             if(s->rtp_mode){
2880                 int current_packet_size, is_gob_start;
2881
2882                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2883
2884                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2885
2886                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2887
2888                 switch(s->codec_id){
2889                 case AV_CODEC_ID_H263:
2890                 case AV_CODEC_ID_H263P:
2891                     if(!s->h263_slice_structured)
2892                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2893                     break;
2894                 case AV_CODEC_ID_MPEG2VIDEO:
2895                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2896                 case AV_CODEC_ID_MPEG1VIDEO:
2897                     if(s->mb_skip_run) is_gob_start=0;
2898                     break;
2899                 case AV_CODEC_ID_MJPEG:
2900                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2901                     break;
2902                 }
2903
2904                 if(is_gob_start){
2905                     if(s->start_mb_y != mb_y || mb_x!=0){
2906                         write_slice_end(s);
2907
2908                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2909                             ff_mpeg4_init_partitions(s);
2910                         }
2911                     }
2912
2913                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2914                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2915
2916                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2917                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2918                         int d = 100 / s->error_rate;
2919                         if(r % d == 0){
2920                             current_packet_size=0;
2921                             s->pb.buf_ptr= s->ptr_lastgob;
2922                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2923                         }
2924                     }
2925
2926                     if (s->avctx->rtp_callback){
2927                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2928                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2929                     }
2930                     update_mb_info(s, 1);
2931
2932                     switch(s->codec_id){
2933                     case AV_CODEC_ID_MPEG4:
2934                         if (CONFIG_MPEG4_ENCODER) {
2935                             ff_mpeg4_encode_video_packet_header(s);
2936                             ff_mpeg4_clean_buffers(s);
2937                         }
2938                     break;
2939                     case AV_CODEC_ID_MPEG1VIDEO:
2940                     case AV_CODEC_ID_MPEG2VIDEO:
2941                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2942                             ff_mpeg1_encode_slice_header(s);
2943                             ff_mpeg1_clean_buffers(s);
2944                         }
2945                     break;
2946                     case AV_CODEC_ID_H263:
2947                     case AV_CODEC_ID_H263P:
2948                         if (CONFIG_H263_ENCODER)
2949                             ff_h263_encode_gob_header(s, mb_y);
2950                     break;
2951                     }
2952
2953                     if (s->avctx->flags & CODEC_FLAG_PASS1) {
2954                         int bits= put_bits_count(&s->pb);
2955                         s->misc_bits+= bits - s->last_bits;
2956                         s->last_bits= bits;
2957                     }
2958
2959                     s->ptr_lastgob += current_packet_size;
2960                     s->first_slice_line=1;
2961                     s->resync_mb_x=mb_x;
2962                     s->resync_mb_y=mb_y;
2963                 }
2964             }
2965
2966             if(  (s->resync_mb_x   == s->mb_x)
2967                && s->resync_mb_y+1 == s->mb_y){
2968                 s->first_slice_line=0;
2969             }
2970
2971             s->mb_skipped=0;
2972             s->dquant=0; //only for QP_RD
2973
2974             update_mb_info(s, 0);
2975
2976             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2977                 int next_block=0;
2978                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2979
2980                 copy_context_before_encode(&backup_s, s, -1);
2981                 backup_s.pb= s->pb;
2982                 best_s.data_partitioning= s->data_partitioning;
2983                 best_s.partitioned_frame= s->partitioned_frame;
2984                 if(s->data_partitioning){
2985                     backup_s.pb2= s->pb2;
2986                     backup_s.tex_pb= s->tex_pb;
2987                 }
2988
2989                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2990                     s->mv_dir = MV_DIR_FORWARD;
2991                     s->mv_type = MV_TYPE_16X16;
2992                     s->mb_intra= 0;
2993                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2994                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2995                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2996                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2997                 }
2998                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2999                     s->mv_dir = MV_DIR_FORWARD;
3000                     s->mv_type = MV_TYPE_FIELD;
3001                     s->mb_intra= 0;
3002                     for(i=0; i<2; i++){
3003                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3004                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3005                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3006                     }
3007                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3008                                  &dmin, &next_block, 0, 0);
3009                 }
3010                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3011                     s->mv_dir = MV_DIR_FORWARD;
3012                     s->mv_type = MV_TYPE_16X16;
3013                     s->mb_intra= 0;
3014                     s->mv[0][0][0] = 0;
3015                     s->mv[0][0][1] = 0;
3016                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3017                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3018                 }
3019                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3020                     s->mv_dir = MV_DIR_FORWARD;
3021                     s->mv_type = MV_TYPE_8X8;
3022                     s->mb_intra= 0;
3023                     for(i=0; i<4; i++){
3024                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3025                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3026                     }
3027                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3028                                  &dmin, &next_block, 0, 0);
3029                 }
3030                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3031                     s->mv_dir = MV_DIR_FORWARD;
3032                     s->mv_type = MV_TYPE_16X16;
3033                     s->mb_intra= 0;
3034                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3035                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3036                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3037                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3038                 }
3039                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3040                     s->mv_dir = MV_DIR_BACKWARD;
3041                     s->mv_type = MV_TYPE_16X16;
3042                     s->mb_intra= 0;
3043                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3044                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3045                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3046                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3047                 }
3048                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3049                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3050                     s->mv_type = MV_TYPE_16X16;
3051                     s->mb_intra= 0;
3052                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3053                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3054                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3055                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3056                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3057                                  &dmin, &next_block, 0, 0);
3058                 }
3059                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3060                     s->mv_dir = MV_DIR_FORWARD;
3061                     s->mv_type = MV_TYPE_FIELD;
3062                     s->mb_intra= 0;
3063                     for(i=0; i<2; i++){
3064                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3065                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3066                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3067                     }
3068                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3069                                  &dmin, &next_block, 0, 0);
3070                 }
3071                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3072                     s->mv_dir = MV_DIR_BACKWARD;
3073                     s->mv_type = MV_TYPE_FIELD;
3074                     s->mb_intra= 0;
3075                     for(i=0; i<2; i++){
3076                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3077                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3078                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3079                     }
3080                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3081                                  &dmin, &next_block, 0, 0);
3082                 }
3083                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3084                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3085                     s->mv_type = MV_TYPE_FIELD;
3086                     s->mb_intra= 0;
3087                     for(dir=0; dir<2; dir++){
3088                         for(i=0; i<2; i++){
3089                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3090                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3091                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3092                         }
3093                     }
3094                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3095                                  &dmin, &next_block, 0, 0);
3096                 }
3097                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3098                     s->mv_dir = 0;
3099                     s->mv_type = MV_TYPE_16X16;
3100                     s->mb_intra= 1;
3101                     s->mv[0][0][0] = 0;
3102                     s->mv[0][0][1] = 0;
3103                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3104                                  &dmin, &next_block, 0, 0);
3105                     if(s->h263_pred || s->h263_aic){
3106                         if(best_s.mb_intra)
3107                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3108                         else
3109                             ff_clean_intra_table_entries(s); //old mode?
3110                     }
3111                 }
3112
3113                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3114                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3115                         const int last_qp= backup_s.qscale;
3116                         int qpi, qp, dc[6];
3117                         int16_t ac[6][16];
3118                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3119                         static const int dquant_tab[4]={-1,1,-2,2};
3120                         int storecoefs = s->mb_intra && s->dc_val[0];
3121
3122                         av_assert2(backup_s.dquant == 0);
3123
3124                         //FIXME intra
3125                         s->mv_dir= best_s.mv_dir;
3126                         s->mv_type = MV_TYPE_16X16;
3127                         s->mb_intra= best_s.mb_intra;
3128                         s->mv[0][0][0] = best_s.mv[0][0][0];
3129                         s->mv[0][0][1] = best_s.mv[0][0][1];
3130                         s->mv[1][0][0] = best_s.mv[1][0][0];
3131                         s->mv[1][0][1] = best_s.mv[1][0][1];
3132
3133                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3134                         for(; qpi<4; qpi++){
3135                             int dquant= dquant_tab[qpi];
3136                             qp= last_qp + dquant;
3137                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3138                                 continue;
3139                             backup_s.dquant= dquant;
3140                             if(storecoefs){
3141                                 for(i=0; i<6; i++){
3142                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3143                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3144                                 }
3145                             }
3146
3147                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3148                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3149                             if(best_s.qscale != qp){
3150                                 if(storecoefs){
3151                                     for(i=0; i<6; i++){
3152                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3153                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3154                                     }
3155                                 }
3156                             }
3157                         }
3158                     }
3159                 }
3160                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3161                     int mx= s->b_direct_mv_table[xy][0];
3162                     int my= s->b_direct_mv_table[xy][1];
3163
3164                     backup_s.dquant = 0;
3165                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3166                     s->mb_intra= 0;
3167                     ff_mpeg4_set_direct_mv(s, mx, my);
3168                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3169                                  &dmin, &next_block, mx, my);
3170                 }
3171                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3172                     backup_s.dquant = 0;
3173                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3174                     s->mb_intra= 0;
3175                     ff_mpeg4_set_direct_mv(s, 0, 0);
3176                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3177                                  &dmin, &next_block, 0, 0);
3178                 }
3179                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3180                     int coded=0;
3181                     for(i=0; i<6; i++)
3182                         coded |= s->block_last_index[i];
3183                     if(coded){
3184                         int mx,my;
3185                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3186                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3187                             mx=my=0; //FIXME find the one we actually used
3188                             ff_mpeg4_set_direct_mv(s, mx, my);
3189                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3190                             mx= s->mv[1][0][0];
3191                             my= s->mv[1][0][1];
3192                         }else{
3193                             mx= s->mv[0][0][0];
3194                             my= s->mv[0][0][1];
3195                         }
3196
3197                         s->mv_dir= best_s.mv_dir;
3198                         s->mv_type = best_s.mv_type;
3199                         s->mb_intra= 0;
3200 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3201                         s->mv[0][0][1] = best_s.mv[0][0][1];
3202                         s->mv[1][0][0] = best_s.mv[1][0][0];
3203                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3204                         backup_s.dquant= 0;
3205                         s->skipdct=1;
3206                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3207                                         &dmin, &next_block, mx, my);
3208                         s->skipdct=0;
3209                     }
3210                 }
3211
3212                 s->current_picture.qscale_table[xy] = best_s.qscale;
3213
3214                 copy_context_after_encode(s, &best_s, -1);
3215
3216                 pb_bits_count= put_bits_count(&s->pb);
3217                 flush_put_bits(&s->pb);
3218                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3219                 s->pb= backup_s.pb;
3220
3221                 if(s->data_partitioning){
3222                     pb2_bits_count= put_bits_count(&s->pb2);
3223                     flush_put_bits(&s->pb2);
3224                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3225                     s->pb2= backup_s.pb2;
3226
3227                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3228                     flush_put_bits(&s->tex_pb);
3229                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3230                     s->tex_pb= backup_s.tex_pb;
3231                 }
3232                 s->last_bits= put_bits_count(&s->pb);
3233
3234                 if (CONFIG_H263_ENCODER &&
3235                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3236                     ff_h263_update_motion_val(s);
3237
3238                 if(next_block==0){ //FIXME 16 vs linesize16
3239                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3240                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3241                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3242                 }
3243
3244                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3245                     ff_mpv_decode_mb(s, s->block);
3246             } else {
3247                 int motion_x = 0, motion_y = 0;
3248                 s->mv_type=MV_TYPE_16X16;
3249                 // only one MB-Type possible
3250
3251                 switch(mb_type){
3252                 case CANDIDATE_MB_TYPE_INTRA:
3253                     s->mv_dir = 0;
3254                     s->mb_intra= 1;
3255                     motion_x= s->mv[0][0][0] = 0;
3256                     motion_y= s->mv[0][0][1] = 0;
3257                     break;
3258                 case CANDIDATE_MB_TYPE_INTER:
3259                     s->mv_dir = MV_DIR_FORWARD;
3260                     s->mb_intra= 0;
3261                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3262                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3263                     break;
3264                 case CANDIDATE_MB_TYPE_INTER_I:
3265                     s->mv_dir = MV_DIR_FORWARD;
3266                     s->mv_type = MV_TYPE_FIELD;
3267                     s->mb_intra= 0;
3268                     for(i=0; i<2; i++){
3269                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3270                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3271                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3272                     }
3273                     break;
3274                 case CANDIDATE_MB_TYPE_INTER4V:
3275                     s->mv_dir = MV_DIR_FORWARD;
3276                     s->mv_type = MV_TYPE_8X8;
3277                     s->mb_intra= 0;
3278                     for(i=0; i<4; i++){
3279                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3280                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3281                     }
3282                     break;
3283                 case CANDIDATE_MB_TYPE_DIRECT:
3284                     if (CONFIG_MPEG4_ENCODER) {
3285                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3286                         s->mb_intra= 0;
3287                         motion_x=s->b_direct_mv_table[xy][0];
3288                         motion_y=s->b_direct_mv_table[xy][1];
3289                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3290                     }
3291                     break;
3292                 case CANDIDATE_MB_TYPE_DIRECT0:
3293                     if (CONFIG_MPEG4_ENCODER) {
3294                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3295                         s->mb_intra= 0;
3296                         ff_mpeg4_set_direct_mv(s, 0, 0);
3297                     }
3298                     break;
3299                 case CANDIDATE_MB_TYPE_BIDIR:
3300                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3301                     s->mb_intra= 0;
3302                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3303                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3304                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3305                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3306                     break;
3307                 case CANDIDATE_MB_TYPE_BACKWARD:
3308                     s->mv_dir = MV_DIR_BACKWARD;
3309                     s->mb_intra= 0;
3310                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3311                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3312                     break;
3313                 case CANDIDATE_MB_TYPE_FORWARD:
3314                     s->mv_dir = MV_DIR_FORWARD;
3315                     s->mb_intra= 0;
3316                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3317                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3318                     break;
3319                 case CANDIDATE_MB_TYPE_FORWARD_I:
3320                     s->mv_dir = MV_DIR_FORWARD;
3321                     s->mv_type = MV_TYPE_FIELD;
3322                     s->mb_intra= 0;
3323                     for(i=0; i<2; i++){
3324                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3325                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3326                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3327                     }
3328                     break;
3329                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3330                     s->mv_dir = MV_DIR_BACKWARD;
3331                     s->mv_type = MV_TYPE_FIELD;
3332                     s->mb_intra= 0;
3333                     for(i=0; i<2; i++){
3334                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3335                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3336                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3337                     }
3338                     break;
3339                 case CANDIDATE_MB_TYPE_BIDIR_I:
3340                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3341                     s->mv_type = MV_TYPE_FIELD;
3342                     s->mb_intra= 0;
3343                     for(dir=0; dir<2; dir++){
3344                         for(i=0; i<2; i++){
3345                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3346                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3347                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3348                         }
3349                     }
3350                     break;
3351                 default:
3352                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3353                 }
3354
3355                 encode_mb(s, motion_x, motion_y);
3356
3357                 // RAL: Update last macroblock type
3358                 s->last_mv_dir = s->mv_dir;
3359
3360                 if (CONFIG_H263_ENCODER &&
3361                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3362                     ff_h263_update_motion_val(s);
3363
3364                 ff_mpv_decode_mb(s, s->block);
3365             }
3366
3367             /* clean the MV table in IPS frames for direct mode in B frames */
3368             if(s->mb_intra /* && I,P,S_TYPE */){
3369                 s->p_mv_table[xy][0]=0;
3370                 s->p_mv_table[xy][1]=0;
3371             }
3372
3373             if (s->avctx->flags & CODEC_FLAG_PSNR) {
3374                 int w= 16;
3375                 int h= 16;
3376
3377                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3378                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3379
3380                 s->current_picture.error[0] += sse(
3381                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3382                     s->dest[0], w, h, s->linesize);
3383                 s->current_picture.error[1] += sse(
3384                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3385                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3386                 s->current_picture.error[2] += sse(
3387                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3388                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3389             }
3390             if(s->loop_filter){
3391                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3392                     ff_h263_loop_filter(s);
3393             }
3394             ff_dlog(s->avctx, "MB %d %d bits\n",
3395                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3396         }
3397     }
3398
3399     //not beautiful here but we must write it before flushing so it has to be here
3400     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3401         ff_msmpeg4_encode_ext_header(s);
3402
3403     write_slice_end(s);
3404
3405     /* Send the last GOB if RTP */
3406     if (s->avctx->rtp_callback) {
3407         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3408         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3409         /* Call the RTP callback to send the last GOB */
3410         emms_c();
3411         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3412     }
3413
3414     return 0;
3415 }
3416
/*
 * Add src->field to dst->field, then reset src->field to zero.
 * Relies on pointers named `dst` and `src` being in scope at the use site.
 * Wrapped in do { } while (0) so the two assignments form a single statement
 * and stay together under an unbraced if/else/for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3418 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3419     MERGE(me.scene_change_score);
3420     MERGE(me.mc_mb_var_sum_temp);
3421     MERGE(me.mb_var_sum_temp);
3422 }
3423
3424 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3425     int i;
3426
3427     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3428     MERGE(dct_count[1]);
3429     MERGE(mv_bits);
3430     MERGE(i_tex_bits);
3431     MERGE(p_tex_bits);
3432     MERGE(i_count);
3433     MERGE(f_count);
3434     MERGE(b_count);
3435     MERGE(skip_count);
3436     MERGE(misc_bits);
3437     MERGE(er.error_count);
3438     MERGE(padding_bug_score);
3439     MERGE(current_picture.error[0]);
3440     MERGE(current_picture.error[1]);
3441     MERGE(current_picture.error[2]);
3442
3443     if(dst->avctx->noise_reduction){
3444         for(i=0; i<64; i++){
3445             MERGE(dct_error_sum[0][i]);
3446             MERGE(dct_error_sum[1][i]);
3447         }
3448     }
3449
3450     assert(put_bits_count(&src->pb) % 8 ==0);
3451     assert(put_bits_count(&dst->pb) % 8 ==0);
3452     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3453     flush_put_bits(&dst->pb);
3454 }
3455
3456 static int estimate_qp(MpegEncContext *s, int dry_run){
3457     if (s->next_lambda){
3458         s->current_picture_ptr->f->quality =
3459         s->current_picture.f->quality = s->next_lambda;
3460         if(!dry_run) s->next_lambda= 0;
3461     } else if (!s->fixed_qscale) {
3462         s->current_picture_ptr->f->quality =
3463         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3464         if (s->current_picture.f->quality < 0)
3465             return -1;
3466     }
3467
3468     if(s->adaptive_quant){
3469         switch(s->codec_id){
3470         case AV_CODEC_ID_MPEG4:
3471             if (CONFIG_MPEG4_ENCODER)
3472                 ff_clean_mpeg4_qscales(s);
3473             break;
3474         case AV_CODEC_ID_H263:
3475         case AV_CODEC_ID_H263P:
3476         case AV_CODEC_ID_FLV1:
3477             if (CONFIG_H263_ENCODER)
3478                 ff_clean_h263_qscales(s);
3479             break;
3480         default:
3481             ff_init_qscale_tab(s);
3482         }
3483
3484         s->lambda= s->lambda_table[0];
3485         //FIXME broken
3486     }else
3487         s->lambda = s->current_picture.f->quality;
3488     update_qscale(s);
3489     return 0;
3490 }
3491
3492 /* must be called before writing the header */
3493 static void set_frame_distances(MpegEncContext * s){
3494     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3495     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3496
3497     if(s->pict_type==AV_PICTURE_TYPE_B){
3498         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3499         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3500     }else{
3501         s->pp_time= s->time - s->last_non_b_time;
3502         s->last_non_b_time= s->time;
3503         assert(s->picture_number==0 || s->pp_time > 0);
3504     }
3505 }
3506
3507 static int encode_picture(MpegEncContext *s, int picture_number)
3508 {
3509     int i, ret;
3510     int bits;
3511     int context_count = s->slice_context_count;
3512
3513     s->picture_number = picture_number;
3514
3515     /* Reset the average MB variance */
3516     s->me.mb_var_sum_temp    =
3517     s->me.mc_mb_var_sum_temp = 0;
3518
3519     /* we need to initialize some time vars before we can encode b-frames */
3520     // RAL: Condition added for MPEG1VIDEO
3521     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3522         set_frame_distances(s);
3523     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3524         ff_set_mpeg4_time(s);
3525
3526     s->me.scene_change_score=0;
3527
3528 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3529
3530     if(s->pict_type==AV_PICTURE_TYPE_I){
3531         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3532         else                        s->no_rounding=0;
3533     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3534         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3535             s->no_rounding ^= 1;
3536     }
3537
3538     if (s->avctx->flags & CODEC_FLAG_PASS2) {
3539         if (estimate_qp(s,1) < 0)
3540             return -1;
3541         ff_get_2pass_fcode(s);
3542     } else if (!(s->avctx->flags & CODEC_FLAG_QSCALE)) {
3543         if(s->pict_type==AV_PICTURE_TYPE_B)
3544             s->lambda= s->last_lambda_for[s->pict_type];
3545         else
3546             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3547         update_qscale(s);
3548     }
3549
3550     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3551         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3552         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3553         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3554         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3555     }
3556
3557     s->mb_intra=0; //for the rate distortion & bit compare functions
3558     for(i=1; i<context_count; i++){
3559         ret = ff_update_duplicate_context(s->thread_context[i], s);
3560         if (ret < 0)
3561             return ret;
3562     }
3563
3564     if(ff_init_me(s)<0)
3565         return -1;
3566
3567     /* Estimate motion for every MB */
3568     if(s->pict_type != AV_PICTURE_TYPE_I){
3569         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3570         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3571         if (s->pict_type != AV_PICTURE_TYPE_B) {
3572             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3573                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3574             }
3575         }
3576
3577         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3578     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3579         /* I-Frame */
3580         for(i=0; i<s->mb_stride*s->mb_height; i++)
3581             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3582
3583         if(!s->fixed_qscale){
3584             /* finding spatial complexity for I-frame rate control */
3585             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3586         }
3587     }
3588     for(i=1; i<context_count; i++){
3589         merge_context_after_me(s, s->thread_context[i]);
3590     }
3591     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3592     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3593     emms_c();
3594
3595     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3596         s->pict_type= AV_PICTURE_TYPE_I;
3597         for(i=0; i<s->mb_stride*s->mb_height; i++)
3598             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3599         if(s->msmpeg4_version >= 3)
3600             s->no_rounding=1;
3601         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3602                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3603     }
3604
3605     if(!s->umvplus){
3606         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3607             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3608
3609             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3610                 int a,b;
3611                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3612                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3613                 s->f_code= FFMAX3(s->f_code, a, b);
3614             }
3615
3616             ff_fix_long_p_mvs(s);
3617             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3618             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3619                 int j;
3620                 for(i=0; i<2; i++){
3621                     for(j=0; j<2; j++)
3622                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3623                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3624                 }
3625             }
3626         }
3627
3628         if(s->pict_type==AV_PICTURE_TYPE_B){
3629             int a, b;
3630
3631             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3632             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3633             s->f_code = FFMAX(a, b);
3634
3635             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3636             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3637             s->b_code = FFMAX(a, b);
3638
3639             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3640             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3641             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3642             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3643             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3644                 int dir, j;
3645                 for(dir=0; dir<2; dir++){
3646                     for(i=0; i<2; i++){
3647                         for(j=0; j<2; j++){
3648                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3649                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3650                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3651                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3652                         }
3653                     }
3654                 }
3655             }
3656         }
3657     }
3658
3659     if (estimate_qp(s, 0) < 0)
3660         return -1;
3661
3662     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3663         s->pict_type == AV_PICTURE_TYPE_I &&
3664         !(s->avctx->flags & CODEC_FLAG_QSCALE))
3665         s->qscale= 3; //reduce clipping problems
3666
3667     if (s->out_format == FMT_MJPEG) {
3668         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3669         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3670
3671         if (s->avctx->intra_matrix) {
3672             chroma_matrix =
3673             luma_matrix = s->avctx->intra_matrix;
3674         }
3675         if (s->avctx->chroma_intra_matrix)
3676             chroma_matrix = s->avctx->chroma_intra_matrix;
3677
3678         /* for mjpeg, we do include qscale in the matrix */
3679         for(i=1;i<64;i++){
3680             int j = s->idsp.idct_permutation[i];
3681
3682             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3683             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3684         }
3685         s->y_dc_scale_table=
3686         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3687         s->chroma_intra_matrix[0] =
3688         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3689         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3690                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3691         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3692                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3693         s->qscale= 8;
3694     }
3695     if(s->codec_id == AV_CODEC_ID_AMV){
3696         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3697         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3698         for(i=1;i<64;i++){
3699             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3700
3701             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3702             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3703         }
3704         s->y_dc_scale_table= y;
3705         s->c_dc_scale_table= c;
3706         s->intra_matrix[0] = 13;
3707         s->chroma_intra_matrix[0] = 14;
3708         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3709                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3710         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3711                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3712         s->qscale= 8;
3713     }
3714
3715     //FIXME var duplication
3716     s->current_picture_ptr->f->key_frame =
3717     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3718     s->current_picture_ptr->f->pict_type =
3719     s->current_picture.f->pict_type = s->pict_type;
3720
3721     if (s->current_picture.f->key_frame)
3722         s->picture_in_gop_number=0;
3723
3724     s->mb_x = s->mb_y = 0;
3725     s->last_bits= put_bits_count(&s->pb);
3726     switch(s->out_format) {
3727     case FMT_MJPEG:
3728         if (CONFIG_MJPEG_ENCODER)
3729             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3730                                            s->intra_matrix, s->chroma_intra_matrix);
3731         break;
3732     case FMT_H261:
3733         if (CONFIG_H261_ENCODER)
3734             ff_h261_encode_picture_header(s, picture_number);
3735         break;
3736     case FMT_H263:
3737         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3738             ff_wmv2_encode_picture_header(s, picture_number);
3739         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3740             ff_msmpeg4_encode_picture_header(s, picture_number);
3741         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3742             ff_mpeg4_encode_picture_header(s, picture_number);
3743         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3744             ret = ff_rv10_encode_picture_header(s, picture_number);
3745             if (ret < 0)
3746                 return ret;
3747         }
3748         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3749             ff_rv20_encode_picture_header(s, picture_number);
3750         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3751             ff_flv_encode_picture_header(s, picture_number);
3752         else if (CONFIG_H263_ENCODER)
3753             ff_h263_encode_picture_header(s, picture_number);
3754         break;
3755     case FMT_MPEG1:
3756         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3757             ff_mpeg1_encode_picture_header(s, picture_number);
3758         break;
3759     default:
3760         av_assert0(0);
3761     }
3762     bits= put_bits_count(&s->pb);
3763     s->header_bits= bits - s->last_bits;
3764
3765     for(i=1; i<context_count; i++){
3766         update_duplicate_context_after_me(s->thread_context[i], s);
3767     }
3768     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3769     for(i=1; i<context_count; i++){
3770         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3771             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3772         merge_context_after_encode(s, s->thread_context[i]);
3773     }
3774     emms_c();
3775     return 0;
3776 }
3777
3778 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3779     const int intra= s->mb_intra;
3780     int i;
3781
3782     s->dct_count[intra]++;
3783
3784     for(i=0; i<64; i++){
3785         int level= block[i];
3786
3787         if(level){
3788             if(level>0){
3789                 s->dct_error_sum[intra][i] += level;
3790                 level -= s->dct_offset[intra][i];
3791                 if(level<0) level=0;
3792             }else{
3793                 s->dct_error_sum[intra][i] -= level;
3794                 level += s->dct_offset[intra][i];
3795                 if(level>0) level=0;
3796             }
3797             block[i]= level;
3798         }
3799     }
3800 }
3801
/**
 * Transform and quantize one 8x8 block, selecting the quantized levels with a
 * rate-distortion (trellis) search.
 *
 * The block is passed through s->fdsp.fdct (and s->denoise_dct when
 * s->dct_error_sum is set). Every coefficient up to the last one outside the
 * dead zone gets one or two candidate levels; a dynamic program over the scan
 * positions then picks the run/level sequence minimizing
 * distortion + lambda * bits, with bit counts taken from the VLC length
 * tables in s. The resulting score is stored in s->coded_score[n].
 *
 * @param s        encoder context
 * @param block    input samples; overwritten with the chosen levels, stored at
 *                 the positions given by s->intra_scantable.permutated
 * @param n        block index; n < 4 uses y_dc_scale and the intra matrix,
 *                 other values use c_dc_scale and the chroma intra matrix
 * @param qscale   quantizer scale
 * @param overflow set to 1 when the OR of all level magnitudes is larger than
 *                 s->max_qcoeff, 0 otherwise
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded position (0 for intra blocks, -1 for inter blocks)
 *         when nothing is coded
 */
3802 static int dct_quantize_trellis_c(MpegEncContext *s,
3803                                   int16_t *block, int n,
3804                                   int qscale, int *overflow){
3805     const int *qmat;
3806     const uint16_t *matrix;
3807     const uint8_t *scantable= s->intra_scantable.scantable;
3808     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3809     int max=0;
3810     unsigned int threshold1, threshold2;
3811     int bias=0;
3812     int run_tab[65];
3813     int level_tab[65];
3814     int score_tab[65];
3815     int survivor[65];
3816     int survivor_count;
3817     int last_run=0;
3818     int last_level=0;
3819     int last_score= 0;
3820     int last_i;
3821     int coeff[2][64];
3822     int coeff_count[64];
3823     int qmul, qadd, start_i, last_non_zero, i, dc;
3824     const int esc_length= s->ac_esc_length;
3825     uint8_t * length;
3826     uint8_t * last_length;
3827     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3828
3829     s->fdsp.fdct(block);
3830
3831     if(s->dct_error_sum)
3832         s->denoise_dct(s, block);
     /* constants of the H.263/H.261 reconstruction alevel*qmul + qadd used below */
3833     qmul= qscale*16;
3834     qadd= ((qscale-1)|1)*8;
3835
3836     if (s->mb_intra) {
3837         int q;
3838         if (!s->h263_aic) {
3839             if (n < 4)
3840                 q = s->y_dc_scale;
3841             else
3842                 q = s->c_dc_scale;
3843             q = q << 3;
3844         } else{
3845             /* For AIC we skip quant/dequant of INTRADC */
3846             q = 1 << 3;
3847             qadd=0;
3848         }
3849
3850         /* note: block[0] is assumed to be positive */
3851         block[0] = (block[0] + (q >> 1)) / q;
         /* the DC coefficient is final now; the search only covers positions >= 1 */
3852         start_i = 1;
3853         last_non_zero = 0;
3854         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3855         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3856         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3857             bias= 1<<(QMAT_SHIFT-1);
3858
3859         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3860             length     = s->intra_chroma_ac_vlc_length;
3861             last_length= s->intra_chroma_ac_vlc_last_length;
3862         } else {
3863             length     = s->intra_ac_vlc_length;
3864             last_length= s->intra_ac_vlc_last_length;
3865         }
3866     } else {
3867         start_i = 0;
3868         last_non_zero = -1;
3869         qmat = s->q_inter_matrix[qscale];
3870         matrix = s->inter_matrix;
3871         length     = s->inter_ac_vlc_length;
3872         last_length= s->inter_ac_vlc_last_length;
3873     }
3874     last_i= start_i;
3875
     /* A product level = block[j]*qmat[j] lies in the dead zone when
      * -threshold1 <= level <= threshold1; the single unsigned comparison
      * (unsigned)(level+threshold1) > threshold2 tests both signs at once. */
3876     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3877     threshold2= (threshold1<<1);
3878
     /* scan backwards for the last coefficient outside the dead zone */
3879     for(i=63; i>=start_i; i--) {
3880         const int j = scantable[i];
3881         int level = block[j] * qmat[j];
3882
3883         if(((unsigned)(level+threshold1))>threshold2){
3884             last_non_zero = i;
3885             break;
3886         }
3887     }
3888
     /* Collect the candidate levels per scan position: coeff[0][i] is the
      * rounded level and coeff[1][i] the level one step closer to zero.
      * coeff_count[i] is min(|level|, 2), so a magnitude of 1 has a single
      * candidate. A coefficient inside the dead zone gets the single
      * candidate +1 or -1, following the sign of the product. */
3889     for(i=start_i; i<=last_non_zero; i++) {
3890         const int j = scantable[i];
3891         int level = block[j] * qmat[j];
3892
3893 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3894 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3895         if(((unsigned)(level+threshold1))>threshold2){
3896             if(level>0){
3897                 level= (bias + level)>>QMAT_SHIFT;
3898                 coeff[0][i]= level;
3899                 coeff[1][i]= level-1;
3900 //                coeff[2][k]= level-2;
3901             }else{
3902                 level= (bias - level)>>QMAT_SHIFT;
3903                 coeff[0][i]= -level;
3904                 coeff[1][i]= -level+1;
3905 //                coeff[2][k]= -level+2;
3906             }
3907             coeff_count[i]= FFMIN(level, 2);
3908             av_assert2(coeff_count[i]);
3909             max |=level;
3910         }else{
3911             coeff[0][i]= (level>>31)|1;
3912             coeff_count[i]= 1;
3913         }
3914     }
3915
3916     *overflow= s->max_qcoeff < max; //overflow might have happened
3917
     /* every searched coefficient is in the dead zone: clear them and stop */
3918     if(last_non_zero < start_i){
3919         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3920         return last_non_zero;
3921     }
3922
     /* Dynamic program: score_tab[k] is the best score found for coding the
      * coefficients at scan positions below k; survivor[] lists the positions
      * k from which a new run may start. */
3923     score_tab[start_i]= 0;
3924     survivor[0]= start_i;
3925     survivor_count= 1;
3926
3927     for(i=start_i; i<=last_non_zero; i++){
3928         int level_index, j, zero_distortion;
3929         int dct_coeff= FFABS(block[ scantable[i] ]);
3930         int best_score=256*256*256*120;
3931
3932         if (s->fdsp.fdct == ff_fdct_ifast)
3933             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
         /* distortions below are measured relative to zeroing this coefficient */
3934         zero_distortion= dct_coeff*dct_coeff;
3935
3936         for(level_index=0; level_index < coeff_count[i]; level_index++){
3937             int distortion;
3938             int level= coeff[level_index][i];
3939             const int alevel= FFABS(level);
3940             int unquant_coeff;
3941
3942             av_assert2(level);
3943
             /* reconstruct the magnitude the way the selected output format does */
3944             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3945                 unquant_coeff= alevel*qmul + qadd;
3946             } else if(s->out_format == FMT_MJPEG) {
3947                 j = s->idsp.idct_permutation[scantable[i]];
3948                 unquant_coeff = alevel * matrix[j] * 8;
3949             }else{ //MPEG1
3950                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3951                 if(s->mb_intra){
3952                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3953                         unquant_coeff =   (unquant_coeff - 1) | 1;
3954                 }else{
3955                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3956                         unquant_coeff =   (unquant_coeff - 1) | 1;
3957                 }
3958                 unquant_coeff<<= 3;
3959             }
3960
3961             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* level+64 in 0..127 is looked up in the length tables; any other
              * level is charged esc_length bits instead */
3962             level+=64;
3963             if((level&(~127)) == 0){
3964                 for(j=survivor_count-1; j>=0; j--){
3965                     int run= i - survivor[j];
3966                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3967                     score += score_tab[i-run];
3968
3969                     if(score < best_score){
3970                         best_score= score;
3971                         run_tab[i+1]= run;
3972                         level_tab[i+1]= level-64;
3973                     }
3974                 }
3975
                 /* H.263/H.261 use a separate length table (last_length) for
                  * the final coefficient, so the best end point is tracked
                  * here; last_score starts at 0, the score of coding nothing */
3976                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3977                     for(j=survivor_count-1; j>=0; j--){
3978                         int run= i - survivor[j];
3979                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3980                         score += score_tab[i-run];
3981                         if(score < last_score){
3982                             last_score= score;
3983                             last_run= run;
3984                             last_level= level-64;
3985                             last_i= i+1;
3986                         }
3987                     }
3988                 }
3989             }else{
3990                 distortion += esc_length*lambda;
3991                 for(j=survivor_count-1; j>=0; j--){
3992                     int run= i - survivor[j];
3993                     int score= distortion + score_tab[i-run];
3994
3995                     if(score < best_score){
3996                         best_score= score;
3997                         run_tab[i+1]= run;
3998                         level_tab[i+1]= level-64;
3999                     }
4000                 }
4001
4002                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4003                   for(j=survivor_count-1; j>=0; j--){
4004                         int run= i - survivor[j];
4005                         int score= distortion + score_tab[i-run];
4006                         if(score < last_score){
4007                             last_score= score;
4008                             last_run= run;
4009                             last_level= level-64;
4010                             last_i= i+1;
4011                         }
4012                     }
4013                 }
4014             }
4015         }
4016
4017         score_tab[i+1]= best_score;
4018
         /* drop survivors whose score is worse than the new best (with a
          * slack of lambda when last_non_zero > 27) */
4019         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
4020         if(last_non_zero <= 27){
4021             for(; survivor_count; survivor_count--){
4022                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4023                     break;
4024             }
4025         }else{
4026             for(; survivor_count; survivor_count--){
4027                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4028                     break;
4029             }
4030         }
4031
4032         survivor[ survivor_count++ ]= i+1;
4033     }
4034
     /* For the other formats the end point is chosen afterwards: every
      * position except 0 is charged an extra 2*lambda (presumably an
      * approximation of the end-of-block cost, see the FIXME). */
4035     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4036         last_score= 256*256*256*120;
4037         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4038             int score= score_tab[i];
4039             if(i) score += lambda*2; //FIXME exacter?
4040
4041             if(score < last_score){
4042                 last_score= score;
4043                 last_i= i;
4044                 last_level= level_tab[i];
4045                 last_run= run_tab[i];
4046             }
4047         }
4048     }
4049
4050     s->coded_score[n] = last_score;
4051
     /* remember |block[0]| before the coefficients are cleared; it is only
      * used by the single-coefficient case below */
4052     dc= FFABS(block[0]);
4053     last_non_zero= last_i - 1;
4054     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4055
4056     if(last_non_zero < start_i)
4057         return last_non_zero;
4058
     /* Only coefficient 0 of an inter block is left: re-evaluate its
      * candidates against dropping it altogether, whose score is dc*dc. */
4059     if(last_non_zero == 0 && start_i == 0){
4060         int best_level= 0;
4061         int best_score= dc * dc;
4062
4063         for(i=0; i<coeff_count[0]; i++){
4064             int level= coeff[i][0];
4065             int alevel= FFABS(level);
4066             int unquant_coeff, score, distortion;
4067
4068             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4069                     unquant_coeff= (alevel*qmul + qadd)>>3;
4070             }else{ //MPEG1
4071                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4072                     unquant_coeff =   (unquant_coeff - 1) | 1;
4073             }
4074             unquant_coeff = (unquant_coeff + 4) >> 3;
4075             unquant_coeff<<= 3 + 3;
4076
4077             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4078             level+=64;
4079             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4080             else                    score= distortion + esc_length*lambda;
4081
4082             if(score < best_score){
4083                 best_score= score;
4084                 best_level= level - 64;
4085             }
4086         }
4087         block[0]= best_level;
4088         s->coded_score[n] = best_score - dc*dc;
4089         if(best_level == 0) return -1;
4090         else                return last_non_zero;
4091     }
4092
     /* Backtrack: store the final level, then follow run_tab[] / level_tab[]
      * towards start_i, writing each chosen level at its permuted position. */
4093     i= last_i;
4094     av_assert2(last_level);
4095
4096     block[ perm_scantable[last_non_zero] ]= last_level;
4097     i -= last_run + 1;
4098
4099     for(; i>start_i; i -= run_tab[i] + 1){
4100         block[ perm_scantable[i-1] ]= level_tab[i];
4101     }
4102
4103     return last_non_zero;
4104 }
4105
/* Uncomment to compile the statistics counters and START_TIMER/STOP_TIMER
 * sections that dct_quantize_refine() guards with #ifdef REFINE_STATS. */
4106 //#define REFINE_STATS 1
/* Table of 64 basis patterns with 64 samples each, filled by build_basis()
 * and passed to the try_8x8basis/add_8x8basis calls in dct_quantize_refine().
 * The first index is a permuted coefficient index. basis[0][0] == 0 is used
 * by dct_quantize_refine() as the "not built yet" test. */
4107 static int16_t basis[64][64];
4108
4109 static void build_basis(uint8_t *perm){
4110     int i, j, x, y;
4111     emms_c();
4112     for(i=0; i<8; i++){
4113         for(j=0; j<8; j++){
4114             for(y=0; y<8; y++){
4115                 for(x=0; x<8; x++){
4116                     double s= 0.25*(1<<BASIS_SHIFT);
4117                     int index= 8*i + j;
4118                     int perm_index= perm[index];
4119                     if(i==0) s*= sqrt(0.5);
4120                     if(j==0) s*= sqrt(0.5);
4121                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4122                 }
4123             }
4124         }
4125     }
4126 }
4127
4128 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4129                         int16_t *block, int16_t *weight, int16_t *orig,
4130                         int n, int qscale){
4131     int16_t rem[64];
4132     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4133     const uint8_t *scantable= s->intra_scantable.scantable;
4134     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4135 //    unsigned int threshold1, threshold2;
4136 //    int bias=0;
4137     int run_tab[65];
4138     int prev_run=0;
4139     int prev_level=0;
4140     int qmul, qadd, start_i, last_non_zero, i, dc;
4141     uint8_t * length;
4142     uint8_t * last_length;
4143     int lambda;
4144     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4145 #ifdef REFINE_STATS
4146 static int count=0;
4147 static int after_last=0;
4148 static int to_zero=0;
4149 static int from_zero=0;
4150 static int raise=0;
4151 static int lower=0;
4152 static int messed_sign=0;
4153 #endif
4154
4155     if(basis[0][0] == 0)
4156         build_basis(s->idsp.idct_permutation);
4157
4158     qmul= qscale*2;
4159     qadd= (qscale-1)|1;
4160     if (s->mb_intra) {
4161         if (!s->h263_aic) {
4162             if (n < 4)
4163                 q = s->y_dc_scale;
4164             else
4165                 q = s->c_dc_scale;
4166         } else{
4167             /* For AIC we skip quant/dequant of INTRADC */
4168             q = 1;
4169             qadd=0;
4170         }
4171         q <<= RECON_SHIFT-3;
4172         /* note: block[0] is assumed to be positive */
4173         dc= block[0]*q;
4174 //        block[0] = (block[0] + (q >> 1)) / q;
4175         start_i = 1;
4176 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4177 //            bias= 1<<(QMAT_SHIFT-1);
4178         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4179             length     = s->intra_chroma_ac_vlc_length;
4180             last_length= s->intra_chroma_ac_vlc_last_length;
4181         } else {
4182             length     = s->intra_ac_vlc_length;
4183             last_length= s->intra_ac_vlc_last_length;
4184         }
4185     } else {
4186         dc= 0;
4187         start_i = 0;
4188         length     = s->inter_ac_vlc_length;
4189         last_length= s->inter_ac_vlc_last_length;
4190     }
4191     last_non_zero = s->block_last_index[n];
4192
4193 #ifdef REFINE_STATS
4194 {START_TIMER
4195 #endif
4196     dc += (1<<(RECON_SHIFT-1));
4197     for(i=0; i<64; i++){
4198         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
4199     }
4200 #ifdef REFINE_STATS
4201 STOP_TIMER("memset rem[]")}
4202 #endif
4203     sum=0;
4204     for(i=0; i<64; i++){
4205         int one= 36;
4206         int qns=4;
4207         int w;
4208
4209         w= FFABS(weight[i]) + qns*one;
4210         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4211
4212         weight[i] = w;
4213 //        w=weight[i] = (63*qns + (w/2)) / w;
4214
4215         av_assert2(w>0);
4216         av_assert2(w<(1<<6));
4217         sum += w*w;
4218     }
4219     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4220 #ifdef REFINE_STATS
4221 {START_TIMER
4222 #endif
4223     run=0;
4224     rle_index=0;
4225     for(i=start_i; i<=last_non_zero; i++){
4226         int j= perm_scantable[i];
4227         const int level= block[j];
4228         int coeff;
4229
4230         if(level){
4231             if(level<0) coeff= qmul*level - qadd;
4232             else        coeff= qmul*level + qadd;
4233             run_tab[rle_index++]=run;
4234             run=0;
4235
4236             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4237         }else{
4238             run++;
4239         }
4240     }
4241 #ifdef REFINE_STATS
4242 if(last_non_zero>0){
4243 STOP_TIMER("init rem[]")
4244 }
4245 }
4246
4247 {START_TIMER
4248 #endif
4249     for(;;){
4250         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4251         int best_coeff=0;
4252         int best_change=0;
4253         int run2, best_unquant_change=0, analyze_gradient;
4254 #ifdef REFINE_STATS
4255 {START_TIMER
4256 #endif
4257         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4258
4259         if(analyze_gradient){
4260 #ifdef REFINE_STATS
4261 {START_TIMER
4262 #endif
4263             for(i=0; i<64; i++){
4264                 int w= weight[i];
4265
4266                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4267             }
4268 #ifdef REFINE_STATS
4269 STOP_TIMER("rem*w*w")}
4270 {START_TIMER
4271 #endif
4272             s->fdsp.fdct(d1);
4273 #ifdef REFINE_STATS
4274 STOP_TIMER("dct")}
4275 #endif
4276         }
4277
4278         if(start_i){
4279             const int level= block[0];
4280             int change, old_coeff;
4281
4282             av_assert2(s->mb_intra);
4283
4284             old_coeff= q*level;
4285
4286             for(change=-1; change<=1; change+=2){
4287                 int new_level= level + change;
4288                 int score, new_coeff;
4289
4290                 new_coeff= q*new_level;
4291                 if(new_coeff >= 2048 || new_coeff < 0)
4292                     continue;
4293
4294                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4295                                                   new_coeff - old_coeff);
4296                 if(score<best_score){
4297                     best_score= score;
4298                     best_coeff= 0;
4299                     best_change= change;
4300                     best_unquant_change= new_coeff - old_coeff;
4301                 }
4302             }
4303         }
4304
4305         run=0;
4306         rle_index=0;
4307         run2= run_tab[rle_index++];
4308         prev_level=0;
4309         prev_run=0;
4310
4311         for(i=start_i; i<64; i++){
4312             int j= perm_scantable[i];
4313             const int level= block[j];
4314             int change, old_coeff;
4315
4316             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4317                 break;
4318
4319             if(level){
4320                 if(level<0) old_coeff= qmul*level - qadd;
4321                 else        old_coeff= qmul*level + qadd;
4322                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4323             }else{
4324                 old_coeff=0;
4325                 run2--;
4326                 av_assert2(run2>=0 || i >= last_non_zero );
4327             }
4328
4329             for(change=-1; change<=1; change+=2){
4330                 int new_level= level + change;
4331                 int score, new_coeff, unquant_change;
4332
4333                 score=0;
4334                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4335                    continue;
4336
4337                 if(new_level){
4338                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4339                     else            new_coeff= qmul*new_level + qadd;
4340                     if(new_coeff >= 2048 || new_coeff <= -2048)
4341                         continue;
4342                     //FIXME check for overflow
4343
4344                     if(level){
4345                         if(level < 63 && level > -63){
4346                             if(i < last_non_zero)
4347                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4348                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4349                             else
4350                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4351                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4352                         }
4353                     }else{
4354                         av_assert2(FFABS(new_level)==1);
4355
4356                         if(analyze_gradient){
4357                             int g= d1[ scantable[i] ];
4358                             if(g && (g^new_level) >= 0)
4359                                 continue;
4360                         }
4361
4362                         if(i < last_non_zero){
4363                             int next_i= i + run2 + 1;
4364                             int next_level= block[ perm_scantable[next_i] ] + 64;
4365
4366                             if(next_level&(~127))
4367                                 next_level= 0;
4368
4369                             if(next_i < last_non_zero)
4370                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4371                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4372                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4373                             else
4374                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4375                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4376                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4377                         }else{
4378                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4379                             if(prev_level){
4380                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4381                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4382                             }
4383                         }
4384                     }
4385                 }else{
4386                     new_coeff=0;
4387                     av_assert2(FFABS(level)==1);
4388
4389                     if(i < last_non_zero){
4390                         int next_i= i + run2 + 1;
4391                         int next_level= block[ perm_scantable[next_i] ] + 64;
4392
4393                         if(next_level&(~127))
4394                             next_level= 0;
4395
4396                         if(next_i < last_non_zero)
4397                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4398                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4399                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4400                         else
4401                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4402                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4403                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4404                     }else{
4405                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4406                         if(prev_level){
4407                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4408                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4409                         }
4410                     }
4411                 }
4412
4413                 score *= lambda;
4414
4415                 unquant_change= new_coeff - old_coeff;
4416                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4417
4418                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4419                                                    unquant_change);
4420                 if(score<best_score){
4421                     best_score= score;
4422                     best_coeff= i;
4423                     best_change= change;
4424                     best_unquant_change= unquant_change;
4425                 }
4426             }
4427             if(level){
4428                 prev_level= level + 64;
4429                 if(prev_level&(~127))
4430                     prev_level= 0;
4431                 prev_run= run;
4432                 run=0;
4433             }else{
4434                 run++;
4435             }
4436         }
4437 #ifdef REFINE_STATS
4438 STOP_TIMER("iterative step")}
4439 #endif
4440
4441         if(best_change){
4442             int j= perm_scantable[ best_coeff ];
4443
4444             block[j] += best_change;
4445
4446             if(best_coeff > last_non_zero){
4447                 last_non_zero= best_coeff;
4448                 av_assert2(block[j]);
4449 #ifdef REFINE_STATS
4450 after_last++;
4451 #endif
4452             }else{
4453 #ifdef REFINE_STATS
4454 if(block[j]){
4455     if(block[j] - best_change){
4456         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4457             raise++;
4458         }else{
4459             lower++;
4460         }
4461     }else{
4462         from_zero++;
4463     }
4464 }else{
4465     to_zero++;
4466 }
4467 #endif
4468                 for(; last_non_zero>=start_i; last_non_zero--){
4469                     if(block[perm_scantable[last_non_zero]])
4470                         break;
4471                 }
4472             }
4473 #ifdef REFINE_STATS
4474 count++;
4475 if(256*256*256*64 % count == 0){
4476     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4477 }
4478 #endif
4479             run=0;
4480             rle_index=0;
4481             for(i=start_i; i<=last_non_zero; i++){
4482                 int j= perm_scantable[i];
4483                 const int level= block[j];
4484
4485                  if(level){
4486                      run_tab[rle_index++]=run;
4487                      run=0;
4488                  }else{
4489                      run++;
4490                  }
4491             }
4492
4493             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4494         }else{
4495             break;
4496         }
4497     }
4498 #ifdef REFINE_STATS
4499 if(last_non_zero>0){
4500 STOP_TIMER("iterative search")
4501 }
4502 }
4503 #endif
4504
4505     return last_non_zero;
4506 }
4507
/**
 * Permute the coefficients of an 8x8 block in place.
 *
 * Only the coefficients at the first last+1 entries of scantable are
 * touched: each such coefficient block[j] is moved to
 * block[permutation[j]]. All other entries of block are left as they are.
 *
 * @param block       the block to permute
 * @param permutation the permutation vector, mapping a coefficient index to
 *                    its destination index
 * @param scantable   the scantable in use; it only limits the work to the
 *                    coefficients that can be non-zero, the block is not
 *                    converted to (or from) scantable order
 * @param last        index, in scantable order, of the last non-zero
 *                    coefficient; nothing is done when it is <= 0
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    if (last < 1)
        return;
    /* FIXME: returning early when permutation[1] == 1 would be ok but not
     * clean and might fail for some permutations, so it is not done */

    /* pass 1: lift the affected coefficients out of the block */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos++];

        saved[src] = block[src];
        block[src] = 0;
    }

    /* pass 2: drop each of them at its permuted position */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos++];

        block[permutation[src]] = saved[src];
    }
}
4543
4544 int ff_dct_quantize_c(MpegEncContext *s,
4545                         int16_t *block, int n,
4546                         int qscale, int *overflow)
4547 {
4548     int i, j, level, last_non_zero, q, start_i;
4549     const int *qmat;
4550     const uint8_t *scantable= s->intra_scantable.scantable;
4551     int bias;
4552     int max=0;
4553     unsigned int threshold1, threshold2;
4554
4555     s->fdsp.fdct(block);
4556
4557     if(s->dct_error_sum)
4558         s->denoise_dct(s, block);
4559
4560     if (s->mb_intra) {
4561         if (!s->h263_aic) {
4562             if (n < 4)
4563                 q = s->y_dc_scale;
4564             else
4565                 q = s->c_dc_scale;
4566             q = q << 3;
4567         } else
4568             /* For AIC we skip quant/dequant of INTRADC */
4569             q = 1 << 3;
4570
4571         /* note: block[0] is assumed to be positive */
4572         block[0] = (block[0] + (q >> 1)) / q;
4573         start_i = 1;
4574         last_non_zero = 0;
4575         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4576         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4577     } else {
4578         start_i = 0;
4579         last_non_zero = -1;
4580         qmat = s->q_inter_matrix[qscale];
4581         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4582     }
4583     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4584     threshold2= (threshold1<<1);
4585     for(i=63;i>=start_i;i--) {
4586         j = scantable[i];
4587         level = block[j] * qmat[j];
4588
4589         if(((unsigned)(level+threshold1))>threshold2){
4590             last_non_zero = i;
4591             break;
4592         }else{
4593             block[j]=0;
4594         }
4595     }
4596     for(i=start_i; i<=last_non_zero; i++) {
4597         j = scantable[i];
4598         level = block[j] * qmat[j];
4599
4600 //        if(   bias+level >= (1<<QMAT_SHIFT)
4601 //           || bias-level >= (1<<QMAT_SHIFT)){
4602         if(((unsigned)(level+threshold1))>threshold2){
4603             if(level>0){
4604                 level= (bias + level)>>QMAT_SHIFT;
4605                 block[j]= level;
4606             }else{
4607                 level= (bias - level)>>QMAT_SHIFT;
4608                 block[j]= -level;
4609             }
4610             max |=level;
4611         }else{
4612             block[j]=0;
4613         }
4614     }
4615     *overflow= s->max_qcoeff < max; //overflow might have happened
4616
4617     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4618     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4619         block_permute(block, s->idsp.idct_permutation,
4620                       scantable, last_non_zero);
4621
4622     return last_non_zero;
4623 }
4624
/* Byte offset of field x inside MpegEncContext, for AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by every option below: video + encoding parameter.
 * Parenthesized so the expansion stays a single operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4627 static const AVOption h263_options[] = {
4628     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4629     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4630     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4631     FF_MPV_COMMON_OPTS
4632     { NULL },
4633 };
4634
4635 static const AVClass h263_class = {
4636     .class_name = "H.263 encoder",
4637     .item_name  = av_default_item_name,
4638     .option     = h263_options,
4639     .version    = LIBAVUTIL_VERSION_INT,
4640 };
4641
4642 AVCodec ff_h263_encoder = {
4643     .name           = "h263",
4644     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4645     .type           = AVMEDIA_TYPE_VIDEO,
4646     .id             = AV_CODEC_ID_H263,
4647     .priv_data_size = sizeof(MpegEncContext),
4648     .init           = ff_mpv_encode_init,
4649     .encode2        = ff_mpv_encode_picture,
4650     .close          = ff_mpv_encode_end,
4651     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4652     .priv_class     = &h263_class,
4653 };
4654
4655 static const AVOption h263p_options[] = {
4656     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4657     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4658     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4659     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4660     FF_MPV_COMMON_OPTS
4661     { NULL },
4662 };
4663 static const AVClass h263p_class = {
4664     .class_name = "H.263p encoder",
4665     .item_name  = av_default_item_name,
4666     .option     = h263p_options,
4667     .version    = LIBAVUTIL_VERSION_INT,
4668 };
4669
4670 AVCodec ff_h263p_encoder = {
4671     .name           = "h263p",
4672     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4673     .type           = AVMEDIA_TYPE_VIDEO,
4674     .id             = AV_CODEC_ID_H263P,
4675     .priv_data_size = sizeof(MpegEncContext),
4676     .init           = ff_mpv_encode_init,
4677     .encode2        = ff_mpv_encode_picture,
4678     .close          = ff_mpv_encode_end,
4679     .capabilities   = CODEC_CAP_SLICE_THREADS,
4680     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4681     .priv_class     = &h263p_class,
4682 };
4683
4684 static const AVClass msmpeg4v2_class = {
4685     .class_name = "msmpeg4v2 encoder",
4686     .item_name  = av_default_item_name,
4687     .option     = ff_mpv_generic_options,
4688     .version    = LIBAVUTIL_VERSION_INT,
4689 };
4690
4691 AVCodec ff_msmpeg4v2_encoder = {
4692     .name           = "msmpeg4v2",
4693     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4694     .type           = AVMEDIA_TYPE_VIDEO,
4695     .id             = AV_CODEC_ID_MSMPEG4V2,
4696     .priv_data_size = sizeof(MpegEncContext),
4697     .init           = ff_mpv_encode_init,
4698     .encode2        = ff_mpv_encode_picture,
4699     .close          = ff_mpv_encode_end,
4700     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4701     .priv_class     = &msmpeg4v2_class,
4702 };
4703
4704 static const AVClass msmpeg4v3_class = {
4705     .class_name = "msmpeg4v3 encoder",
4706     .item_name  = av_default_item_name,
4707     .option     = ff_mpv_generic_options,
4708     .version    = LIBAVUTIL_VERSION_INT,
4709 };
4710
4711 AVCodec ff_msmpeg4v3_encoder = {
4712     .name           = "msmpeg4",
4713     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4714     .type           = AVMEDIA_TYPE_VIDEO,
4715     .id             = AV_CODEC_ID_MSMPEG4V3,
4716     .priv_data_size = sizeof(MpegEncContext),
4717     .init           = ff_mpv_encode_init,
4718     .encode2        = ff_mpv_encode_picture,
4719     .close          = ff_mpv_encode_end,
4720     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4721     .priv_class     = &msmpeg4v3_class,
4722 };
4723
4724 static const AVClass wmv1_class = {
4725     .class_name = "wmv1 encoder",
4726     .item_name  = av_default_item_name,
4727     .option     = ff_mpv_generic_options,
4728     .version    = LIBAVUTIL_VERSION_INT,
4729 };
4730
4731 AVCodec ff_wmv1_encoder = {
4732     .name           = "wmv1",
4733     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4734     .type           = AVMEDIA_TYPE_VIDEO,
4735     .id             = AV_CODEC_ID_WMV1,
4736     .priv_data_size = sizeof(MpegEncContext),
4737     .init           = ff_mpv_encode_init,
4738     .encode2        = ff_mpv_encode_picture,
4739     .close          = ff_mpv_encode_end,
4740     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4741     .priv_class     = &wmv1_class,
4742 };