git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit '083cbc930d077651ea7e3fbc32ec45352cfed7e7'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "mpegvideodata.h"
44 #include "h261.h"
45 #include "h263.h"
46 #include "h263data.h"
47 #include "mjpegenc_common.h"
48 #include "mathops.h"
49 #include "mpegutils.h"
50 #include "mjpegenc.h"
51 #include "msmpeg4.h"
52 #include "pixblockdsp.h"
53 #include "qpeldsp.h"
54 #include "faandct.h"
55 #include "thread.h"
56 #include "aandcttab.h"
57 #include "flv.h"
58 #include "mpeg4video.h"
59 #include "internal.h"
60 #include "bytestream.h"
61 #include "wmv2.h"
62 #include "rv10.h"
63 #include <limits.h>
64 #include "sp5x.h"
65
/* Number of fractional bits carried by the quantizer bias values
 * (ff_convert_matrix rescales a bias by 16 - QUANT_BIAS_SHIFT bits). */
66 #define QUANT_BIAS_SHIFT 8
67
/* Numerator shifts used by ff_convert_matrix when building the reciprocal
 * tables: QMAT_SHIFT_MMX for the 16-bit qmat16 table, QMAT_SHIFT for the
 * int qmat table. */
68 #define QMAT_SHIFT_MMX 16
69 #define QMAT_SHIFT 21
70
/* Forward declarations of file-local functions; they are not defined in this
 * part of the file. denoise_dct_c and dct_quantize_trellis_c are installed
 * as callbacks by ff_dct_encode_init. */
71 static int encode_picture(MpegEncContext *s, int picture_number);
72 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
73 static int sse_mb(MpegEncContext *s);
74 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
75 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
76
/* Shared default tables that mpv_encode_defaults installs into
 * s->me.mv_penalty and s->fcode_tab. */
77 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
78 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
79
/* Option table made of the entries expanded from FF_MPV_COMMON_OPTS,
 * terminated by a NULL entry. */
80 const AVOption ff_mpv_generic_options[] = {
81     FF_MPV_COMMON_OPTS
82     { NULL },
83 };
84
85 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
86                        uint16_t (*qmat16)[2][64],
87                        const uint16_t *quant_matrix,
88                        int bias, int qmin, int qmax, int intra)
89 {
90     FDCTDSPContext *fdsp = &s->fdsp;
91     int qscale;
92     int shift = 0;
93
94     for (qscale = qmin; qscale <= qmax; qscale++) {
95         int i;
96         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
97 #if CONFIG_FAANDCT
98             fdsp->fdct == ff_faandct            ||
99 #endif /* CONFIG_FAANDCT */
100             fdsp->fdct == ff_jpeg_fdct_islow_10) {
101             for (i = 0; i < 64; i++) {
102                 const int j = s->idsp.idct_permutation[i];
103                 int64_t den = (int64_t) qscale * quant_matrix[j];
104                 /* 16 <= qscale * quant_matrix[i] <= 7905
105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
106                  *             19952 <=              x  <= 249205026
107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
108                  *           3444240 >= (1 << 36) / (x) >= 275 */
109
110                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
111             }
112         } else if (fdsp->fdct == ff_fdct_ifast) {
113             for (i = 0; i < 64; i++) {
114                 const int j = s->idsp.idct_permutation[i];
115                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
116                 /* 16 <= qscale * quant_matrix[i] <= 7905
117                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
118                  *             19952 <=              x  <= 249205026
119                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
120                  *           3444240 >= (1 << 36) / (x) >= 275 */
121
122                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
123             }
124         } else {
125             for (i = 0; i < 64; i++) {
126                 const int j = s->idsp.idct_permutation[i];
127                 int64_t den = (int64_t) qscale * quant_matrix[j];
128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
129                  * Assume x = qscale * quant_matrix[i]
130                  * So             16 <=              x  <= 7905
131                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
132                  * so          32768 >= (1 << 19) / (x) >= 67 */
133                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
134                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
135                 //                    (qscale * quant_matrix[i]);
136                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
137
138                 if (qmat16[qscale][0][i] == 0 ||
139                     qmat16[qscale][0][i] == 128 * 256)
140                     qmat16[qscale][0][i] = 128 * 256 - 1;
141                 qmat16[qscale][1][i] =
142                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
143                                 qmat16[qscale][0][i]);
144             }
145         }
146
147         for (i = intra; i < 64; i++) {
148             int64_t max = 8191;
149             if (fdsp->fdct == ff_fdct_ifast) {
150                 max = (8191LL * ff_aanscales[i]) >> 14;
151             }
152             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
153                 shift++;
154             }
155         }
156     }
157     if (shift) {
158         av_log(NULL, AV_LOG_INFO,
159                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
160                QMAT_SHIFT - shift);
161     }
162 }
163
164 static inline void update_qscale(MpegEncContext *s)
165 {
166     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
167                 (FF_LAMBDA_SHIFT + 7);
168     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
169
170     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
171                  FF_LAMBDA_SHIFT;
172 }
173
174 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
175 {
176     int i;
177
178     if (matrix) {
179         put_bits(pb, 1, 1);
180         for (i = 0; i < 64; i++) {
181             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
182         }
183     } else
184         put_bits(pb, 1, 0);
185 }
186
187 /**
188  * init s->current_picture.qscale_table from s->lambda_table
189  */
190 void ff_init_qscale_tab(MpegEncContext *s)
191 {
192     int8_t * const qscale_table = s->current_picture.qscale_table;
193     int i;
194
195     for (i = 0; i < s->mb_num; i++) {
196         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
197         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
198         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
199                                                   s->avctx->qmax);
200     }
201 }
202
203 static void update_duplicate_context_after_me(MpegEncContext *dst,
204                                               MpegEncContext *src)
205 {
206 #define COPY(a) dst->a= src->a
207     COPY(pict_type);
208     COPY(current_picture);
209     COPY(f_code);
210     COPY(b_code);
211     COPY(qscale);
212     COPY(lambda);
213     COPY(lambda2);
214     COPY(picture_in_gop_number);
215     COPY(gop_picture_number);
216     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
217     COPY(progressive_frame);    // FIXME don't set in encode_header
218     COPY(partitioned_frame);    // FIXME don't set in encode_header
219 #undef COPY
220 }
221
222 /**
223  * Set the given MpegEncContext to defaults for encoding.
224  * the changed fields will not depend upon the prior state of the MpegEncContext.
225  */
226 static void mpv_encode_defaults(MpegEncContext *s)
227 {
228     int i;
229     ff_mpv_common_defaults(s);
230
231     for (i = -16; i < 16; i++) {
232         default_fcode_tab[i + MAX_MV] = 1;
233     }
234     s->me.mv_penalty = default_mv_penalty;
235     s->fcode_tab     = default_fcode_tab;
236
237     s->input_picture_number  = 0;
238     s->picture_in_gop_number = 0;
239 }
240
241 av_cold int ff_dct_encode_init(MpegEncContext *s) {
242     if (ARCH_X86)
243         ff_dct_encode_init_x86(s);
244
245     if (CONFIG_H263_ENCODER)
246         ff_h263dsp_init(&s->h263dsp);
247     if (!s->dct_quantize)
248         s->dct_quantize = ff_dct_quantize_c;
249     if (!s->denoise_dct)
250         s->denoise_dct  = denoise_dct_c;
251     s->fast_dct_quantize = s->dct_quantize;
252     if (s->avctx->trellis)
253         s->dct_quantize  = dct_quantize_trellis_c;
254
255     return 0;
256 }
257
258 /* init video encoder */
259 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
260 {
261     MpegEncContext *s = avctx->priv_data;
262     int i, ret, format_supported;
263
264     mpv_encode_defaults(s);
265
266     switch (avctx->codec_id) {
267     case AV_CODEC_ID_MPEG2VIDEO:
268         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
269             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
270             av_log(avctx, AV_LOG_ERROR,
271                    "only YUV420 and YUV422 are supported\n");
272             return -1;
273         }
274         break;
275     case AV_CODEC_ID_MJPEG:
276     case AV_CODEC_ID_AMV:
277         format_supported = 0;
278         /* JPEG color space */
279         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
280             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
281             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
282             (avctx->color_range == AVCOL_RANGE_JPEG &&
283              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
284               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
285               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
286             format_supported = 1;
287         /* MPEG color space */
288         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
289                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
290                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
291                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
292             format_supported = 1;
293
294         if (!format_supported) {
295             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
296             return -1;
297         }
298         break;
299     default:
300         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
301             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
302             return -1;
303         }
304     }
305
306     switch (avctx->pix_fmt) {
307     case AV_PIX_FMT_YUVJ444P:
308     case AV_PIX_FMT_YUV444P:
309         s->chroma_format = CHROMA_444;
310         break;
311     case AV_PIX_FMT_YUVJ422P:
312     case AV_PIX_FMT_YUV422P:
313         s->chroma_format = CHROMA_422;
314         break;
315     case AV_PIX_FMT_YUVJ420P:
316     case AV_PIX_FMT_YUV420P:
317     default:
318         s->chroma_format = CHROMA_420;
319         break;
320     }
321
322     s->bit_rate = avctx->bit_rate;
323     s->width    = avctx->width;
324     s->height   = avctx->height;
325     if (avctx->gop_size > 600 &&
326         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
327         av_log(avctx, AV_LOG_WARNING,
328                "keyframe interval too large!, reducing it from %d to %d\n",
329                avctx->gop_size, 600);
330         avctx->gop_size = 600;
331     }
332     s->gop_size     = avctx->gop_size;
333     s->avctx        = avctx;
334     if (avctx->max_b_frames > MAX_B_FRAMES) {
335         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
336                "is %d.\n", MAX_B_FRAMES);
337         avctx->max_b_frames = MAX_B_FRAMES;
338     }
339     s->max_b_frames = avctx->max_b_frames;
340     s->codec_id     = avctx->codec->id;
341     s->strict_std_compliance = avctx->strict_std_compliance;
342     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
343     s->mpeg_quant         = avctx->mpeg_quant;
344     s->rtp_mode           = !!avctx->rtp_payload_size;
345     s->intra_dc_precision = avctx->intra_dc_precision;
346
347     // workaround some differences between how applications specify dc precision
348     if (s->intra_dc_precision < 0) {
349         s->intra_dc_precision += 8;
350     } else if (s->intra_dc_precision >= 8)
351         s->intra_dc_precision -= 8;
352
353     if (s->intra_dc_precision < 0) {
354         av_log(avctx, AV_LOG_ERROR,
355                 "intra dc precision must be positive, note some applications use"
356                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
357         return AVERROR(EINVAL);
358     }
359
360     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
361         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
362         return AVERROR(EINVAL);
363     }
364     s->user_specified_pts = AV_NOPTS_VALUE;
365
366     if (s->gop_size <= 1) {
367         s->intra_only = 1;
368         s->gop_size   = 12;
369     } else {
370         s->intra_only = 0;
371     }
372
373     s->me_method = avctx->me_method;
374
375     /* Fixed QSCALE */
376     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
377
378 #if FF_API_MPV_OPT
379     FF_DISABLE_DEPRECATION_WARNINGS
380     if (avctx->border_masking != 0.0)
381         s->border_masking = avctx->border_masking;
382     FF_ENABLE_DEPRECATION_WARNINGS
383 #endif
384
385     s->adaptive_quant = (s->avctx->lumi_masking ||
386                          s->avctx->dark_masking ||
387                          s->avctx->temporal_cplx_masking ||
388                          s->avctx->spatial_cplx_masking  ||
389                          s->avctx->p_masking      ||
390                          s->border_masking ||
391                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
392                         !s->fixed_qscale;
393
394     s->loop_filter = !!(s->avctx->flags & CODEC_FLAG_LOOP_FILTER);
395
396     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
397         switch(avctx->codec_id) {
398         case AV_CODEC_ID_MPEG1VIDEO:
399         case AV_CODEC_ID_MPEG2VIDEO:
400             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
401             break;
402         case AV_CODEC_ID_MPEG4:
403         case AV_CODEC_ID_MSMPEG4V1:
404         case AV_CODEC_ID_MSMPEG4V2:
405         case AV_CODEC_ID_MSMPEG4V3:
406             if       (avctx->rc_max_rate >= 15000000) {
407                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
408             } else if(avctx->rc_max_rate >=  2000000) {
409                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
410             } else if(avctx->rc_max_rate >=   384000) {
411                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
412             } else
413                 avctx->rc_buffer_size = 40;
414             avctx->rc_buffer_size *= 16384;
415             break;
416         }
417         if (avctx->rc_buffer_size) {
418             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
419         }
420     }
421
422     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
423         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
424         return -1;
425     }
426
427     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
428         av_log(avctx, AV_LOG_INFO,
429                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
430     }
431
432     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
433         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
434         return -1;
435     }
436
437     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
438         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
439         return -1;
440     }
441
442     if (avctx->rc_max_rate &&
443         avctx->rc_max_rate == avctx->bit_rate &&
444         avctx->rc_max_rate != avctx->rc_min_rate) {
445         av_log(avctx, AV_LOG_INFO,
446                "impossible bitrate constraints, this will fail\n");
447     }
448
449     if (avctx->rc_buffer_size &&
450         avctx->bit_rate * (int64_t)avctx->time_base.num >
451             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
452         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
453         return -1;
454     }
455
456     if (!s->fixed_qscale &&
457         avctx->bit_rate * av_q2d(avctx->time_base) >
458             avctx->bit_rate_tolerance) {
459         av_log(avctx, AV_LOG_WARNING,
460                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
461         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
462     }
463
464     if (s->avctx->rc_max_rate &&
465         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
466         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
467          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
468         90000LL * (avctx->rc_buffer_size - 1) >
469             s->avctx->rc_max_rate * 0xFFFFLL) {
470         av_log(avctx, AV_LOG_INFO,
471                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
472                "specified vbv buffer is too large for the given bitrate!\n");
473     }
474
475     if ((s->avctx->flags & CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
476         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
477         s->codec_id != AV_CODEC_ID_FLV1) {
478         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
479         return -1;
480     }
481
482     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
483         av_log(avctx, AV_LOG_ERROR,
484                "OBMC is only supported with simple mb decision\n");
485         return -1;
486     }
487
488     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
489         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
490         return -1;
491     }
492
493     if (s->max_b_frames                    &&
494         s->codec_id != AV_CODEC_ID_MPEG4      &&
495         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
496         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
497         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
498         return -1;
499     }
500     if (s->max_b_frames < 0) {
501         av_log(avctx, AV_LOG_ERROR,
502                "max b frames must be 0 or positive for mpegvideo based encoders\n");
503         return -1;
504     }
505
506     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
507          s->codec_id == AV_CODEC_ID_H263  ||
508          s->codec_id == AV_CODEC_ID_H263P) &&
509         (avctx->sample_aspect_ratio.num > 255 ||
510          avctx->sample_aspect_ratio.den > 255)) {
511         av_log(avctx, AV_LOG_WARNING,
512                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
513                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
514         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
515                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
516     }
517
518     if ((s->codec_id == AV_CODEC_ID_H263  ||
519          s->codec_id == AV_CODEC_ID_H263P) &&
520         (avctx->width  > 2048 ||
521          avctx->height > 1152 )) {
522         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
523         return -1;
524     }
525     if ((s->codec_id == AV_CODEC_ID_H263  ||
526          s->codec_id == AV_CODEC_ID_H263P) &&
527         ((avctx->width &3) ||
528          (avctx->height&3) )) {
529         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
530         return -1;
531     }
532
533     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
534         (avctx->width  > 4095 ||
535          avctx->height > 4095 )) {
536         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
537         return -1;
538     }
539
540     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
541         (avctx->width  > 16383 ||
542          avctx->height > 16383 )) {
543         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
544         return -1;
545     }
546
547     if (s->codec_id == AV_CODEC_ID_RV10 &&
548         (avctx->width &15 ||
549          avctx->height&15 )) {
550         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
551         return AVERROR(EINVAL);
552     }
553
554     if (s->codec_id == AV_CODEC_ID_RV20 &&
555         (avctx->width &3 ||
556          avctx->height&3 )) {
557         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
558         return AVERROR(EINVAL);
559     }
560
561     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
562          s->codec_id == AV_CODEC_ID_WMV2) &&
563          avctx->width & 1) {
564          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
565          return -1;
566     }
567
568     if ((s->avctx->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
569         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
570         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
571         return -1;
572     }
573
574     // FIXME mpeg2 uses that too
575     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
576                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
577         av_log(avctx, AV_LOG_ERROR,
578                "mpeg2 style quantization not supported by codec\n");
579         return -1;
580     }
581
582     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
583         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
584         return -1;
585     }
586
587     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
588         s->avctx->mb_decision != FF_MB_DECISION_RD) {
589         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
590         return -1;
591     }
592
593     if (s->avctx->scenechange_threshold < 1000000000 &&
594         (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)) {
595         av_log(avctx, AV_LOG_ERROR,
596                "closed gop with scene change detection are not supported yet, "
597                "set threshold to 1000000000\n");
598         return -1;
599     }
600
601     if (s->avctx->flags & CODEC_FLAG_LOW_DELAY) {
602         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
603             av_log(avctx, AV_LOG_ERROR,
604                   "low delay forcing is only available for mpeg2\n");
605             return -1;
606         }
607         if (s->max_b_frames != 0) {
608             av_log(avctx, AV_LOG_ERROR,
609                    "b frames cannot be used with low delay\n");
610             return -1;
611         }
612     }
613
614     if (s->q_scale_type == 1) {
615         if (avctx->qmax > 12) {
616             av_log(avctx, AV_LOG_ERROR,
617                    "non linear quant only supports qmax <= 12 currently\n");
618             return -1;
619         }
620     }
621
622     if (s->avctx->thread_count > 1         &&
623         s->codec_id != AV_CODEC_ID_MPEG4      &&
624         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
625         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
626         s->codec_id != AV_CODEC_ID_MJPEG      &&
627         (s->codec_id != AV_CODEC_ID_H263P)) {
628         av_log(avctx, AV_LOG_ERROR,
629                "multi threaded encoding not supported by codec\n");
630         return -1;
631     }
632
633     if (s->avctx->thread_count < 1) {
634         av_log(avctx, AV_LOG_ERROR,
635                "automatic thread number detection not supported by codec, "
636                "patch welcome\n");
637         return -1;
638     }
639
640     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
641         s->rtp_mode = 1;
642
643     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
644         s->h263_slice_structured = 1;
645
646     if (!avctx->time_base.den || !avctx->time_base.num) {
647         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
648         return -1;
649     }
650
651     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
652         av_log(avctx, AV_LOG_INFO,
653                "notice: b_frame_strategy only affects the first pass\n");
654         avctx->b_frame_strategy = 0;
655     }
656
657     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
658     if (i > 1) {
659         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
660         avctx->time_base.den /= i;
661         avctx->time_base.num /= i;
662         //return -1;
663     }
664
665     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
666         // (a + x * 3 / 8) / x
667         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
668         s->inter_quant_bias = 0;
669     } else {
670         s->intra_quant_bias = 0;
671         // (a - x / 4) / x
672         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
673     }
674
675     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
676         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
677         return AVERROR(EINVAL);
678     }
679
680 #if FF_API_QUANT_BIAS
681 FF_DISABLE_DEPRECATION_WARNINGS
682     if (s->intra_quant_bias == FF_DEFAULT_QUANT_BIAS &&
683         avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
684         s->intra_quant_bias = avctx->intra_quant_bias;
685     if (s->inter_quant_bias == FF_DEFAULT_QUANT_BIAS &&
686         avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
687         s->inter_quant_bias = avctx->inter_quant_bias;
688 FF_ENABLE_DEPRECATION_WARNINGS
689 #endif
690
691     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
692
693     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
694         s->avctx->time_base.den > (1 << 16) - 1) {
695         av_log(avctx, AV_LOG_ERROR,
696                "timebase %d/%d not supported by MPEG 4 standard, "
697                "the maximum admitted value for the timebase denominator "
698                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
699                (1 << 16) - 1);
700         return -1;
701     }
702     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
703
704     switch (avctx->codec->id) {
705     case AV_CODEC_ID_MPEG1VIDEO:
706         s->out_format = FMT_MPEG1;
707         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
708         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
709         break;
710     case AV_CODEC_ID_MPEG2VIDEO:
711         s->out_format = FMT_MPEG1;
712         s->low_delay  = !!(s->avctx->flags & CODEC_FLAG_LOW_DELAY);
713         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
714         s->rtp_mode   = 1;
715         break;
716     case AV_CODEC_ID_MJPEG:
717     case AV_CODEC_ID_AMV:
718         s->out_format = FMT_MJPEG;
719         s->intra_only = 1; /* force intra only for jpeg */
720         if (!CONFIG_MJPEG_ENCODER ||
721             ff_mjpeg_encode_init(s) < 0)
722             return -1;
723         avctx->delay = 0;
724         s->low_delay = 1;
725         break;
726     case AV_CODEC_ID_H261:
727         if (!CONFIG_H261_ENCODER)
728             return -1;
729         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
730             av_log(avctx, AV_LOG_ERROR,
731                    "The specified picture size of %dx%d is not valid for the "
732                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
733                     s->width, s->height);
734             return -1;
735         }
736         s->out_format = FMT_H261;
737         avctx->delay  = 0;
738         s->low_delay  = 1;
739         s->rtp_mode   = 0; /* Sliced encoding not supported */
740         break;
741     case AV_CODEC_ID_H263:
742         if (!CONFIG_H263_ENCODER)
743             return -1;
744         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
745                              s->width, s->height) == 8) {
746             av_log(avctx, AV_LOG_ERROR,
747                    "The specified picture size of %dx%d is not valid for "
748                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
749                    "352x288, 704x576, and 1408x1152. "
750                    "Try H.263+.\n", s->width, s->height);
751             return -1;
752         }
753         s->out_format = FMT_H263;
754         avctx->delay  = 0;
755         s->low_delay  = 1;
756         break;
757     case AV_CODEC_ID_H263P:
758         s->out_format = FMT_H263;
759         s->h263_plus  = 1;
760         /* Fx */
761         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
762         s->modified_quant  = s->h263_aic;
763         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
764         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
765
766         /* /Fx */
767         /* These are just to be sure */
768         avctx->delay = 0;
769         s->low_delay = 1;
770         break;
771     case AV_CODEC_ID_FLV1:
772         s->out_format      = FMT_H263;
773         s->h263_flv        = 2; /* format = 1; 11-bit codes */
774         s->unrestricted_mv = 1;
775         s->rtp_mode  = 0; /* don't allow GOB */
776         avctx->delay = 0;
777         s->low_delay = 1;
778         break;
779     case AV_CODEC_ID_RV10:
780         s->out_format = FMT_H263;
781         avctx->delay  = 0;
782         s->low_delay  = 1;
783         break;
784     case AV_CODEC_ID_RV20:
785         s->out_format      = FMT_H263;
786         avctx->delay       = 0;
787         s->low_delay       = 1;
788         s->modified_quant  = 1;
789         s->h263_aic        = 1;
790         s->h263_plus       = 1;
791         s->loop_filter     = 1;
792         s->unrestricted_mv = 0;
793         break;
794     case AV_CODEC_ID_MPEG4:
795         s->out_format      = FMT_H263;
796         s->h263_pred       = 1;
797         s->unrestricted_mv = 1;
798         s->low_delay       = s->max_b_frames ? 0 : 1;
799         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
800         break;
801     case AV_CODEC_ID_MSMPEG4V2:
802         s->out_format      = FMT_H263;
803         s->h263_pred       = 1;
804         s->unrestricted_mv = 1;
805         s->msmpeg4_version = 2;
806         avctx->delay       = 0;
807         s->low_delay       = 1;
808         break;
809     case AV_CODEC_ID_MSMPEG4V3:
810         s->out_format        = FMT_H263;
811         s->h263_pred         = 1;
812         s->unrestricted_mv   = 1;
813         s->msmpeg4_version   = 3;
814         s->flipflop_rounding = 1;
815         avctx->delay         = 0;
816         s->low_delay         = 1;
817         break;
818     case AV_CODEC_ID_WMV1:
819         s->out_format        = FMT_H263;
820         s->h263_pred         = 1;
821         s->unrestricted_mv   = 1;
822         s->msmpeg4_version   = 4;
823         s->flipflop_rounding = 1;
824         avctx->delay         = 0;
825         s->low_delay         = 1;
826         break;
827     case AV_CODEC_ID_WMV2:
828         s->out_format        = FMT_H263;
829         s->h263_pred         = 1;
830         s->unrestricted_mv   = 1;
831         s->msmpeg4_version   = 5;
832         s->flipflop_rounding = 1;
833         avctx->delay         = 0;
834         s->low_delay         = 1;
835         break;
836     default:
837         return -1;
838     }
839
840     avctx->has_b_frames = !s->low_delay;
841
842     s->encoding = 1;
843
844     s->progressive_frame    =
845     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
846                                                 CODEC_FLAG_INTERLACED_ME) ||
847                                 s->alternate_scan);
848
849     /* init */
850     ff_mpv_idct_init(s);
851     if (ff_mpv_common_init(s) < 0)
852         return -1;
853
854     ff_fdctdsp_init(&s->fdsp, avctx);
855     ff_me_cmp_init(&s->mecc, avctx);
856     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
857     ff_pixblockdsp_init(&s->pdsp, avctx);
858     ff_qpeldsp_init(&s->qdsp);
859
860     if (s->msmpeg4_version) {
861         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
862                           2 * 2 * (MAX_LEVEL + 1) *
863                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
864     }
865     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
866
867     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
868     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
869     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
870     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
871     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
872     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
873     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
874                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
875     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
876                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
877
878     if (s->avctx->noise_reduction) {
879         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
880                           2 * 64 * sizeof(uint16_t), fail);
881     }
882
883     ff_dct_encode_init(s);
884
885     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
886         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
887
888     s->quant_precision = 5;
889
890     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
891     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
892
893     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
894         ff_h261_encode_init(s);
895     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
896         ff_h263_encode_init(s);
897     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
898         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
899             return ret;
900     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
901         && s->out_format == FMT_MPEG1)
902         ff_mpeg1_encode_init(s);
903
904     /* init q matrix */
905     for (i = 0; i < 64; i++) {
906         int j = s->idsp.idct_permutation[i];
907         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
908             s->mpeg_quant) {
909             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
910             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
911         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
912             s->intra_matrix[j] =
913             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
914         } else {
915             /* mpeg1/2 */
916             s->chroma_intra_matrix[j] =
917             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
918             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
919         }
920         if (s->avctx->intra_matrix)
921             s->intra_matrix[j] = s->avctx->intra_matrix[i];
922         if (s->avctx->inter_matrix)
923             s->inter_matrix[j] = s->avctx->inter_matrix[i];
924     }
925
926     /* precompute matrix */
927     /* for mjpeg, we do include qscale in the matrix */
928     if (s->out_format != FMT_MJPEG) {
929         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
930                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
931                           31, 1);
932         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
933                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
934                           31, 0);
935     }
936
937     if (ff_rate_control_init(s) < 0)
938         return -1;
939
940 #if FF_API_ERROR_RATE
941     FF_DISABLE_DEPRECATION_WARNINGS
942     if (avctx->error_rate)
943         s->error_rate = avctx->error_rate;
944     FF_ENABLE_DEPRECATION_WARNINGS;
945 #endif
946
947 #if FF_API_NORMALIZE_AQP
948     FF_DISABLE_DEPRECATION_WARNINGS
949     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
950         s->mpv_flags |= FF_MPV_FLAG_NAQ;
951     FF_ENABLE_DEPRECATION_WARNINGS;
952 #endif
953
954 #if FF_API_MV0
955     FF_DISABLE_DEPRECATION_WARNINGS
956     if (avctx->flags & CODEC_FLAG_MV0)
957         s->mpv_flags |= FF_MPV_FLAG_MV0;
958     FF_ENABLE_DEPRECATION_WARNINGS
959 #endif
960
961 #if FF_API_MPV_OPT
962     FF_DISABLE_DEPRECATION_WARNINGS
963     if (avctx->rc_qsquish != 0.0)
964         s->rc_qsquish = avctx->rc_qsquish;
965     if (avctx->rc_qmod_amp != 0.0)
966         s->rc_qmod_amp = avctx->rc_qmod_amp;
967     if (avctx->rc_qmod_freq)
968         s->rc_qmod_freq = avctx->rc_qmod_freq;
969     if (avctx->rc_buffer_aggressivity != 1.0)
970         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
971     if (avctx->rc_initial_cplx != 0.0)
972         s->rc_initial_cplx = avctx->rc_initial_cplx;
973     if (avctx->lmin)
974         s->lmin = avctx->lmin;
975     if (avctx->lmax)
976         s->lmax = avctx->lmax;
977
978     if (avctx->rc_eq) {
979         av_freep(&s->rc_eq);
980         s->rc_eq = av_strdup(avctx->rc_eq);
981         if (!s->rc_eq)
982             return AVERROR(ENOMEM);
983     }
984     FF_ENABLE_DEPRECATION_WARNINGS
985 #endif
986
987     if (avctx->b_frame_strategy == 2) {
988         for (i = 0; i < s->max_b_frames + 2; i++) {
989             s->tmp_frames[i] = av_frame_alloc();
990             if (!s->tmp_frames[i])
991                 return AVERROR(ENOMEM);
992
993             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
994             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
995             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
996
997             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
998             if (ret < 0)
999                 return ret;
1000         }
1001     }
1002
1003     return 0;
1004 fail:
1005     ff_mpv_encode_end(avctx);
1006     return AVERROR_UNKNOWN;
1007 }
1008
1009 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1010 {
1011     MpegEncContext *s = avctx->priv_data;
1012     int i;
1013
1014     ff_rate_control_uninit(s);
1015
1016     ff_mpv_common_end(s);
1017     if (CONFIG_MJPEG_ENCODER &&
1018         s->out_format == FMT_MJPEG)
1019         ff_mjpeg_encode_close(s);
1020
1021     av_freep(&avctx->extradata);
1022
1023     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1024         av_frame_free(&s->tmp_frames[i]);
1025
1026     ff_free_picture_tables(&s->new_picture);
1027     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1028
1029     av_freep(&s->avctx->stats_out);
1030     av_freep(&s->ac_stats);
1031
1032     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1033     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1034     s->q_chroma_intra_matrix=   NULL;
1035     s->q_chroma_intra_matrix16= NULL;
1036     av_freep(&s->q_intra_matrix);
1037     av_freep(&s->q_inter_matrix);
1038     av_freep(&s->q_intra_matrix16);
1039     av_freep(&s->q_inter_matrix16);
1040     av_freep(&s->input_picture);
1041     av_freep(&s->reordered_input_picture);
1042     av_freep(&s->dct_offset);
1043
1044     return 0;
1045 }
1046
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return the accumulated absolute error over all 256 pixels
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
1060
1061 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1062                            uint8_t *ref, int stride)
1063 {
1064     int x, y, w, h;
1065     int acc = 0;
1066
1067     w = s->width  & ~15;
1068     h = s->height & ~15;
1069
1070     for (y = 0; y < h; y += 16) {
1071         for (x = 0; x < w; x += 16) {
1072             int offset = x + y * stride;
1073             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1074                                       stride, 16);
1075             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1076             int sae  = get_sae(src + offset, mean, stride);
1077
1078             acc += sae + 500 < sad;
1079         }
1080     }
1081     return acc;
1082 }
1083
1084 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1085 {
1086     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1087                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1088                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1089                             &s->linesize, &s->uvlinesize);
1090 }
1091
1092 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1093 {
1094     Picture *pic = NULL;
1095     int64_t pts;
1096     int i, display_picture_number = 0, ret;
1097     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1098                                                  (s->low_delay ? 0 : 1);
1099     int direct = 1;
1100
1101     if (pic_arg) {
1102         pts = pic_arg->pts;
1103         display_picture_number = s->input_picture_number++;
1104
1105         if (pts != AV_NOPTS_VALUE) {
1106             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1107                 int64_t last = s->user_specified_pts;
1108
1109                 if (pts <= last) {
1110                     av_log(s->avctx, AV_LOG_ERROR,
1111                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1112                            pts, last);
1113                     return AVERROR(EINVAL);
1114                 }
1115
1116                 if (!s->low_delay && display_picture_number == 1)
1117                     s->dts_delta = pts - last;
1118             }
1119             s->user_specified_pts = pts;
1120         } else {
1121             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1122                 s->user_specified_pts =
1123                 pts = s->user_specified_pts + 1;
1124                 av_log(s->avctx, AV_LOG_INFO,
1125                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1126                        pts);
1127             } else {
1128                 pts = display_picture_number;
1129             }
1130         }
1131     }
1132
1133     if (pic_arg) {
1134         if (!pic_arg->buf[0] ||
1135             pic_arg->linesize[0] != s->linesize ||
1136             pic_arg->linesize[1] != s->uvlinesize ||
1137             pic_arg->linesize[2] != s->uvlinesize)
1138             direct = 0;
1139         if ((s->width & 15) || (s->height & 15))
1140             direct = 0;
1141         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1142             direct = 0;
1143         if (s->linesize & (STRIDE_ALIGN-1))
1144             direct = 0;
1145
1146         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1147                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1148
1149         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1150         if (i < 0)
1151             return i;
1152
1153         pic = &s->picture[i];
1154         pic->reference = 3;
1155
1156         if (direct) {
1157             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1158                 return ret;
1159         }
1160         ret = alloc_picture(s, pic, direct);
1161         if (ret < 0)
1162             return ret;
1163
1164         if (!direct) {
1165             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1166                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1167                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1168                 // empty
1169             } else {
1170                 int h_chroma_shift, v_chroma_shift;
1171                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1172                                                  &h_chroma_shift,
1173                                                  &v_chroma_shift);
1174
1175                 for (i = 0; i < 3; i++) {
1176                     int src_stride = pic_arg->linesize[i];
1177                     int dst_stride = i ? s->uvlinesize : s->linesize;
1178                     int h_shift = i ? h_chroma_shift : 0;
1179                     int v_shift = i ? v_chroma_shift : 0;
1180                     int w = s->width  >> h_shift;
1181                     int h = s->height >> v_shift;
1182                     uint8_t *src = pic_arg->data[i];
1183                     uint8_t *dst = pic->f->data[i];
1184                     int vpad = 16;
1185
1186                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1187                         && !s->progressive_sequence
1188                         && FFALIGN(s->height, 32) - s->height > 16)
1189                         vpad = 32;
1190
1191                     if (!s->avctx->rc_buffer_size)
1192                         dst += INPLACE_OFFSET;
1193
1194                     if (src_stride == dst_stride)
1195                         memcpy(dst, src, src_stride * h);
1196                     else {
1197                         int h2 = h;
1198                         uint8_t *dst2 = dst;
1199                         while (h2--) {
1200                             memcpy(dst2, src, w);
1201                             dst2 += dst_stride;
1202                             src += src_stride;
1203                         }
1204                     }
1205                     if ((s->width & 15) || (s->height & (vpad-1))) {
1206                         s->mpvencdsp.draw_edges(dst, dst_stride,
1207                                                 w, h,
1208                                                 16 >> h_shift,
1209                                                 vpad >> v_shift,
1210                                                 EDGE_BOTTOM);
1211                     }
1212                 }
1213             }
1214         }
1215         ret = av_frame_copy_props(pic->f, pic_arg);
1216         if (ret < 0)
1217             return ret;
1218
1219         pic->f->display_picture_number = display_picture_number;
1220         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1221     }
1222
1223     /* shift buffer entries */
1224     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1225         s->input_picture[i - 1] = s->input_picture[i];
1226
1227     s->input_picture[encoding_delay] = (Picture*) pic;
1228
1229     return 0;
1230 }
1231
1232 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1233 {
1234     int x, y, plane;
1235     int score = 0;
1236     int64_t score64 = 0;
1237
1238     for (plane = 0; plane < 3; plane++) {
1239         const int stride = p->f->linesize[plane];
1240         const int bw = plane ? 1 : 2;
1241         for (y = 0; y < s->mb_height * bw; y++) {
1242             for (x = 0; x < s->mb_width * bw; x++) {
1243                 int off = p->shared ? 0 : 16;
1244                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1245                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1246                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1247
1248                 switch (FFABS(s->avctx->frame_skip_exp)) {
1249                 case 0: score    =  FFMAX(score, v);          break;
1250                 case 1: score   += FFABS(v);                  break;
1251                 case 2: score64 += v * (int64_t)v;                       break;
1252                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1253                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1254                 }
1255             }
1256         }
1257     }
1258     emms_c();
1259
1260     if (score)
1261         score64 = score;
1262     if (s->avctx->frame_skip_exp < 0)
1263         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1264                       -1.0/s->avctx->frame_skip_exp);
1265
1266     if (score64 < s->avctx->frame_skip_threshold)
1267         return 1;
1268     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1269         return 1;
1270     return 0;
1271 }
1272
1273 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1274 {
1275     AVPacket pkt = { 0 };
1276     int ret, got_output;
1277
1278     av_init_packet(&pkt);
1279     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1280     if (ret < 0)
1281         return ret;
1282
1283     ret = pkt.size;
1284     av_free_packet(&pkt);
1285     return ret;
1286 }
1287
1288 static int estimate_best_b_count(MpegEncContext *s)
1289 {
1290     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1291     AVCodecContext *c = avcodec_alloc_context3(NULL);
1292     const int scale = s->avctx->brd_scale;
1293     int i, j, out_size, p_lambda, b_lambda, lambda2;
1294     int64_t best_rd  = INT64_MAX;
1295     int best_b_count = -1;
1296
1297     if (!c)
1298         return AVERROR(ENOMEM);
1299     av_assert0(scale >= 0 && scale <= 3);
1300
1301     //emms_c();
1302     //s->next_picture_ptr->quality;
1303     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1304     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1305     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1306     if (!b_lambda) // FIXME we should do this somewhere else
1307         b_lambda = p_lambda;
1308     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1309                FF_LAMBDA_SHIFT;
1310
1311     c->width        = s->width  >> scale;
1312     c->height       = s->height >> scale;
1313     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1314     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1315     c->mb_decision  = s->avctx->mb_decision;
1316     c->me_cmp       = s->avctx->me_cmp;
1317     c->mb_cmp       = s->avctx->mb_cmp;
1318     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1319     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1320     c->time_base    = s->avctx->time_base;
1321     c->max_b_frames = s->max_b_frames;
1322
1323     if (avcodec_open2(c, codec, NULL) < 0)
1324         return -1;
1325
1326     for (i = 0; i < s->max_b_frames + 2; i++) {
1327         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1328                                                 s->next_picture_ptr;
1329         uint8_t *data[4];
1330
1331         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1332             pre_input = *pre_input_ptr;
1333             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1334
1335             if (!pre_input.shared && i) {
1336                 data[0] += INPLACE_OFFSET;
1337                 data[1] += INPLACE_OFFSET;
1338                 data[2] += INPLACE_OFFSET;
1339             }
1340
1341             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1342                                        s->tmp_frames[i]->linesize[0],
1343                                        data[0],
1344                                        pre_input.f->linesize[0],
1345                                        c->width, c->height);
1346             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1347                                        s->tmp_frames[i]->linesize[1],
1348                                        data[1],
1349                                        pre_input.f->linesize[1],
1350                                        c->width >> 1, c->height >> 1);
1351             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1352                                        s->tmp_frames[i]->linesize[2],
1353                                        data[2],
1354                                        pre_input.f->linesize[2],
1355                                        c->width >> 1, c->height >> 1);
1356         }
1357     }
1358
1359     for (j = 0; j < s->max_b_frames + 1; j++) {
1360         int64_t rd = 0;
1361
1362         if (!s->input_picture[j])
1363             break;
1364
1365         c->error[0] = c->error[1] = c->error[2] = 0;
1366
1367         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1368         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1369
1370         out_size = encode_frame(c, s->tmp_frames[0]);
1371
1372         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1373
1374         for (i = 0; i < s->max_b_frames + 1; i++) {
1375             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1376
1377             s->tmp_frames[i + 1]->pict_type = is_p ?
1378                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1379             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1380
1381             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1382
1383             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1384         }
1385
1386         /* get the delayed frames */
1387         while (out_size) {
1388             out_size = encode_frame(c, NULL);
1389             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1390         }
1391
1392         rd += c->error[0] + c->error[1] + c->error[2];
1393
1394         if (rd < best_rd) {
1395             best_rd = rd;
1396             best_b_count = j;
1397         }
1398     }
1399
1400     avcodec_close(c);
1401     av_freep(&c);
1402
1403     return best_b_count;
1404 }
1405
1406 static int select_input_picture(MpegEncContext *s)
1407 {
1408     int i, ret;
1409
1410     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1411         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1412     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1413
1414     /* set next picture type & ordering */
1415     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1416         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1417             if (s->picture_in_gop_number < s->gop_size &&
1418                 s->next_picture_ptr &&
1419                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1420                 // FIXME check that te gop check above is +-1 correct
1421                 av_frame_unref(s->input_picture[0]->f);
1422
1423                 ff_vbv_update(s, 0);
1424
1425                 goto no_output_pic;
1426             }
1427         }
1428
1429         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1430             !s->next_picture_ptr || s->intra_only) {
1431             s->reordered_input_picture[0] = s->input_picture[0];
1432             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1433             s->reordered_input_picture[0]->f->coded_picture_number =
1434                 s->coded_picture_number++;
1435         } else {
1436             int b_frames;
1437
1438             if (s->avctx->flags & CODEC_FLAG_PASS2) {
1439                 for (i = 0; i < s->max_b_frames + 1; i++) {
1440                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1441
1442                     if (pict_num >= s->rc_context.num_entries)
1443                         break;
1444                     if (!s->input_picture[i]) {
1445                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1446                         break;
1447                     }
1448
1449                     s->input_picture[i]->f->pict_type =
1450                         s->rc_context.entry[pict_num].new_pict_type;
1451                 }
1452             }
1453
1454             if (s->avctx->b_frame_strategy == 0) {
1455                 b_frames = s->max_b_frames;
1456                 while (b_frames && !s->input_picture[b_frames])
1457                     b_frames--;
1458             } else if (s->avctx->b_frame_strategy == 1) {
1459                 for (i = 1; i < s->max_b_frames + 1; i++) {
1460                     if (s->input_picture[i] &&
1461                         s->input_picture[i]->b_frame_score == 0) {
1462                         s->input_picture[i]->b_frame_score =
1463                             get_intra_count(s,
1464                                             s->input_picture[i    ]->f->data[0],
1465                                             s->input_picture[i - 1]->f->data[0],
1466                                             s->linesize) + 1;
1467                     }
1468                 }
1469                 for (i = 0; i < s->max_b_frames + 1; i++) {
1470                     if (!s->input_picture[i] ||
1471                         s->input_picture[i]->b_frame_score - 1 >
1472                             s->mb_num / s->avctx->b_sensitivity)
1473                         break;
1474                 }
1475
1476                 b_frames = FFMAX(0, i - 1);
1477
1478                 /* reset scores */
1479                 for (i = 0; i < b_frames + 1; i++) {
1480                     s->input_picture[i]->b_frame_score = 0;
1481                 }
1482             } else if (s->avctx->b_frame_strategy == 2) {
1483                 b_frames = estimate_best_b_count(s);
1484             } else {
1485                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1486                 b_frames = 0;
1487             }
1488
1489             emms_c();
1490
1491             for (i = b_frames - 1; i >= 0; i--) {
1492                 int type = s->input_picture[i]->f->pict_type;
1493                 if (type && type != AV_PICTURE_TYPE_B)
1494                     b_frames = i;
1495             }
1496             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1497                 b_frames == s->max_b_frames) {
1498                 av_log(s->avctx, AV_LOG_ERROR,
1499                        "warning, too many b frames in a row\n");
1500             }
1501
1502             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1503                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1504                     s->gop_size > s->picture_in_gop_number) {
1505                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1506                 } else {
1507                     if (s->avctx->flags & CODEC_FLAG_CLOSED_GOP)
1508                         b_frames = 0;
1509                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1510                 }
1511             }
1512
1513             if ((s->avctx->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1514                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1515                 b_frames--;
1516
1517             s->reordered_input_picture[0] = s->input_picture[b_frames];
1518             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1519                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1520             s->reordered_input_picture[0]->f->coded_picture_number =
1521                 s->coded_picture_number++;
1522             for (i = 0; i < b_frames; i++) {
1523                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1524                 s->reordered_input_picture[i + 1]->f->pict_type =
1525                     AV_PICTURE_TYPE_B;
1526                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1527                     s->coded_picture_number++;
1528             }
1529         }
1530     }
1531 no_output_pic:
1532     if (s->reordered_input_picture[0]) {
1533         s->reordered_input_picture[0]->reference =
1534            s->reordered_input_picture[0]->f->pict_type !=
1535                AV_PICTURE_TYPE_B ? 3 : 0;
1536
1537         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1538         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1539             return ret;
1540
1541         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1542             // input is a shared pix, so we can't modifiy it -> alloc a new
1543             // one & ensure that the shared one is reuseable
1544
1545             Picture *pic;
1546             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1547             if (i < 0)
1548                 return i;
1549             pic = &s->picture[i];
1550
1551             pic->reference = s->reordered_input_picture[0]->reference;
1552             if (alloc_picture(s, pic, 0) < 0) {
1553                 return -1;
1554             }
1555
1556             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1557             if (ret < 0)
1558                 return ret;
1559
1560             /* mark us unused / free shared pic */
1561             av_frame_unref(s->reordered_input_picture[0]->f);
1562             s->reordered_input_picture[0]->shared = 0;
1563
1564             s->current_picture_ptr = pic;
1565         } else {
1566             // input is not a shared pix -> reuse buffer for current_pix
1567             s->current_picture_ptr = s->reordered_input_picture[0];
1568             for (i = 0; i < 4; i++) {
1569                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1570             }
1571         }
1572         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1573         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1574                                        s->current_picture_ptr)) < 0)
1575             return ret;
1576
1577         s->picture_number = s->new_picture.f->display_picture_number;
1578     } else {
1579         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1580     }
1581     return 0;
1582 }
1583
1584 static void frame_end(MpegEncContext *s)
1585 {
1586     if (s->unrestricted_mv &&
1587         s->current_picture.reference &&
1588         !s->intra_only) {
1589         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1590         int hshift = desc->log2_chroma_w;
1591         int vshift = desc->log2_chroma_h;
1592         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1593                                 s->current_picture.f->linesize[0],
1594                                 s->h_edge_pos, s->v_edge_pos,
1595                                 EDGE_WIDTH, EDGE_WIDTH,
1596                                 EDGE_TOP | EDGE_BOTTOM);
1597         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1598                                 s->current_picture.f->linesize[1],
1599                                 s->h_edge_pos >> hshift,
1600                                 s->v_edge_pos >> vshift,
1601                                 EDGE_WIDTH >> hshift,
1602                                 EDGE_WIDTH >> vshift,
1603                                 EDGE_TOP | EDGE_BOTTOM);
1604         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1605                                 s->current_picture.f->linesize[2],
1606                                 s->h_edge_pos >> hshift,
1607                                 s->v_edge_pos >> vshift,
1608                                 EDGE_WIDTH >> hshift,
1609                                 EDGE_WIDTH >> vshift,
1610                                 EDGE_TOP | EDGE_BOTTOM);
1611     }
1612
1613     emms_c();
1614
1615     s->last_pict_type                 = s->pict_type;
1616     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1617     if (s->pict_type!= AV_PICTURE_TYPE_B)
1618         s->last_non_b_pict_type = s->pict_type;
1619
1620 #if FF_API_CODED_FRAME
1621 FF_DISABLE_DEPRECATION_WARNINGS
1622     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1623 FF_ENABLE_DEPRECATION_WARNINGS
1624 #endif
1625 }
1626
1627 static void update_noise_reduction(MpegEncContext *s)
1628 {
1629     int intra, i;
1630
1631     for (intra = 0; intra < 2; intra++) {
1632         if (s->dct_count[intra] > (1 << 16)) {
1633             for (i = 0; i < 64; i++) {
1634                 s->dct_error_sum[intra][i] >>= 1;
1635             }
1636             s->dct_count[intra] >>= 1;
1637         }
1638
1639         for (i = 0; i < 64; i++) {
1640             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1641                                        s->dct_count[intra] +
1642                                        s->dct_error_sum[intra][i] / 2) /
1643                                       (s->dct_error_sum[intra][i] + 1);
1644         }
1645     }
1646 }
1647
1648 static int frame_start(MpegEncContext *s)
1649 {
1650     int ret;
1651
1652     /* mark & release old frames */
1653     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1654         s->last_picture_ptr != s->next_picture_ptr &&
1655         s->last_picture_ptr->f->buf[0]) {
1656         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1657     }
1658
1659     s->current_picture_ptr->f->pict_type = s->pict_type;
1660     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1661
1662     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1663     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1664                                    s->current_picture_ptr)) < 0)
1665         return ret;
1666
1667     if (s->pict_type != AV_PICTURE_TYPE_B) {
1668         s->last_picture_ptr = s->next_picture_ptr;
1669         if (!s->droppable)
1670             s->next_picture_ptr = s->current_picture_ptr;
1671     }
1672
1673     if (s->last_picture_ptr) {
1674         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1675         if (s->last_picture_ptr->f->buf[0] &&
1676             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1677                                        s->last_picture_ptr)) < 0)
1678             return ret;
1679     }
1680     if (s->next_picture_ptr) {
1681         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1682         if (s->next_picture_ptr->f->buf[0] &&
1683             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1684                                        s->next_picture_ptr)) < 0)
1685             return ret;
1686     }
1687
1688     if (s->picture_structure!= PICT_FRAME) {
1689         int i;
1690         for (i = 0; i < 4; i++) {
1691             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1692                 s->current_picture.f->data[i] +=
1693                     s->current_picture.f->linesize[i];
1694             }
1695             s->current_picture.f->linesize[i] *= 2;
1696             s->last_picture.f->linesize[i]    *= 2;
1697             s->next_picture.f->linesize[i]    *= 2;
1698         }
1699     }
1700
1701     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1702         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1703         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1704     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1705         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1706         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1707     } else {
1708         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1709         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1710     }
1711
1712     if (s->dct_error_sum) {
1713         av_assert2(s->avctx->noise_reduction && s->encoding);
1714         update_noise_reduction(s);
1715     }
1716
1717     return 0;
1718 }
1719
1720 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1721                           const AVFrame *pic_arg, int *got_packet)
1722 {
1723     MpegEncContext *s = avctx->priv_data;
1724     int i, stuffing_count, ret;
1725     int context_count = s->slice_context_count;
1726
1727     s->picture_in_gop_number++;
1728
1729     if (load_input_picture(s, pic_arg) < 0)
1730         return -1;
1731
1732     if (select_input_picture(s) < 0) {
1733         return -1;
1734     }
1735
1736     /* output? */
1737     if (s->new_picture.f->data[0]) {
1738         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1739         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - FF_INPUT_BUFFER_PADDING_SIZE
1740                                               :
1741                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1742         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size)) < 0)
1743             return ret;
1744         if (s->mb_info) {
1745             s->mb_info_ptr = av_packet_new_side_data(pkt,
1746                                  AV_PKT_DATA_H263_MB_INFO,
1747                                  s->mb_width*s->mb_height*12);
1748             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1749         }
1750
1751         for (i = 0; i < context_count; i++) {
1752             int start_y = s->thread_context[i]->start_mb_y;
1753             int   end_y = s->thread_context[i]->  end_mb_y;
1754             int h       = s->mb_height;
1755             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1756             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1757
1758             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1759         }
1760
1761         s->pict_type = s->new_picture.f->pict_type;
1762         //emms_c();
1763         ret = frame_start(s);
1764         if (ret < 0)
1765             return ret;
1766 vbv_retry:
1767         ret = encode_picture(s, s->picture_number);
1768         if (growing_buffer) {
1769             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1770             pkt->data = s->pb.buf;
1771             pkt->size = avctx->internal->byte_buffer_size;
1772         }
1773         if (ret < 0)
1774             return -1;
1775
1776         avctx->header_bits = s->header_bits;
1777         avctx->mv_bits     = s->mv_bits;
1778         avctx->misc_bits   = s->misc_bits;
1779         avctx->i_tex_bits  = s->i_tex_bits;
1780         avctx->p_tex_bits  = s->p_tex_bits;
1781         avctx->i_count     = s->i_count;
1782         // FIXME f/b_count in avctx
1783         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1784         avctx->skip_count  = s->skip_count;
1785
1786         frame_end(s);
1787
1788         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality, NULL, 0, s->pict_type);
1789
1790         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1791             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1792
1793         if (avctx->rc_buffer_size) {
1794             RateControlContext *rcc = &s->rc_context;
1795             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1796
1797             if (put_bits_count(&s->pb) > max_size &&
1798                 s->lambda < s->lmax) {
1799                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1800                                        (s->qscale + 1) / s->qscale);
1801                 if (s->adaptive_quant) {
1802                     int i;
1803                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1804                         s->lambda_table[i] =
1805                             FFMAX(s->lambda_table[i] + 1,
1806                                   s->lambda_table[i] * (s->qscale + 1) /
1807                                   s->qscale);
1808                 }
1809                 s->mb_skipped = 0;        // done in frame_start()
1810                 // done in encode_picture() so we must undo it
1811                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1812                     if (s->flipflop_rounding          ||
1813                         s->codec_id == AV_CODEC_ID_H263P ||
1814                         s->codec_id == AV_CODEC_ID_MPEG4)
1815                         s->no_rounding ^= 1;
1816                 }
1817                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1818                     s->time_base       = s->last_time_base;
1819                     s->last_non_b_time = s->time - s->pp_time;
1820                 }
1821                 for (i = 0; i < context_count; i++) {
1822                     PutBitContext *pb = &s->thread_context[i]->pb;
1823                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1824                 }
1825                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1826                 goto vbv_retry;
1827             }
1828
1829             av_assert0(s->avctx->rc_max_rate);
1830         }
1831
1832         if (s->avctx->flags & CODEC_FLAG_PASS1)
1833             ff_write_pass1_stats(s);
1834
1835         for (i = 0; i < 4; i++) {
1836             s->current_picture_ptr->f->error[i] =
1837             s->current_picture.f->error[i] =
1838                 s->current_picture.error[i];
1839             avctx->error[i] += s->current_picture_ptr->f->error[i];
1840         }
1841
1842         if (s->avctx->flags & CODEC_FLAG_PASS1)
1843             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1844                    avctx->i_tex_bits + avctx->p_tex_bits ==
1845                        put_bits_count(&s->pb));
1846         flush_put_bits(&s->pb);
1847         s->frame_bits  = put_bits_count(&s->pb);
1848
1849         stuffing_count = ff_vbv_update(s, s->frame_bits);
1850         s->stuffing_bits = 8*stuffing_count;
1851         if (stuffing_count) {
1852             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1853                     stuffing_count + 50) {
1854                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1855                 return -1;
1856             }
1857
1858             switch (s->codec_id) {
1859             case AV_CODEC_ID_MPEG1VIDEO:
1860             case AV_CODEC_ID_MPEG2VIDEO:
1861                 while (stuffing_count--) {
1862                     put_bits(&s->pb, 8, 0);
1863                 }
1864             break;
1865             case AV_CODEC_ID_MPEG4:
1866                 put_bits(&s->pb, 16, 0);
1867                 put_bits(&s->pb, 16, 0x1C3);
1868                 stuffing_count -= 4;
1869                 while (stuffing_count--) {
1870                     put_bits(&s->pb, 8, 0xFF);
1871                 }
1872             break;
1873             default:
1874                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1875             }
1876             flush_put_bits(&s->pb);
1877             s->frame_bits  = put_bits_count(&s->pb);
1878         }
1879
1880         /* update mpeg1/2 vbv_delay for CBR */
1881         if (s->avctx->rc_max_rate                          &&
1882             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1883             s->out_format == FMT_MPEG1                     &&
1884             90000LL * (avctx->rc_buffer_size - 1) <=
1885                 s->avctx->rc_max_rate * 0xFFFFLL) {
1886             int vbv_delay, min_delay;
1887             double inbits  = s->avctx->rc_max_rate *
1888                              av_q2d(s->avctx->time_base);
1889             int    minbits = s->frame_bits - 8 *
1890                              (s->vbv_delay_ptr - s->pb.buf - 1);
1891             double bits    = s->rc_context.buffer_index + minbits - inbits;
1892
1893             if (bits < 0)
1894                 av_log(s->avctx, AV_LOG_ERROR,
1895                        "Internal error, negative bits\n");
1896
1897             assert(s->repeat_first_field == 0);
1898
1899             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1900             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1901                         s->avctx->rc_max_rate;
1902
1903             vbv_delay = FFMAX(vbv_delay, min_delay);
1904
1905             av_assert0(vbv_delay < 0xFFFF);
1906
1907             s->vbv_delay_ptr[0] &= 0xF8;
1908             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1909             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1910             s->vbv_delay_ptr[2] &= 0x07;
1911             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1912             avctx->vbv_delay     = vbv_delay * 300;
1913         }
1914         s->total_bits     += s->frame_bits;
1915         avctx->frame_bits  = s->frame_bits;
1916
1917         pkt->pts = s->current_picture.f->pts;
1918         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1919             if (!s->current_picture.f->coded_picture_number)
1920                 pkt->dts = pkt->pts - s->dts_delta;
1921             else
1922                 pkt->dts = s->reordered_pts;
1923             s->reordered_pts = pkt->pts;
1924         } else
1925             pkt->dts = pkt->pts;
1926         if (s->current_picture.f->key_frame)
1927             pkt->flags |= AV_PKT_FLAG_KEY;
1928         if (s->mb_info)
1929             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1930     } else {
1931         s->frame_bits = 0;
1932     }
1933
1934     /* release non-reference frames */
1935     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1936         if (!s->picture[i].reference)
1937             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1938     }
1939
1940     av_assert1((s->frame_bits & 7) == 0);
1941
1942     pkt->size = s->frame_bits / 8;
1943     *got_packet = !!pkt->size;
1944     return 0;
1945 }
1946
1947 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1948                                                 int n, int threshold)
1949 {
1950     static const char tab[64] = {
1951         3, 2, 2, 1, 1, 1, 1, 1,
1952         1, 1, 1, 1, 1, 1, 1, 1,
1953         1, 1, 1, 1, 1, 1, 1, 1,
1954         0, 0, 0, 0, 0, 0, 0, 0,
1955         0, 0, 0, 0, 0, 0, 0, 0,
1956         0, 0, 0, 0, 0, 0, 0, 0,
1957         0, 0, 0, 0, 0, 0, 0, 0,
1958         0, 0, 0, 0, 0, 0, 0, 0
1959     };
1960     int score = 0;
1961     int run = 0;
1962     int i;
1963     int16_t *block = s->block[n];
1964     const int last_index = s->block_last_index[n];
1965     int skip_dc;
1966
1967     if (threshold < 0) {
1968         skip_dc = 0;
1969         threshold = -threshold;
1970     } else
1971         skip_dc = 1;
1972
1973     /* Are all we could set to zero already zero? */
1974     if (last_index <= skip_dc - 1)
1975         return;
1976
1977     for (i = 0; i <= last_index; i++) {
1978         const int j = s->intra_scantable.permutated[i];
1979         const int level = FFABS(block[j]);
1980         if (level == 1) {
1981             if (skip_dc && i == 0)
1982                 continue;
1983             score += tab[run];
1984             run = 0;
1985         } else if (level > 1) {
1986             return;
1987         } else {
1988             run++;
1989         }
1990     }
1991     if (score >= threshold)
1992         return;
1993     for (i = skip_dc; i <= last_index; i++) {
1994         const int j = s->intra_scantable.permutated[i];
1995         block[j] = 0;
1996     }
1997     if (block[0])
1998         s->block_last_index[n] = 0;
1999     else
2000         s->block_last_index[n] = -1;
2001 }
2002
2003 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2004                                int last_index)
2005 {
2006     int i;
2007     const int maxlevel = s->max_qcoeff;
2008     const int minlevel = s->min_qcoeff;
2009     int overflow = 0;
2010
2011     if (s->mb_intra) {
2012         i = 1; // skip clipping of intra dc
2013     } else
2014         i = 0;
2015
2016     for (; i <= last_index; i++) {
2017         const int j = s->intra_scantable.permutated[i];
2018         int level = block[j];
2019
2020         if (level > maxlevel) {
2021             level = maxlevel;
2022             overflow++;
2023         } else if (level < minlevel) {
2024             level = minlevel;
2025             overflow++;
2026         }
2027
2028         block[j] = level;
2029     }
2030
2031     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2032         av_log(s->avctx, AV_LOG_INFO,
2033                "warning, clipping %d dct coefficients to %d..%d\n",
2034                overflow, minlevel, maxlevel);
2035 }
2036
/**
 * Compute a weight for each of the 64 pixels of an 8x8 block.
 *
 * For every pixel the sum and sum of squares over its 3x3 neighbourhood
 * (truncated at the block border) are accumulated and the weight is
 * 36 * ff_sqrt(count * sqr - sum * sum) / count, i.e. 36 times the local
 * standard deviation if ff_sqrt() is an integer square root (assumption,
 * ff_sqrt() is defined elsewhere).
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int sum   = 0;
            int sqr   = 0;
            int count = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2060
2061 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2062                                                 int motion_x, int motion_y,
2063                                                 int mb_block_height,
2064                                                 int mb_block_width,
2065                                                 int mb_block_count)
2066 {
2067     int16_t weight[12][64];
2068     int16_t orig[12][64];
2069     const int mb_x = s->mb_x;
2070     const int mb_y = s->mb_y;
2071     int i;
2072     int skip_dct[12];
2073     int dct_offset = s->linesize * 8; // default for progressive frames
2074     int uv_dct_offset = s->uvlinesize * 8;
2075     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2076     ptrdiff_t wrap_y, wrap_c;
2077
2078     for (i = 0; i < mb_block_count; i++)
2079         skip_dct[i] = s->skipdct;
2080
2081     if (s->adaptive_quant) {
2082         const int last_qp = s->qscale;
2083         const int mb_xy = mb_x + mb_y * s->mb_stride;
2084
2085         s->lambda = s->lambda_table[mb_xy];
2086         update_qscale(s);
2087
2088         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2089             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2090             s->dquant = s->qscale - last_qp;
2091
2092             if (s->out_format == FMT_H263) {
2093                 s->dquant = av_clip(s->dquant, -2, 2);
2094
2095                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2096                     if (!s->mb_intra) {
2097                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2098                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2099                                 s->dquant = 0;
2100                         }
2101                         if (s->mv_type == MV_TYPE_8X8)
2102                             s->dquant = 0;
2103                     }
2104                 }
2105             }
2106         }
2107         ff_set_qscale(s, last_qp + s->dquant);
2108     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2109         ff_set_qscale(s, s->qscale + s->dquant);
2110
2111     wrap_y = s->linesize;
2112     wrap_c = s->uvlinesize;
2113     ptr_y  = s->new_picture.f->data[0] +
2114              (mb_y * 16 * wrap_y)              + mb_x * 16;
2115     ptr_cb = s->new_picture.f->data[1] +
2116              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2117     ptr_cr = s->new_picture.f->data[2] +
2118              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2119
2120     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2121         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2122         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2123         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2124         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2125                                  wrap_y, wrap_y,
2126                                  16, 16, mb_x * 16, mb_y * 16,
2127                                  s->width, s->height);
2128         ptr_y = ebuf;
2129         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2130                                  wrap_c, wrap_c,
2131                                  mb_block_width, mb_block_height,
2132                                  mb_x * mb_block_width, mb_y * mb_block_height,
2133                                  cw, ch);
2134         ptr_cb = ebuf + 16 * wrap_y;
2135         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2136                                  wrap_c, wrap_c,
2137                                  mb_block_width, mb_block_height,
2138                                  mb_x * mb_block_width, mb_y * mb_block_height,
2139                                  cw, ch);
2140         ptr_cr = ebuf + 16 * wrap_y + 16;
2141     }
2142
2143     if (s->mb_intra) {
2144         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2145             int progressive_score, interlaced_score;
2146
2147             s->interlaced_dct = 0;
2148             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2149                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2150                                                      NULL, wrap_y, 8) - 400;
2151
2152             if (progressive_score > 0) {
2153                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2154                                                         NULL, wrap_y * 2, 8) +
2155                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2156                                                         NULL, wrap_y * 2, 8);
2157                 if (progressive_score > interlaced_score) {
2158                     s->interlaced_dct = 1;
2159
2160                     dct_offset = wrap_y;
2161                     uv_dct_offset = wrap_c;
2162                     wrap_y <<= 1;
2163                     if (s->chroma_format == CHROMA_422 ||
2164                         s->chroma_format == CHROMA_444)
2165                         wrap_c <<= 1;
2166                 }
2167             }
2168         }
2169
2170         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2171         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2172         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2173         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2174
2175         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2176             skip_dct[4] = 1;
2177             skip_dct[5] = 1;
2178         } else {
2179             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2180             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2181             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2182                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2183                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2184             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2185                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2186                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2187                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2188                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2189                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2190                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2191             }
2192         }
2193     } else {
2194         op_pixels_func (*op_pix)[4];
2195         qpel_mc_func (*op_qpix)[16];
2196         uint8_t *dest_y, *dest_cb, *dest_cr;
2197
2198         dest_y  = s->dest[0];
2199         dest_cb = s->dest[1];
2200         dest_cr = s->dest[2];
2201
2202         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2203             op_pix  = s->hdsp.put_pixels_tab;
2204             op_qpix = s->qdsp.put_qpel_pixels_tab;
2205         } else {
2206             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2207             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2208         }
2209
2210         if (s->mv_dir & MV_DIR_FORWARD) {
2211             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2212                           s->last_picture.f->data,
2213                           op_pix, op_qpix);
2214             op_pix  = s->hdsp.avg_pixels_tab;
2215             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2216         }
2217         if (s->mv_dir & MV_DIR_BACKWARD) {
2218             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2219                           s->next_picture.f->data,
2220                           op_pix, op_qpix);
2221         }
2222
2223         if (s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
2224             int progressive_score, interlaced_score;
2225
2226             s->interlaced_dct = 0;
2227             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2228                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2229                                                      ptr_y + wrap_y * 8,
2230                                                      wrap_y, 8) - 400;
2231
2232             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2233                 progressive_score -= 400;
2234
2235             if (progressive_score > 0) {
2236                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2237                                                         wrap_y * 2, 8) +
2238                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2239                                                         ptr_y + wrap_y,
2240                                                         wrap_y * 2, 8);
2241
2242                 if (progressive_score > interlaced_score) {
2243                     s->interlaced_dct = 1;
2244
2245                     dct_offset = wrap_y;
2246                     uv_dct_offset = wrap_c;
2247                     wrap_y <<= 1;
2248                     if (s->chroma_format == CHROMA_422)
2249                         wrap_c <<= 1;
2250                 }
2251             }
2252         }
2253
2254         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2255         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2256         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2257                             dest_y + dct_offset, wrap_y);
2258         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2259                             dest_y + dct_offset + 8, wrap_y);
2260
2261         if (s->avctx->flags & CODEC_FLAG_GRAY) {
2262             skip_dct[4] = 1;
2263             skip_dct[5] = 1;
2264         } else {
2265             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2266             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2267             if (!s->chroma_y_shift) { /* 422 */
2268                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2269                                     dest_cb + uv_dct_offset, wrap_c);
2270                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2271                                     dest_cr + uv_dct_offset, wrap_c);
2272             }
2273         }
2274         /* pre quantization */
2275         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2276                 2 * s->qscale * s->qscale) {
2277             // FIXME optimize
2278             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2279                 skip_dct[0] = 1;
2280             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2281                 skip_dct[1] = 1;
2282             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2283                                wrap_y, 8) < 20 * s->qscale)
2284                 skip_dct[2] = 1;
2285             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2286                                wrap_y, 8) < 20 * s->qscale)
2287                 skip_dct[3] = 1;
2288             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2289                 skip_dct[4] = 1;
2290             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2291                 skip_dct[5] = 1;
2292             if (!s->chroma_y_shift) { /* 422 */
2293                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2294                                    dest_cb + uv_dct_offset,
2295                                    wrap_c, 8) < 20 * s->qscale)
2296                     skip_dct[6] = 1;
2297                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2298                                    dest_cr + uv_dct_offset,
2299                                    wrap_c, 8) < 20 * s->qscale)
2300                     skip_dct[7] = 1;
2301             }
2302         }
2303     }
2304
2305     if (s->quantizer_noise_shaping) {
2306         if (!skip_dct[0])
2307             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2308         if (!skip_dct[1])
2309             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2310         if (!skip_dct[2])
2311             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2312         if (!skip_dct[3])
2313             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2314         if (!skip_dct[4])
2315             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2316         if (!skip_dct[5])
2317             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2318         if (!s->chroma_y_shift) { /* 422 */
2319             if (!skip_dct[6])
2320                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2321                                   wrap_c);
2322             if (!skip_dct[7])
2323                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2324                                   wrap_c);
2325         }
2326         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2327     }
2328
2329     /* DCT & quantize */
2330     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2331     {
2332         for (i = 0; i < mb_block_count; i++) {
2333             if (!skip_dct[i]) {
2334                 int overflow;
2335                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2336                 // FIXME we could decide to change to quantizer instead of
2337                 // clipping
2338                 // JS: I don't think that would be a good idea it could lower
2339                 //     quality instead of improve it. Just INTRADC clipping
2340                 //     deserves changes in quantizer
2341                 if (overflow)
2342                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2343             } else
2344                 s->block_last_index[i] = -1;
2345         }
2346         if (s->quantizer_noise_shaping) {
2347             for (i = 0; i < mb_block_count; i++) {
2348                 if (!skip_dct[i]) {
2349                     s->block_last_index[i] =
2350                         dct_quantize_refine(s, s->block[i], weight[i],
2351                                             orig[i], i, s->qscale);
2352                 }
2353             }
2354         }
2355
2356         if (s->luma_elim_threshold && !s->mb_intra)
2357             for (i = 0; i < 4; i++)
2358                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2359         if (s->chroma_elim_threshold && !s->mb_intra)
2360             for (i = 4; i < mb_block_count; i++)
2361                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2362
2363         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2364             for (i = 0; i < mb_block_count; i++) {
2365                 if (s->block_last_index[i] == -1)
2366                     s->coded_score[i] = INT_MAX / 256;
2367             }
2368         }
2369     }
2370
2371     if ((s->avctx->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2372         s->block_last_index[4] =
2373         s->block_last_index[5] = 0;
2374         s->block[4][0] =
2375         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2376         if (!s->chroma_y_shift) { /* 422 / 444 */
2377             for (i=6; i<12; i++) {
2378                 s->block_last_index[i] = 0;
2379                 s->block[i][0] = s->block[4][0];
2380             }
2381         }
2382     }
2383
2384     // non c quantize code returns incorrect block_last_index FIXME
2385     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2386         for (i = 0; i < mb_block_count; i++) {
2387             int j;
2388             if (s->block_last_index[i] > 0) {
2389                 for (j = 63; j > 0; j--) {
2390                     if (s->block[i][s->intra_scantable.permutated[j]])
2391                         break;
2392                 }
2393                 s->block_last_index[i] = j;
2394             }
2395         }
2396     }
2397
2398     /* huffman encode */
2399     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2400     case AV_CODEC_ID_MPEG1VIDEO:
2401     case AV_CODEC_ID_MPEG2VIDEO:
2402         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2403             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2404         break;
2405     case AV_CODEC_ID_MPEG4:
2406         if (CONFIG_MPEG4_ENCODER)
2407             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2408         break;
2409     case AV_CODEC_ID_MSMPEG4V2:
2410     case AV_CODEC_ID_MSMPEG4V3:
2411     case AV_CODEC_ID_WMV1:
2412         if (CONFIG_MSMPEG4_ENCODER)
2413             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2414         break;
2415     case AV_CODEC_ID_WMV2:
2416         if (CONFIG_WMV2_ENCODER)
2417             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2418         break;
2419     case AV_CODEC_ID_H261:
2420         if (CONFIG_H261_ENCODER)
2421             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2422         break;
2423     case AV_CODEC_ID_H263:
2424     case AV_CODEC_ID_H263P:
2425     case AV_CODEC_ID_FLV1:
2426     case AV_CODEC_ID_RV10:
2427     case AV_CODEC_ID_RV20:
2428         if (CONFIG_H263_ENCODER)
2429             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2430         break;
2431     case AV_CODEC_ID_MJPEG:
2432     case AV_CODEC_ID_AMV:
2433         if (CONFIG_MJPEG_ENCODER)
2434             ff_mjpeg_encode_mb(s, s->block);
2435         break;
2436     default:
2437         av_assert1(0);
2438     }
2439 }
2440
2441 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2442 {
2443     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2444     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2445     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2446 }
2447
2448 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2449     int i;
2450
2451     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2452
2453     /* mpeg1 */
2454     d->mb_skip_run= s->mb_skip_run;
2455     for(i=0; i<3; i++)
2456         d->last_dc[i] = s->last_dc[i];
2457
2458     /* statistics */
2459     d->mv_bits= s->mv_bits;
2460     d->i_tex_bits= s->i_tex_bits;
2461     d->p_tex_bits= s->p_tex_bits;
2462     d->i_count= s->i_count;
2463     d->f_count= s->f_count;
2464     d->b_count= s->b_count;
2465     d->skip_count= s->skip_count;
2466     d->misc_bits= s->misc_bits;
2467     d->last_bits= 0;
2468
2469     d->mb_skipped= 0;
2470     d->qscale= s->qscale;
2471     d->dquant= s->dquant;
2472
2473     d->esc3_level_length= s->esc3_level_length;
2474 }
2475
2476 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2477     int i;
2478
2479     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2480     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2481
2482     /* mpeg1 */
2483     d->mb_skip_run= s->mb_skip_run;
2484     for(i=0; i<3; i++)
2485         d->last_dc[i] = s->last_dc[i];
2486
2487     /* statistics */
2488     d->mv_bits= s->mv_bits;
2489     d->i_tex_bits= s->i_tex_bits;
2490     d->p_tex_bits= s->p_tex_bits;
2491     d->i_count= s->i_count;
2492     d->f_count= s->f_count;
2493     d->b_count= s->b_count;
2494     d->skip_count= s->skip_count;
2495     d->misc_bits= s->misc_bits;
2496
2497     d->mb_intra= s->mb_intra;
2498     d->mb_skipped= s->mb_skipped;
2499     d->mv_type= s->mv_type;
2500     d->mv_dir= s->mv_dir;
2501     d->pb= s->pb;
2502     if(s->data_partitioning){
2503         d->pb2= s->pb2;
2504         d->tex_pb= s->tex_pb;
2505     }
2506     d->block= s->block;
2507     for(i=0; i<8; i++)
2508         d->block_last_index[i]= s->block_last_index[i];
2509     d->interlaced_dct= s->interlaced_dct;
2510     d->qscale= s->qscale;
2511
2512     d->esc3_level_length= s->esc3_level_length;
2513 }
2514
2515 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2516                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2517                            int *dmin, int *next_block, int motion_x, int motion_y)
2518 {
2519     int score;
2520     uint8_t *dest_backup[3];
2521
2522     copy_context_before_encode(s, backup, type);
2523
2524     s->block= s->blocks[*next_block];
2525     s->pb= pb[*next_block];
2526     if(s->data_partitioning){
2527         s->pb2   = pb2   [*next_block];
2528         s->tex_pb= tex_pb[*next_block];
2529     }
2530
2531     if(*next_block){
2532         memcpy(dest_backup, s->dest, sizeof(s->dest));
2533         s->dest[0] = s->sc.rd_scratchpad;
2534         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2535         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2536         av_assert0(s->linesize >= 32); //FIXME
2537     }
2538
2539     encode_mb(s, motion_x, motion_y);
2540
2541     score= put_bits_count(&s->pb);
2542     if(s->data_partitioning){
2543         score+= put_bits_count(&s->pb2);
2544         score+= put_bits_count(&s->tex_pb);
2545     }
2546
2547     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2548         ff_mpv_decode_mb(s, s->block);
2549
2550         score *= s->lambda2;
2551         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2552     }
2553
2554     if(*next_block){
2555         memcpy(s->dest, dest_backup, sizeof(s->dest));
2556     }
2557
2558     if(score<*dmin){
2559         *dmin= score;
2560         *next_block^=1;
2561
2562         copy_context_after_encode(best, s, type);
2563     }
2564 }
2565
2566 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2567     uint32_t *sq = ff_square_tab + 256;
2568     int acc=0;
2569     int x,y;
2570
2571     if(w==16 && h==16)
2572         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2573     else if(w==8 && h==8)
2574         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2575
2576     for(y=0; y<h; y++){
2577         for(x=0; x<w; x++){
2578             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2579         }
2580     }
2581
2582     av_assert2(acc>=0);
2583
2584     return acc;
2585 }
2586
2587 static int sse_mb(MpegEncContext *s){
2588     int w= 16;
2589     int h= 16;
2590
2591     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2592     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2593
2594     if(w==16 && h==16)
2595       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2596         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2597                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2598                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2599       }else{
2600         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2601                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2602                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2603       }
2604     else
2605         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2606                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2607                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2608 }
2609
2610 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2611     MpegEncContext *s= *(void**)arg;
2612
2613
2614     s->me.pre_pass=1;
2615     s->me.dia_size= s->avctx->pre_dia_size;
2616     s->first_slice_line=1;
2617     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2618         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2619             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2620         }
2621         s->first_slice_line=0;
2622     }
2623
2624     s->me.pre_pass=0;
2625
2626     return 0;
2627 }
2628
2629 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2630     MpegEncContext *s= *(void**)arg;
2631
2632     ff_check_alignment();
2633
2634     s->me.dia_size= s->avctx->dia_size;
2635     s->first_slice_line=1;
2636     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2637         s->mb_x=0; //for block init below
2638         ff_init_block_index(s);
2639         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2640             s->block_index[0]+=2;
2641             s->block_index[1]+=2;
2642             s->block_index[2]+=2;
2643             s->block_index[3]+=2;
2644
2645             /* compute motion vector & mb_type and store in context */
2646             if(s->pict_type==AV_PICTURE_TYPE_B)
2647                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2648             else
2649                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2650         }
2651         s->first_slice_line=0;
2652     }
2653     return 0;
2654 }
2655
2656 static int mb_var_thread(AVCodecContext *c, void *arg){
2657     MpegEncContext *s= *(void**)arg;
2658     int mb_x, mb_y;
2659
2660     ff_check_alignment();
2661
2662     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2663         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2664             int xx = mb_x * 16;
2665             int yy = mb_y * 16;
2666             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2667             int varc;
2668             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2669
2670             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2671                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2672
2673             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2674             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2675             s->me.mb_var_sum_temp    += varc;
2676         }
2677     }
2678     return 0;
2679 }
2680
2681 static void write_slice_end(MpegEncContext *s){
2682     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2683         if(s->partitioned_frame){
2684             ff_mpeg4_merge_partitions(s);
2685         }
2686
2687         ff_mpeg4_stuffing(&s->pb);
2688     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2689         ff_mjpeg_encode_stuffing(s);
2690     }
2691
2692     avpriv_align_put_bits(&s->pb);
2693     flush_put_bits(&s->pb);
2694
2695     if ((s->avctx->flags & CODEC_FLAG_PASS1) && !s->partitioned_frame)
2696         s->misc_bits+= get_bits_diff(s);
2697 }
2698
2699 static void write_mb_info(MpegEncContext *s)
2700 {
2701     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2702     int offset = put_bits_count(&s->pb);
2703     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2704     int gobn = s->mb_y / s->gob_index;
2705     int pred_x, pred_y;
2706     if (CONFIG_H263_ENCODER)
2707         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2708     bytestream_put_le32(&ptr, offset);
2709     bytestream_put_byte(&ptr, s->qscale);
2710     bytestream_put_byte(&ptr, gobn);
2711     bytestream_put_le16(&ptr, mba);
2712     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2713     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2714     /* 4MV not implemented */
2715     bytestream_put_byte(&ptr, 0); /* hmv2 */
2716     bytestream_put_byte(&ptr, 0); /* vmv2 */
2717 }
2718
2719 static void update_mb_info(MpegEncContext *s, int startcode)
2720 {
2721     if (!s->mb_info)
2722         return;
2723     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2724         s->mb_info_size += 12;
2725         s->prev_mb_info = s->last_mb_info;
2726     }
2727     if (startcode) {
2728         s->prev_mb_info = put_bits_count(&s->pb)/8;
2729         /* This might have incremented mb_info_size above, and we return without
2730          * actually writing any info into that slot yet. But in that case,
2731          * this will be called again at the start of the after writing the
2732          * start code, actually writing the mb info. */
2733         return;
2734     }
2735
2736     s->last_mb_info = put_bits_count(&s->pb)/8;
2737     if (!s->mb_info_size)
2738         s->mb_info_size += 12;
2739     write_mb_info(s);
2740 }
2741
2742 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2743 {
2744     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2745         && s->slice_context_count == 1
2746         && s->pb.buf == s->avctx->internal->byte_buffer) {
2747         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2748         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2749
2750         uint8_t *new_buffer = NULL;
2751         int new_buffer_size = 0;
2752
2753         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2754                               s->avctx->internal->byte_buffer_size + size_increase);
2755         if (!new_buffer)
2756             return AVERROR(ENOMEM);
2757
2758         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2759         av_free(s->avctx->internal->byte_buffer);
2760         s->avctx->internal->byte_buffer      = new_buffer;
2761         s->avctx->internal->byte_buffer_size = new_buffer_size;
2762         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2763         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2764         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2765     }
2766     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2767         return AVERROR(EINVAL);
2768     return 0;
2769 }
2770
2771 static int encode_thread(AVCodecContext *c, void *arg){
2772     MpegEncContext *s= *(void**)arg;
2773     int mb_x, mb_y, pdif = 0;
2774     int chr_h= 16>>s->chroma_y_shift;
2775     int i, j;
2776     MpegEncContext best_s = { 0 }, backup_s;
2777     uint8_t bit_buf[2][MAX_MB_BYTES];
2778     uint8_t bit_buf2[2][MAX_MB_BYTES];
2779     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2780     PutBitContext pb[2], pb2[2], tex_pb[2];
2781
2782     ff_check_alignment();
2783
2784     for(i=0; i<2; i++){
2785         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2786         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2787         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2788     }
2789
2790     s->last_bits= put_bits_count(&s->pb);
2791     s->mv_bits=0;
2792     s->misc_bits=0;
2793     s->i_tex_bits=0;
2794     s->p_tex_bits=0;
2795     s->i_count=0;
2796     s->f_count=0;
2797     s->b_count=0;
2798     s->skip_count=0;
2799
2800     for(i=0; i<3; i++){
2801         /* init last dc values */
2802         /* note: quant matrix value (8) is implied here */
2803         s->last_dc[i] = 128 << s->intra_dc_precision;
2804
2805         s->current_picture.error[i] = 0;
2806     }
2807     if(s->codec_id==AV_CODEC_ID_AMV){
2808         s->last_dc[0] = 128*8/13;
2809         s->last_dc[1] = 128*8/14;
2810         s->last_dc[2] = 128*8/14;
2811     }
2812     s->mb_skip_run = 0;
2813     memset(s->last_mv, 0, sizeof(s->last_mv));
2814
2815     s->last_mv_dir = 0;
2816
2817     switch(s->codec_id){
2818     case AV_CODEC_ID_H263:
2819     case AV_CODEC_ID_H263P:
2820     case AV_CODEC_ID_FLV1:
2821         if (CONFIG_H263_ENCODER)
2822             s->gob_index = H263_GOB_HEIGHT(s->height);
2823         break;
2824     case AV_CODEC_ID_MPEG4:
2825         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2826             ff_mpeg4_init_partitions(s);
2827         break;
2828     }
2829
2830     s->resync_mb_x=0;
2831     s->resync_mb_y=0;
2832     s->first_slice_line = 1;
2833     s->ptr_lastgob = s->pb.buf;
2834     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2835         s->mb_x=0;
2836         s->mb_y= mb_y;
2837
2838         ff_set_qscale(s, s->qscale);
2839         ff_init_block_index(s);
2840
2841         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2842             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2843             int mb_type= s->mb_type[xy];
2844 //            int d;
2845             int dmin= INT_MAX;
2846             int dir;
2847             int size_increase =  s->avctx->internal->byte_buffer_size/4
2848                                + s->mb_width*MAX_MB_BYTES;
2849
2850             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2851             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2852                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2853                 return -1;
2854             }
2855             if(s->data_partitioning){
2856                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2857                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2858                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2859                     return -1;
2860                 }
2861             }
2862
2863             s->mb_x = mb_x;
2864             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2865             ff_update_block_index(s);
2866
2867             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2868                 ff_h261_reorder_mb_index(s);
2869                 xy= s->mb_y*s->mb_stride + s->mb_x;
2870                 mb_type= s->mb_type[xy];
2871             }
2872
2873             /* write gob / video packet header  */
2874             if(s->rtp_mode){
2875                 int current_packet_size, is_gob_start;
2876
2877                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2878
2879                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2880
2881                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2882
2883                 switch(s->codec_id){
2884                 case AV_CODEC_ID_H263:
2885                 case AV_CODEC_ID_H263P:
2886                     if(!s->h263_slice_structured)
2887                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2888                     break;
2889                 case AV_CODEC_ID_MPEG2VIDEO:
2890                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2891                 case AV_CODEC_ID_MPEG1VIDEO:
2892                     if(s->mb_skip_run) is_gob_start=0;
2893                     break;
2894                 case AV_CODEC_ID_MJPEG:
2895                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2896                     break;
2897                 }
2898
2899                 if(is_gob_start){
2900                     if(s->start_mb_y != mb_y || mb_x!=0){
2901                         write_slice_end(s);
2902
2903                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2904                             ff_mpeg4_init_partitions(s);
2905                         }
2906                     }
2907
2908                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2909                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2910
2911                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2912                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2913                         int d = 100 / s->error_rate;
2914                         if(r % d == 0){
2915                             current_packet_size=0;
2916                             s->pb.buf_ptr= s->ptr_lastgob;
2917                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2918                         }
2919                     }
2920
2921                     if (s->avctx->rtp_callback){
2922                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2923                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2924                     }
2925                     update_mb_info(s, 1);
2926
2927                     switch(s->codec_id){
2928                     case AV_CODEC_ID_MPEG4:
2929                         if (CONFIG_MPEG4_ENCODER) {
2930                             ff_mpeg4_encode_video_packet_header(s);
2931                             ff_mpeg4_clean_buffers(s);
2932                         }
2933                     break;
2934                     case AV_CODEC_ID_MPEG1VIDEO:
2935                     case AV_CODEC_ID_MPEG2VIDEO:
2936                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2937                             ff_mpeg1_encode_slice_header(s);
2938                             ff_mpeg1_clean_buffers(s);
2939                         }
2940                     break;
2941                     case AV_CODEC_ID_H263:
2942                     case AV_CODEC_ID_H263P:
2943                         if (CONFIG_H263_ENCODER)
2944                             ff_h263_encode_gob_header(s, mb_y);
2945                     break;
2946                     }
2947
2948                     if (s->avctx->flags & CODEC_FLAG_PASS1) {
2949                         int bits= put_bits_count(&s->pb);
2950                         s->misc_bits+= bits - s->last_bits;
2951                         s->last_bits= bits;
2952                     }
2953
2954                     s->ptr_lastgob += current_packet_size;
2955                     s->first_slice_line=1;
2956                     s->resync_mb_x=mb_x;
2957                     s->resync_mb_y=mb_y;
2958                 }
2959             }
2960
2961             if(  (s->resync_mb_x   == s->mb_x)
2962                && s->resync_mb_y+1 == s->mb_y){
2963                 s->first_slice_line=0;
2964             }
2965
2966             s->mb_skipped=0;
2967             s->dquant=0; //only for QP_RD
2968
2969             update_mb_info(s, 0);
2970
2971             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2972                 int next_block=0;
2973                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2974
2975                 copy_context_before_encode(&backup_s, s, -1);
2976                 backup_s.pb= s->pb;
2977                 best_s.data_partitioning= s->data_partitioning;
2978                 best_s.partitioned_frame= s->partitioned_frame;
2979                 if(s->data_partitioning){
2980                     backup_s.pb2= s->pb2;
2981                     backup_s.tex_pb= s->tex_pb;
2982                 }
2983
2984                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2985                     s->mv_dir = MV_DIR_FORWARD;
2986                     s->mv_type = MV_TYPE_16X16;
2987                     s->mb_intra= 0;
2988                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2989                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2990                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2991                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2992                 }
2993                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2994                     s->mv_dir = MV_DIR_FORWARD;
2995                     s->mv_type = MV_TYPE_FIELD;
2996                     s->mb_intra= 0;
2997                     for(i=0; i<2; i++){
2998                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2999                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3000                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3001                     }
3002                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3003                                  &dmin, &next_block, 0, 0);
3004                 }
3005                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3006                     s->mv_dir = MV_DIR_FORWARD;
3007                     s->mv_type = MV_TYPE_16X16;
3008                     s->mb_intra= 0;
3009                     s->mv[0][0][0] = 0;
3010                     s->mv[0][0][1] = 0;
3011                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3012                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3013                 }
3014                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3015                     s->mv_dir = MV_DIR_FORWARD;
3016                     s->mv_type = MV_TYPE_8X8;
3017                     s->mb_intra= 0;
3018                     for(i=0; i<4; i++){
3019                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3020                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3021                     }
3022                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3023                                  &dmin, &next_block, 0, 0);
3024                 }
3025                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3026                     s->mv_dir = MV_DIR_FORWARD;
3027                     s->mv_type = MV_TYPE_16X16;
3028                     s->mb_intra= 0;
3029                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3030                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3031                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3032                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3033                 }
3034                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3035                     s->mv_dir = MV_DIR_BACKWARD;
3036                     s->mv_type = MV_TYPE_16X16;
3037                     s->mb_intra= 0;
3038                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3039                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3040                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3041                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3042                 }
3043                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3044                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3045                     s->mv_type = MV_TYPE_16X16;
3046                     s->mb_intra= 0;
3047                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3048                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3049                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3050                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3051                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3052                                  &dmin, &next_block, 0, 0);
3053                 }
3054                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3055                     s->mv_dir = MV_DIR_FORWARD;
3056                     s->mv_type = MV_TYPE_FIELD;
3057                     s->mb_intra= 0;
3058                     for(i=0; i<2; i++){
3059                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3060                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3061                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3062                     }
3063                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3064                                  &dmin, &next_block, 0, 0);
3065                 }
3066                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3067                     s->mv_dir = MV_DIR_BACKWARD;
3068                     s->mv_type = MV_TYPE_FIELD;
3069                     s->mb_intra= 0;
3070                     for(i=0; i<2; i++){
3071                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3072                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3073                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3074                     }
3075                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3076                                  &dmin, &next_block, 0, 0);
3077                 }
3078                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3079                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3080                     s->mv_type = MV_TYPE_FIELD;
3081                     s->mb_intra= 0;
3082                     for(dir=0; dir<2; dir++){
3083                         for(i=0; i<2; i++){
3084                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3085                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3086                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3087                         }
3088                     }
3089                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3090                                  &dmin, &next_block, 0, 0);
3091                 }
3092                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3093                     s->mv_dir = 0;
3094                     s->mv_type = MV_TYPE_16X16;
3095                     s->mb_intra= 1;
3096                     s->mv[0][0][0] = 0;
3097                     s->mv[0][0][1] = 0;
3098                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3099                                  &dmin, &next_block, 0, 0);
3100                     if(s->h263_pred || s->h263_aic){
3101                         if(best_s.mb_intra)
3102                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3103                         else
3104                             ff_clean_intra_table_entries(s); //old mode?
3105                     }
3106                 }
3107
3108                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3109                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3110                         const int last_qp= backup_s.qscale;
3111                         int qpi, qp, dc[6];
3112                         int16_t ac[6][16];
3113                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3114                         static const int dquant_tab[4]={-1,1,-2,2};
3115                         int storecoefs = s->mb_intra && s->dc_val[0];
3116
3117                         av_assert2(backup_s.dquant == 0);
3118
3119                         //FIXME intra
3120                         s->mv_dir= best_s.mv_dir;
3121                         s->mv_type = MV_TYPE_16X16;
3122                         s->mb_intra= best_s.mb_intra;
3123                         s->mv[0][0][0] = best_s.mv[0][0][0];
3124                         s->mv[0][0][1] = best_s.mv[0][0][1];
3125                         s->mv[1][0][0] = best_s.mv[1][0][0];
3126                         s->mv[1][0][1] = best_s.mv[1][0][1];
3127
3128                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3129                         for(; qpi<4; qpi++){
3130                             int dquant= dquant_tab[qpi];
3131                             qp= last_qp + dquant;
3132                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3133                                 continue;
3134                             backup_s.dquant= dquant;
3135                             if(storecoefs){
3136                                 for(i=0; i<6; i++){
3137                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3138                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3139                                 }
3140                             }
3141
3142                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3143                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3144                             if(best_s.qscale != qp){
3145                                 if(storecoefs){
3146                                     for(i=0; i<6; i++){
3147                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3148                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3149                                     }
3150                                 }
3151                             }
3152                         }
3153                     }
3154                 }
3155                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3156                     int mx= s->b_direct_mv_table[xy][0];
3157                     int my= s->b_direct_mv_table[xy][1];
3158
3159                     backup_s.dquant = 0;
3160                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3161                     s->mb_intra= 0;
3162                     ff_mpeg4_set_direct_mv(s, mx, my);
3163                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3164                                  &dmin, &next_block, mx, my);
3165                 }
3166                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3167                     backup_s.dquant = 0;
3168                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3169                     s->mb_intra= 0;
3170                     ff_mpeg4_set_direct_mv(s, 0, 0);
3171                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3172                                  &dmin, &next_block, 0, 0);
3173                 }
3174                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3175                     int coded=0;
3176                     for(i=0; i<6; i++)
3177                         coded |= s->block_last_index[i];
3178                     if(coded){
3179                         int mx,my;
3180                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3181                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3182                             mx=my=0; //FIXME find the one we actually used
3183                             ff_mpeg4_set_direct_mv(s, mx, my);
3184                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3185                             mx= s->mv[1][0][0];
3186                             my= s->mv[1][0][1];
3187                         }else{
3188                             mx= s->mv[0][0][0];
3189                             my= s->mv[0][0][1];
3190                         }
3191
3192                         s->mv_dir= best_s.mv_dir;
3193                         s->mv_type = best_s.mv_type;
3194                         s->mb_intra= 0;
3195 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3196                         s->mv[0][0][1] = best_s.mv[0][0][1];
3197                         s->mv[1][0][0] = best_s.mv[1][0][0];
3198                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3199                         backup_s.dquant= 0;
3200                         s->skipdct=1;
3201                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3202                                         &dmin, &next_block, mx, my);
3203                         s->skipdct=0;
3204                     }
3205                 }
3206
3207                 s->current_picture.qscale_table[xy] = best_s.qscale;
3208
3209                 copy_context_after_encode(s, &best_s, -1);
3210
3211                 pb_bits_count= put_bits_count(&s->pb);
3212                 flush_put_bits(&s->pb);
3213                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3214                 s->pb= backup_s.pb;
3215
3216                 if(s->data_partitioning){
3217                     pb2_bits_count= put_bits_count(&s->pb2);
3218                     flush_put_bits(&s->pb2);
3219                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3220                     s->pb2= backup_s.pb2;
3221
3222                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3223                     flush_put_bits(&s->tex_pb);
3224                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3225                     s->tex_pb= backup_s.tex_pb;
3226                 }
3227                 s->last_bits= put_bits_count(&s->pb);
3228
3229                 if (CONFIG_H263_ENCODER &&
3230                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3231                     ff_h263_update_motion_val(s);
3232
3233                 if(next_block==0){ //FIXME 16 vs linesize16
3234                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3235                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3236                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3237                 }
3238
3239                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3240                     ff_mpv_decode_mb(s, s->block);
3241             } else {
3242                 int motion_x = 0, motion_y = 0;
3243                 s->mv_type=MV_TYPE_16X16;
3244                 // only one MB-Type possible
3245
3246                 switch(mb_type){
3247                 case CANDIDATE_MB_TYPE_INTRA:
3248                     s->mv_dir = 0;
3249                     s->mb_intra= 1;
3250                     motion_x= s->mv[0][0][0] = 0;
3251                     motion_y= s->mv[0][0][1] = 0;
3252                     break;
3253                 case CANDIDATE_MB_TYPE_INTER:
3254                     s->mv_dir = MV_DIR_FORWARD;
3255                     s->mb_intra= 0;
3256                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3257                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3258                     break;
3259                 case CANDIDATE_MB_TYPE_INTER_I:
3260                     s->mv_dir = MV_DIR_FORWARD;
3261                     s->mv_type = MV_TYPE_FIELD;
3262                     s->mb_intra= 0;
3263                     for(i=0; i<2; i++){
3264                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3265                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3266                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3267                     }
3268                     break;
3269                 case CANDIDATE_MB_TYPE_INTER4V:
3270                     s->mv_dir = MV_DIR_FORWARD;
3271                     s->mv_type = MV_TYPE_8X8;
3272                     s->mb_intra= 0;
3273                     for(i=0; i<4; i++){
3274                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3275                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3276                     }
3277                     break;
3278                 case CANDIDATE_MB_TYPE_DIRECT:
3279                     if (CONFIG_MPEG4_ENCODER) {
3280                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3281                         s->mb_intra= 0;
3282                         motion_x=s->b_direct_mv_table[xy][0];
3283                         motion_y=s->b_direct_mv_table[xy][1];
3284                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3285                     }
3286                     break;
3287                 case CANDIDATE_MB_TYPE_DIRECT0:
3288                     if (CONFIG_MPEG4_ENCODER) {
3289                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3290                         s->mb_intra= 0;
3291                         ff_mpeg4_set_direct_mv(s, 0, 0);
3292                     }
3293                     break;
3294                 case CANDIDATE_MB_TYPE_BIDIR:
3295                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3296                     s->mb_intra= 0;
3297                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3298                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3299                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3300                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3301                     break;
3302                 case CANDIDATE_MB_TYPE_BACKWARD:
3303                     s->mv_dir = MV_DIR_BACKWARD;
3304                     s->mb_intra= 0;
3305                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3306                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3307                     break;
3308                 case CANDIDATE_MB_TYPE_FORWARD:
3309                     s->mv_dir = MV_DIR_FORWARD;
3310                     s->mb_intra= 0;
3311                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3312                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3313                     break;
3314                 case CANDIDATE_MB_TYPE_FORWARD_I:
3315                     s->mv_dir = MV_DIR_FORWARD;
3316                     s->mv_type = MV_TYPE_FIELD;
3317                     s->mb_intra= 0;
3318                     for(i=0; i<2; i++){
3319                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3320                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3321                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3322                     }
3323                     break;
3324                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3325                     s->mv_dir = MV_DIR_BACKWARD;
3326                     s->mv_type = MV_TYPE_FIELD;
3327                     s->mb_intra= 0;
3328                     for(i=0; i<2; i++){
3329                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3330                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3331                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3332                     }
3333                     break;
3334                 case CANDIDATE_MB_TYPE_BIDIR_I:
3335                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3336                     s->mv_type = MV_TYPE_FIELD;
3337                     s->mb_intra= 0;
3338                     for(dir=0; dir<2; dir++){
3339                         for(i=0; i<2; i++){
3340                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3341                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3342                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3343                         }
3344                     }
3345                     break;
3346                 default:
3347                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3348                 }
3349
3350                 encode_mb(s, motion_x, motion_y);
3351
3352                 // RAL: Update last macroblock type
3353                 s->last_mv_dir = s->mv_dir;
3354
3355                 if (CONFIG_H263_ENCODER &&
3356                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3357                     ff_h263_update_motion_val(s);
3358
3359                 ff_mpv_decode_mb(s, s->block);
3360             }
3361
3362             /* clean the MV table in IPS frames for direct mode in B frames */
3363             if(s->mb_intra /* && I,P,S_TYPE */){
3364                 s->p_mv_table[xy][0]=0;
3365                 s->p_mv_table[xy][1]=0;
3366             }
3367
3368             if (s->avctx->flags & CODEC_FLAG_PSNR) {
3369                 int w= 16;
3370                 int h= 16;
3371
3372                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3373                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3374
3375                 s->current_picture.error[0] += sse(
3376                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3377                     s->dest[0], w, h, s->linesize);
3378                 s->current_picture.error[1] += sse(
3379                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3380                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3381                 s->current_picture.error[2] += sse(
3382                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3383                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3384             }
3385             if(s->loop_filter){
3386                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3387                     ff_h263_loop_filter(s);
3388             }
3389             ff_dlog(s->avctx, "MB %d %d bits\n",
3390                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3391         }
3392     }
3393
3394     //not beautiful here but we must write it before flushing so it has to be here
3395     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3396         ff_msmpeg4_encode_ext_header(s);
3397
3398     write_slice_end(s);
3399
3400     /* Send the last GOB if RTP */
3401     if (s->avctx->rtp_callback) {
3402         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3403         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3404         /* Call the RTP callback to send the last GOB */
3405         emms_c();
3406         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3407     }
3408
3409     return 0;
3410 }
3411
3412 #define MERGE(field) dst->field += src->field; src->field=0
3413 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3414     MERGE(me.scene_change_score);
3415     MERGE(me.mc_mb_var_sum_temp);
3416     MERGE(me.mb_var_sum_temp);
3417 }
3418
3419 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3420     int i;
3421
3422     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3423     MERGE(dct_count[1]);
3424     MERGE(mv_bits);
3425     MERGE(i_tex_bits);
3426     MERGE(p_tex_bits);
3427     MERGE(i_count);
3428     MERGE(f_count);
3429     MERGE(b_count);
3430     MERGE(skip_count);
3431     MERGE(misc_bits);
3432     MERGE(er.error_count);
3433     MERGE(padding_bug_score);
3434     MERGE(current_picture.error[0]);
3435     MERGE(current_picture.error[1]);
3436     MERGE(current_picture.error[2]);
3437
3438     if(dst->avctx->noise_reduction){
3439         for(i=0; i<64; i++){
3440             MERGE(dct_error_sum[0][i]);
3441             MERGE(dct_error_sum[1][i]);
3442         }
3443     }
3444
3445     assert(put_bits_count(&src->pb) % 8 ==0);
3446     assert(put_bits_count(&dst->pb) % 8 ==0);
3447     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3448     flush_put_bits(&dst->pb);
3449 }
3450
3451 static int estimate_qp(MpegEncContext *s, int dry_run){
3452     if (s->next_lambda){
3453         s->current_picture_ptr->f->quality =
3454         s->current_picture.f->quality = s->next_lambda;
3455         if(!dry_run) s->next_lambda= 0;
3456     } else if (!s->fixed_qscale) {
3457         s->current_picture_ptr->f->quality =
3458         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3459         if (s->current_picture.f->quality < 0)
3460             return -1;
3461     }
3462
3463     if(s->adaptive_quant){
3464         switch(s->codec_id){
3465         case AV_CODEC_ID_MPEG4:
3466             if (CONFIG_MPEG4_ENCODER)
3467                 ff_clean_mpeg4_qscales(s);
3468             break;
3469         case AV_CODEC_ID_H263:
3470         case AV_CODEC_ID_H263P:
3471         case AV_CODEC_ID_FLV1:
3472             if (CONFIG_H263_ENCODER)
3473                 ff_clean_h263_qscales(s);
3474             break;
3475         default:
3476             ff_init_qscale_tab(s);
3477         }
3478
3479         s->lambda= s->lambda_table[0];
3480         //FIXME broken
3481     }else
3482         s->lambda = s->current_picture.f->quality;
3483     update_qscale(s);
3484     return 0;
3485 }
3486
3487 /* must be called before writing the header */
3488 static void set_frame_distances(MpegEncContext * s){
3489     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3490     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3491
3492     if(s->pict_type==AV_PICTURE_TYPE_B){
3493         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3494         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3495     }else{
3496         s->pp_time= s->time - s->last_non_b_time;
3497         s->last_non_b_time= s->time;
3498         assert(s->picture_number==0 || s->pp_time > 0);
3499     }
3500 }
3501
3502 static int encode_picture(MpegEncContext *s, int picture_number)
3503 {
3504     int i, ret;
3505     int bits;
3506     int context_count = s->slice_context_count;
3507
3508     s->picture_number = picture_number;
3509
3510     /* Reset the average MB variance */
3511     s->me.mb_var_sum_temp    =
3512     s->me.mc_mb_var_sum_temp = 0;
3513
3514     /* we need to initialize some time vars before we can encode b-frames */
3515     // RAL: Condition added for MPEG1VIDEO
3516     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3517         set_frame_distances(s);
3518     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3519         ff_set_mpeg4_time(s);
3520
3521     s->me.scene_change_score=0;
3522
3523 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3524
3525     if(s->pict_type==AV_PICTURE_TYPE_I){
3526         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3527         else                        s->no_rounding=0;
3528     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3529         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3530             s->no_rounding ^= 1;
3531     }
3532
3533     if (s->avctx->flags & CODEC_FLAG_PASS2) {
3534         if (estimate_qp(s,1) < 0)
3535             return -1;
3536         ff_get_2pass_fcode(s);
3537     } else if (!(s->avctx->flags & CODEC_FLAG_QSCALE)) {
3538         if(s->pict_type==AV_PICTURE_TYPE_B)
3539             s->lambda= s->last_lambda_for[s->pict_type];
3540         else
3541             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3542         update_qscale(s);
3543     }
3544
3545     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3546         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3547         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3548         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3549         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3550     }
3551
3552     s->mb_intra=0; //for the rate distortion & bit compare functions
3553     for(i=1; i<context_count; i++){
3554         ret = ff_update_duplicate_context(s->thread_context[i], s);
3555         if (ret < 0)
3556             return ret;
3557     }
3558
3559     if(ff_init_me(s)<0)
3560         return -1;
3561
3562     /* Estimate motion for every MB */
3563     if(s->pict_type != AV_PICTURE_TYPE_I){
3564         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3565         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3566         if (s->pict_type != AV_PICTURE_TYPE_B) {
3567             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3568                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3569             }
3570         }
3571
3572         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3573     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3574         /* I-Frame */
3575         for(i=0; i<s->mb_stride*s->mb_height; i++)
3576             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3577
3578         if(!s->fixed_qscale){
3579             /* finding spatial complexity for I-frame rate control */
3580             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3581         }
3582     }
3583     for(i=1; i<context_count; i++){
3584         merge_context_after_me(s, s->thread_context[i]);
3585     }
3586     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3587     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3588     emms_c();
3589
3590     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3591         s->pict_type= AV_PICTURE_TYPE_I;
3592         for(i=0; i<s->mb_stride*s->mb_height; i++)
3593             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3594         if(s->msmpeg4_version >= 3)
3595             s->no_rounding=1;
3596         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3597                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3598     }
3599
3600     if(!s->umvplus){
3601         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3602             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3603
3604             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3605                 int a,b;
3606                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3607                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3608                 s->f_code= FFMAX3(s->f_code, a, b);
3609             }
3610
3611             ff_fix_long_p_mvs(s);
3612             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3613             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3614                 int j;
3615                 for(i=0; i<2; i++){
3616                     for(j=0; j<2; j++)
3617                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3618                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3619                 }
3620             }
3621         }
3622
3623         if(s->pict_type==AV_PICTURE_TYPE_B){
3624             int a, b;
3625
3626             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3627             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3628             s->f_code = FFMAX(a, b);
3629
3630             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3631             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3632             s->b_code = FFMAX(a, b);
3633
3634             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3635             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3636             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3637             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3638             if (s->avctx->flags & CODEC_FLAG_INTERLACED_ME) {
3639                 int dir, j;
3640                 for(dir=0; dir<2; dir++){
3641                     for(i=0; i<2; i++){
3642                         for(j=0; j<2; j++){
3643                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3644                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3645                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3646                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3647                         }
3648                     }
3649                 }
3650             }
3651         }
3652     }
3653
3654     if (estimate_qp(s, 0) < 0)
3655         return -1;
3656
3657     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3658         s->pict_type == AV_PICTURE_TYPE_I &&
3659         !(s->avctx->flags & CODEC_FLAG_QSCALE))
3660         s->qscale= 3; //reduce clipping problems
3661
3662     if (s->out_format == FMT_MJPEG) {
3663         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3664         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3665
3666         if (s->avctx->intra_matrix) {
3667             chroma_matrix =
3668             luma_matrix = s->avctx->intra_matrix;
3669         }
3670         if (s->avctx->chroma_intra_matrix)
3671             chroma_matrix = s->avctx->chroma_intra_matrix;
3672
3673         /* for mjpeg, we do include qscale in the matrix */
3674         for(i=1;i<64;i++){
3675             int j = s->idsp.idct_permutation[i];
3676
3677             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3678             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3679         }
3680         s->y_dc_scale_table=
3681         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3682         s->chroma_intra_matrix[0] =
3683         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3684         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3685                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3686         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3687                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3688         s->qscale= 8;
3689     }
3690     if(s->codec_id == AV_CODEC_ID_AMV){
3691         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3692         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3693         for(i=1;i<64;i++){
3694             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3695
3696             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3697             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3698         }
3699         s->y_dc_scale_table= y;
3700         s->c_dc_scale_table= c;
3701         s->intra_matrix[0] = 13;
3702         s->chroma_intra_matrix[0] = 14;
3703         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3704                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3705         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3706                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3707         s->qscale= 8;
3708     }
3709
3710     //FIXME var duplication
3711     s->current_picture_ptr->f->key_frame =
3712     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3713     s->current_picture_ptr->f->pict_type =
3714     s->current_picture.f->pict_type = s->pict_type;
3715
3716     if (s->current_picture.f->key_frame)
3717         s->picture_in_gop_number=0;
3718
3719     s->mb_x = s->mb_y = 0;
3720     s->last_bits= put_bits_count(&s->pb);
3721     switch(s->out_format) {
3722     case FMT_MJPEG:
3723         if (CONFIG_MJPEG_ENCODER)
3724             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3725                                            s->intra_matrix, s->chroma_intra_matrix);
3726         break;
3727     case FMT_H261:
3728         if (CONFIG_H261_ENCODER)
3729             ff_h261_encode_picture_header(s, picture_number);
3730         break;
3731     case FMT_H263:
3732         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3733             ff_wmv2_encode_picture_header(s, picture_number);
3734         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3735             ff_msmpeg4_encode_picture_header(s, picture_number);
3736         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3737             ff_mpeg4_encode_picture_header(s, picture_number);
3738         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3739             ret = ff_rv10_encode_picture_header(s, picture_number);
3740             if (ret < 0)
3741                 return ret;
3742         }
3743         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3744             ff_rv20_encode_picture_header(s, picture_number);
3745         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3746             ff_flv_encode_picture_header(s, picture_number);
3747         else if (CONFIG_H263_ENCODER)
3748             ff_h263_encode_picture_header(s, picture_number);
3749         break;
3750     case FMT_MPEG1:
3751         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3752             ff_mpeg1_encode_picture_header(s, picture_number);
3753         break;
3754     default:
3755         av_assert0(0);
3756     }
3757     bits= put_bits_count(&s->pb);
3758     s->header_bits= bits - s->last_bits;
3759
3760     for(i=1; i<context_count; i++){
3761         update_duplicate_context_after_me(s->thread_context[i], s);
3762     }
3763     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3764     for(i=1; i<context_count; i++){
3765         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3766             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3767         merge_context_after_encode(s, s->thread_context[i]);
3768     }
3769     emms_c();
3770     return 0;
3771 }
3772
3773 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3774     const int intra= s->mb_intra;
3775     int i;
3776
3777     s->dct_count[intra]++;
3778
3779     for(i=0; i<64; i++){
3780         int level= block[i];
3781
3782         if(level){
3783             if(level>0){
3784                 s->dct_error_sum[intra][i] += level;
3785                 level -= s->dct_offset[intra][i];
3786                 if(level<0) level=0;
3787             }else{
3788                 s->dct_error_sum[intra][i] -= level;
3789                 level += s->dct_offset[intra][i];
3790                 if(level>0) level=0;
3791             }
3792             block[i]= level;
3793         }
3794     }
3795 }
3796
/**
 * Forward-transform and quantize one 8x8 block, choosing the levels with a
 * rate-distortion (trellis / dynamic programming) search.
 *
 * For every coefficient up to the last significant one, up to two candidate
 * levels are considered (the normally rounded level and the one whose
 * magnitude is smaller by one). Candidates are scored as
 * distortion + bits * lambda, where the bit counts come from the VLC length
 * tables selected below (or s->ac_esc_length for levels outside -64..63).
 *
 * @param s        encoder context
 * @param block    in: data handed to s->fdsp.fdct(); out: quantized levels,
 *                 written through s->intra_scantable.permutated
 * @param n        block number; n < 4 selects the luma DC scale / matrices,
 *                 otherwise the chroma ones
 * @param qscale   quantizer scale
 * @param overflow set to non-zero when the OR of all first-candidate
 *                 magnitudes exceeds s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (-1 for non-intra blocks) when nothing is coded
 */
3797 static int dct_quantize_trellis_c(MpegEncContext *s,
3798                                   int16_t *block, int n,
3799                                   int qscale, int *overflow){
3800     const int *qmat;
3801     const uint16_t *matrix;
3802     const uint8_t *scantable= s->intra_scantable.scantable;
3803     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3804     int max=0;
3805     unsigned int threshold1, threshold2;
3806     int bias=0;
         /* per scan position (+1): run and level of the best path ending there */
3807     int run_tab[65];
3808     int level_tab[65];
         /* score_tab[i]: best score found for coding everything before scan position i */
3809     int score_tab[65];
         /* scan positions still considered as "previous coded coefficient" */
3810     int survivor[65];
3811     int survivor_count;
3812     int last_run=0;
3813     int last_level=0;
3814     int last_score= 0;
3815     int last_i;
         /* coeff[k][i]: k-th candidate level for scan position i; coeff_count[i] candidates are valid */
3816     int coeff[2][64];
3817     int coeff_count[64];
3818     int qmul, qadd, start_i, last_non_zero, i, dc;
3819     const int esc_length= s->ac_esc_length;
3820     uint8_t * length;
3821     uint8_t * last_length;
3822     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3823
3824     s->fdsp.fdct(block);
3825
3826     if(s->dct_error_sum)
3827         s->denoise_dct(s, block);
         /* dequantization constants used for the H.263/H.261 style reconstruction below */
3828     qmul= qscale*16;
3829     qadd= ((qscale-1)|1)*8;
3830
3831     if (s->mb_intra) {
3832         int q;
3833         if (!s->h263_aic) {
3834             if (n < 4)
3835                 q = s->y_dc_scale;
3836             else
3837                 q = s->c_dc_scale;
3838             q = q << 3;
3839         } else{
3840             /* For AIC we skip quant/dequant of INTRADC */
3841             q = 1 << 3;
3842             qadd=0;
3843         }
3844
3845         /* note: block[0] is assumed to be positive */
             /* the DC coefficient is quantized here with rounding; the search starts at index 1 */
3846         block[0] = (block[0] + (q >> 1)) / q;
3847         start_i = 1;
3848         last_non_zero = 0;
3849         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3850         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3851         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3852             bias= 1<<(QMAT_SHIFT-1);
3853
             /* chroma blocks use dedicated VLC length tables when they are set */
3854         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3855             length     = s->intra_chroma_ac_vlc_length;
3856             last_length= s->intra_chroma_ac_vlc_last_length;
3857         } else {
3858             length     = s->intra_ac_vlc_length;
3859             last_length= s->intra_ac_vlc_last_length;
3860         }
3861     } else {
3862         start_i = 0;
3863         last_non_zero = -1;
3864         qmat = s->q_inter_matrix[qscale];
3865         matrix = s->inter_matrix;
3866         length     = s->inter_ac_vlc_length;
3867         last_length= s->inter_ac_vlc_last_length;
3868     }
3869     last_i= start_i;
3870
         /* (unsigned)(level+threshold1) > threshold2 is a single-compare test for |level| > threshold1 */
3871     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3872     threshold2= (threshold1<<1);
3873
         /* scan backwards for the last coefficient that is above the threshold */
3874     for(i=63; i>=start_i; i--) {
3875         const int j = scantable[i];
3876         int level = block[j] * qmat[j];
3877
3878         if(((unsigned)(level+threshold1))>threshold2){
3879             last_non_zero = i;
3880             break;
3881         }
3882     }
3883
         /* build the candidate levels for every position up to last_non_zero */
3884     for(i=start_i; i<=last_non_zero; i++) {
3885         const int j = scantable[i];
3886         int level = block[j] * qmat[j];
3887
3888 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3889 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3890         if(((unsigned)(level+threshold1))>threshold2){
3891             if(level>0){
3892                 level= (bias + level)>>QMAT_SHIFT;
3893                 coeff[0][i]= level;
3894                 coeff[1][i]= level-1;
3895 //                coeff[2][k]= level-2;
3896             }else{
3897                 level= (bias - level)>>QMAT_SHIFT;
3898                 coeff[0][i]= -level;
3899                 coeff[1][i]= -level+1;
3900 //                coeff[2][k]= -level+2;
3901             }
                 /* a magnitude of 1 has only one candidate, since the second one would be 0 */
3902             coeff_count[i]= FFMIN(level, 2);
3903             av_assert2(coeff_count[i]);
3904             max |=level;
3905         }else{
                 /* below the threshold: the only candidate is +1 or -1, following the sign of level */
3906             coeff[0][i]= (level>>31)|1;
3907             coeff_count[i]= 1;
3908         }
3909     }
3910
3911     *overflow= s->max_qcoeff < max; //overflow might have happened
3912
         /* nothing above the threshold: clear all coefficients from start_i on and stop */
3913     if(last_non_zero < start_i){
3914         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3915         return last_non_zero;
3916     }
3917
3918     score_tab[start_i]= 0;
3919     survivor[0]= start_i;
3920     survivor_count= 1;
3921
         /* forward pass of the dynamic programming search */
3922     for(i=start_i; i<=last_non_zero; i++){
3923         int level_index, j, zero_distortion;
3924         int dct_coeff= FFABS(block[ scantable[i] ]);
3925         int best_score=256*256*256*120;
3926
             /* for the ifast fdct the coefficient is rescaled with ff_inv_aanscales first */
3927         if (s->fdsp.fdct == ff_fdct_ifast)
3928             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3929         zero_distortion= dct_coeff*dct_coeff;
3930
3931         for(level_index=0; level_index < coeff_count[i]; level_index++){
3932             int distortion;
3933             int level= coeff[level_index][i];
3934             const int alevel= FFABS(level);
3935             int unquant_coeff;
3936
3937             av_assert2(level);
3938
                 /* reconstruct the magnitude this candidate would dequantize to */
3939             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3940                 unquant_coeff= alevel*qmul + qadd;
3941             } else if(s->out_format == FMT_MJPEG) {
3942                 j = s->idsp.idct_permutation[scantable[i]];
3943                 unquant_coeff = alevel * matrix[j] * 8;
3944             }else{ //MPEG1
3945                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3946                 if(s->mb_intra){
3947                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3948                         unquant_coeff =   (unquant_coeff - 1) | 1;
3949                 }else{
3950                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3951                         unquant_coeff =   (unquant_coeff - 1) | 1;
3952                 }
3953                 unquant_coeff<<= 3;
3954             }
3955
                 /* squared error of this candidate, relative to the error of coding a zero here */
3956             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* the length tables are indexed with level+64; anything outside 0..127 uses the escape length */
3957             level+=64;
3958             if((level&(~127)) == 0){
3959                 for(j=survivor_count-1; j>=0; j--){
3960                     int run= i - survivor[j];
3961                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3962                     score += score_tab[i-run];
3963
3964                     if(score < best_score){
3965                         best_score= score;
3966                         run_tab[i+1]= run;
3967                         level_tab[i+1]= level-64;
3968                     }
3969                 }
3970
                     /* H.263/H.261: also score this candidate as the final coefficient, using last_length */
3971                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3972                     for(j=survivor_count-1; j>=0; j--){
3973                         int run= i - survivor[j];
3974                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3975                         score += score_tab[i-run];
3976                         if(score < last_score){
3977                             last_score= score;
3978                             last_run= run;
3979                             last_level= level-64;
3980                             last_i= i+1;
3981                         }
3982                     }
3983                 }
3984             }else{
3985                 distortion += esc_length*lambda;
3986                 for(j=survivor_count-1; j>=0; j--){
3987                     int run= i - survivor[j];
3988                     int score= distortion + score_tab[i-run];
3989
3990                     if(score < best_score){
3991                         best_score= score;
3992                         run_tab[i+1]= run;
3993                         level_tab[i+1]= level-64;
3994                     }
3995                 }
3996
3997                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3998                   for(j=survivor_count-1; j>=0; j--){
3999                         int run= i - survivor[j];
4000                         int score= distortion + score_tab[i-run];
4001                         if(score < last_score){
4002                             last_score= score;
4003                             last_run= run;
4004                             last_level= level-64;
4005                             last_i= i+1;
4006                         }
4007                     }
4008                 }
4009             }
4010         }
4011
4012         score_tab[i+1]= best_score;
4013
             /* drop survivors from the end of the list until one scores no worse than the new best (plus a lambda margin in the second branch) */
4014         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
4015         if(last_non_zero <= 27){
4016             for(; survivor_count; survivor_count--){
4017                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4018                     break;
4019             }
4020         }else{
4021             for(; survivor_count; survivor_count--){
4022                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4023                     break;
4024             }
4025         }
4026
4027         survivor[ survivor_count++ ]= i+1;
4028     }
4029
         /* other formats: pick the end position afterwards, adding lambda*2 for every non-zero end index */
4030     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4031         last_score= 256*256*256*120;
4032         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4033             int score= score_tab[i];
4034             if(i) score += lambda*2; //FIXME exacter?
4035
4036             if(score < last_score){
4037                 last_score= score;
4038                 last_i= i;
4039                 last_level= level_tab[i];
4040                 last_run= run_tab[i];
4041             }
4042         }
4043     }
4044
4045     s->coded_score[n] = last_score;
4046
         /* keep |block[0]| before the coefficients are cleared; it is used by the single-coefficient case below */
4047     dc= FFABS(block[0]);
4048     last_non_zero= last_i - 1;
4049     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4050
4051     if(last_non_zero < start_i)
4052         return last_non_zero;
4053
         /* special case: a non-intra block whose only coded coefficient sits at scan index 0 */
4054     if(last_non_zero == 0 && start_i == 0){
4055         int best_level= 0;
4056         int best_score= dc * dc;
4057
4058         for(i=0; i<coeff_count[0]; i++){
4059             int level= coeff[i][0];
4060             int alevel= FFABS(level);
4061             int unquant_coeff, score, distortion;
4062
4063             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4064                     unquant_coeff= (alevel*qmul + qadd)>>3;
4065             }else{ //MPEG1
4066                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4067                     unquant_coeff =   (unquant_coeff - 1) | 1;
4068             }
4069             unquant_coeff = (unquant_coeff + 4) >> 3;
4070             unquant_coeff<<= 3 + 3;
4071
4072             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4073             level+=64;
4074             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4075             else                    score= distortion + esc_length*lambda;
4076
4077             if(score < best_score){
4078                 best_score= score;
4079                 best_level= level - 64;
4080             }
4081         }
4082         block[0]= best_level;
4083         s->coded_score[n] = best_score - dc*dc;
4084         if(best_level == 0) return -1;
4085         else                return last_non_zero;
4086     }
4087
         /* backtrack: write the chosen levels, walking from the last coefficient towards start_i */
4088     i= last_i;
4089     av_assert2(last_level);
4090
4091     block[ perm_scantable[last_non_zero] ]= last_level;
4092     i -= last_run + 1;
4093
4094     for(; i>start_i; i -= run_tab[i] + 1){
4095         block[ perm_scantable[i-1] ]= level_tab[i];
4096     }
4097
4098     return last_non_zero;
4099 }
4100
4101 //#define REFINE_STATS 1
4102 static int16_t basis[64][64];
4103
4104 static void build_basis(uint8_t *perm){
4105     int i, j, x, y;
4106     emms_c();
4107     for(i=0; i<8; i++){
4108         for(j=0; j<8; j++){
4109             for(y=0; y<8; y++){
4110                 for(x=0; x<8; x++){
4111                     double s= 0.25*(1<<BASIS_SHIFT);
4112                     int index= 8*i + j;
4113                     int perm_index= perm[index];
4114                     if(i==0) s*= sqrt(0.5);
4115                     if(j==0) s*= sqrt(0.5);
4116                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4117                 }
4118             }
4119         }
4120     }
4121 }
4122
/**
 * Iteratively refine the already quantized levels of one block.
 *
 * Starting from the levels in block[], every pass tries changing each
 * considered coefficient (and, for intra blocks, the DC level) by +1 and -1.
 * A candidate is scored as lambda * (change in VLC bits) plus the value
 * returned by s->mpvencdsp.try_8x8basis() for the resulting residual; the
 * single best change of a pass is applied, and the loop ends when no change
 * lowers the score.
 *
 * @param s      encoder context
 * @param block  quantized levels, addressed through
 *               s->intra_scantable.permutated; updated in place
 * @param weight per-sample weights; overwritten with values remapped to 16..63
 * @param orig   the 64 original samples the reconstruction is compared with
 * @param n      block number; n < 4 selects the luma DC scale, n > 3 the
 *               chroma VLC length tables when those are set
 * @param qscale quantizer scale
 * @return scan index of the last non-zero coefficient after refinement
 */
4123 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4124                         int16_t *block, int16_t *weight, int16_t *orig,
4125                         int n, int qscale){
         /* rem[]: residual that add_8x8basis()/try_8x8basis() operate on, in RECON_SHIFT fixed point */
4126     int16_t rem[64];
4127     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4128     const uint8_t *scantable= s->intra_scantable.scantable;
4129     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4130 //    unsigned int threshold1, threshold2;
4131 //    int bias=0;
         /* run_tab[k]: number of zeros preceding the k-th non-zero coefficient in scan order */
4132     int run_tab[65];
4133     int prev_run=0;
4134     int prev_level=0;
4135     int qmul, qadd, start_i, last_non_zero, i, dc;
4136     uint8_t * length;
4137     uint8_t * last_length;
4138     int lambda;
4139     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4140 #ifdef REFINE_STATS
4141 static int count=0;
4142 static int after_last=0;
4143 static int to_zero=0;
4144 static int from_zero=0;
4145 static int raise=0;
4146 static int lower=0;
4147 static int messed_sign=0;
4148 #endif
4149
         /* the basis table is built on first use; an all-zero basis[0][0] marks it as not built */
4150     if(basis[0][0] == 0)
4151         build_basis(s->idsp.idct_permutation);
4152
4153     qmul= qscale*2;
4154     qadd= (qscale-1)|1;
4155     if (s->mb_intra) {
4156         if (!s->h263_aic) {
4157             if (n < 4)
4158                 q = s->y_dc_scale;
4159             else
4160                 q = s->c_dc_scale;
4161         } else{
4162             /* For AIC we skip quant/dequant of INTRADC */
4163             q = 1;
4164             qadd=0;
4165         }
4166         q <<= RECON_SHIFT-3;
4167         /* note: block[0] is assumed to be positive */
4168         dc= block[0]*q;
4169 //        block[0] = (block[0] + (q >> 1)) / q;
4170         start_i = 1;
4171 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4172 //            bias= 1<<(QMAT_SHIFT-1);
4173         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4174             length     = s->intra_chroma_ac_vlc_length;
4175             last_length= s->intra_chroma_ac_vlc_last_length;
4176         } else {
4177             length     = s->intra_ac_vlc_length;
4178             last_length= s->intra_ac_vlc_last_length;
4179         }
4180     } else {
4181         dc= 0;
4182         start_i = 0;
4183         length     = s->inter_ac_vlc_length;
4184         last_length= s->inter_ac_vlc_last_length;
4185     }
4186     last_non_zero = s->block_last_index[n];
4187
4188 #ifdef REFINE_STATS
4189 {START_TIMER
4190 #endif
         /* start the residual as (scaled DC + rounding constant) - original sample */
4191     dc += (1<<(RECON_SHIFT-1));
4192     for(i=0; i<64; i++){
4193         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4194     }
4195 #ifdef REFINE_STATS
4196 STOP_TIMER("memset rem[]")}
4197 #endif
         /* remap the weights in place and accumulate the sum of their squares */
4198     sum=0;
4199     for(i=0; i<64; i++){
4200         int one= 36;
4201         int qns=4;
4202         int w;
4203
4204         w= FFABS(weight[i]) + qns*one;
4205         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4206
4207         weight[i] = w;
4208 //        w=weight[i] = (63*qns + (w/2)) / w;
4209
4210         av_assert2(w>0);
4211         av_assert2(w<(1<<6));
4212         sum += w*w;
4213     }
         /* lambda is scaled by the sum of squared weights */
4214     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4215 #ifdef REFINE_STATS
4216 {START_TIMER
4217 #endif
         /* add the dequantized contribution of every non-zero level to rem[] and record the run lengths */
4218     run=0;
4219     rle_index=0;
4220     for(i=start_i; i<=last_non_zero; i++){
4221         int j= perm_scantable[i];
4222         const int level= block[j];
4223         int coeff;
4224
4225         if(level){
4226             if(level<0) coeff= qmul*level - qadd;
4227             else        coeff= qmul*level + qadd;
4228             run_tab[rle_index++]=run;
4229             run=0;
4230
4231             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4232         }else{
4233             run++;
4234         }
4235     }
4236 #ifdef REFINE_STATS
4237 if(last_non_zero>0){
4238 STOP_TIMER("init rem[]")
4239 }
4240 }
4241
4242 {START_TIMER
4243 #endif
         /* one pass per iteration: find the single best +1/-1 change, apply it, repeat until none helps */
4244     for(;;){
             /* baseline: score of the current residual with a zero change */
4245         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4246         int best_coeff=0;
4247         int best_change=0;
4248         int run2, best_unquant_change=0, analyze_gradient;
4249 #ifdef REFINE_STATS
4250 {START_TIMER
4251 #endif
4252         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4253
             /* d1[] = fdct of the weighted residual; used below to reject 0 -> +-1 changes by sign */
4254         if(analyze_gradient){
4255 #ifdef REFINE_STATS
4256 {START_TIMER
4257 #endif
4258             for(i=0; i<64; i++){
4259                 int w= weight[i];
4260
4261                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4262             }
4263 #ifdef REFINE_STATS
4264 STOP_TIMER("rem*w*w")}
4265 {START_TIMER
4266 #endif
4267             s->fdsp.fdct(d1);
4268 #ifdef REFINE_STATS
4269 STOP_TIMER("dct")}
4270 #endif
4271         }
4272
             /* intra blocks (start_i != 0): try the DC level +-1; no bit cost is added for this change */
4273         if(start_i){
4274             const int level= block[0];
4275             int change, old_coeff;
4276
4277             av_assert2(s->mb_intra);
4278
4279             old_coeff= q*level;
4280
4281             for(change=-1; change<=1; change+=2){
4282                 int new_level= level + change;
4283                 int score, new_coeff;
4284
4285                 new_coeff= q*new_level;
4286                 if(new_coeff >= 2048 || new_coeff < 0)
4287                     continue;
4288
4289                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4290                                                   new_coeff - old_coeff);
4291                 if(score<best_score){
4292                     best_score= score;
4293                     best_coeff= 0;
4294                     best_change= change;
4295                     best_unquant_change= new_coeff - old_coeff;
4296                 }
4297             }
4298         }
4299
             /* run: zeros since the previous non-zero level; run2: zeros remaining before the next one */
4300         run=0;
4301         rle_index=0;
4302         run2= run_tab[rle_index++];
4303         prev_level=0;
4304         prev_run=0;
4305
4306         for(i=start_i; i<64; i++){
4307             int j= perm_scantable[i];
4308             const int level= block[j];
4309             int change, old_coeff;
4310
                 /* below noise shaping level 3 only positions up to last_non_zero + 1 are tried */
4311             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4312                 break;
4313
4314             if(level){
4315                 if(level<0) old_coeff= qmul*level - qadd;
4316                 else        old_coeff= qmul*level + qadd;
4317                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4318             }else{
4319                 old_coeff=0;
4320                 run2--;
4321                 av_assert2(run2>=0 || i >= last_non_zero );
4322             }
4323
4324             for(change=-1; change<=1; change+=2){
4325                 int new_level= level + change;
4326                 int score, new_coeff, unquant_change;
4327
4328                 score=0;
                     /* below noise shaping level 2 a change may not increase the magnitude */
4329                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4330                    continue;
4331
4332                 if(new_level){
4333                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4334                     else            new_coeff= qmul*new_level + qadd;
4335                     if(new_coeff >= 2048 || new_coeff <= -2048)
4336                         continue;
4337                     //FIXME check for overflow
4338
4339                     if(level){
                             /* non-zero -> non-zero: bit cost is the difference of the two code lengths (the last coefficient uses last_length) */
4340                         if(level < 63 && level > -63){
4341                             if(i < last_non_zero)
4342                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4343                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4344                             else
4345                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4346                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4347                         }
4348                     }else{
4349                         av_assert2(FFABS(new_level)==1);
4350
                             /* skip the candidate when the gradient is non-zero and has the same sign as the new level */
4351                         if(analyze_gradient){
4352                             int g= d1[ scantable[i] ];
4353                             if(g && (g^new_level) >= 0)
4354                                 continue;
4355                         }
4356
                             /* zero -> +-1 before the last coefficient: a run is split in two; index 65 (1 + 64) is used for either sign */
4357                         if(i < last_non_zero){
4358                             int next_i= i + run2 + 1;
4359                             int next_level= block[ perm_scantable[next_i] ] + 64;
4360
4361                             if(next_level&(~127))
4362                                 next_level= 0;
4363
4364                             if(next_i < last_non_zero)
4365                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4366                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4367                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4368                             else
4369                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4370                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4371                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4372                         }else{
                                 /* new coefficient after the current last one: the previous last is re-costed with the non-last table */
4373                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4374                             if(prev_level){
4375                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4376                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4377                             }
4378                         }
4379                     }
4380                 }else{
                         /* +-1 -> zero: the surrounding runs merge (mirror of the zero -> +-1 case above) */
4381                     new_coeff=0;
4382                     av_assert2(FFABS(level)==1);
4383
4384                     if(i < last_non_zero){
4385                         int next_i= i + run2 + 1;
4386                         int next_level= block[ perm_scantable[next_i] ] + 64;
4387
4388                         if(next_level&(~127))
4389                             next_level= 0;
4390
4391                         if(next_i < last_non_zero)
4392                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4393                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4394                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4395                         else
4396                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4397                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4398                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4399                     }else{
4400                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4401                         if(prev_level){
4402                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4403                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4404                         }
4405                     }
4406                 }
4407
                     /* turn the bit difference into a cost, then add the residual score of the changed coefficient */
4408                 score *= lambda;
4409
4410                 unquant_change= new_coeff - old_coeff;
4411                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4412
4413                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4414                                                    unquant_change);
4415                 if(score<best_score){
4416                     best_score= score;
4417                     best_coeff= i;
4418                     best_change= change;
4419                     best_unquant_change= unquant_change;
4420                 }
4421             }
4422             if(level){
4423                 prev_level= level + 64;
4424                 if(prev_level&(~127))
4425                     prev_level= 0;
4426                 prev_run= run;
4427                 run=0;
4428             }else{
4429                 run++;
4430             }
4431         }
4432 #ifdef REFINE_STATS
4433 STOP_TIMER("iterative step")}
4434 #endif
4435
             /* apply the best change found in this pass, or stop when nothing improved */
4436         if(best_change){
4437             int j= perm_scantable[ best_coeff ];
4438
4439             block[j] += best_change;
4440
4441             if(best_coeff > last_non_zero){
4442                 last_non_zero= best_coeff;
4443                 av_assert2(block[j]);
4444 #ifdef REFINE_STATS
4445 after_last++;
4446 #endif
4447             }else{
4448 #ifdef REFINE_STATS
4449 if(block[j]){
4450     if(block[j] - best_change){
4451         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4452             raise++;
4453         }else{
4454             lower++;
4455         }
4456     }else{
4457         from_zero++;
4458     }
4459 }else{
4460     to_zero++;
4461 }
4462 #endif
                     /* the change may have zeroed the last coefficient: search backwards for the new last one */
4463                 for(; last_non_zero>=start_i; last_non_zero--){
4464                     if(block[perm_scantable[last_non_zero]])
4465                         break;
4466                 }
4467             }
4468 #ifdef REFINE_STATS
4469 count++;
4470 if(256*256*256*64 % count == 0){
4471     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4472 }
4473 #endif
                 /* rebuild the run-length table for the modified block */
4474             run=0;
4475             rle_index=0;
4476             for(i=start_i; i<=last_non_zero; i++){
4477                 int j= perm_scantable[i];
4478                 const int level= block[j];
4479
4480                  if(level){
4481                      run_tab[rle_index++]=run;
4482                      run=0;
4483                  }else{
4484                      run++;
4485                  }
4486             }
4487
                 /* fold the applied change into the residual (j is the changed coefficient, declared at the top of this branch) */
4488             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4489         }else{
4490             break;
4491         }
4492     }
4493 #ifdef REFINE_STATS
4494 if(last_non_zero>0){
4495 STOP_TIMER("iterative search")
4496 }
4497 }
4498 #endif
4499
4500     return last_non_zero;
4501 }
4502
/**
 * Move the coefficients of an 8x8 block to their permuted positions.
 *
 * Only the positions scantable[0] .. scantable[last] are touched: their values
 * are saved and cleared first, then each saved value from position j is stored
 * at permutation[j]. Nothing is done when last <= 0.
 *
 * @param block       the 64 coefficients, permuted in place
 * @param permutation the permutation vector (source index -> destination index)
 * @param scantable   the scan order in use; it only limits which positions are
 *                    visited, the block is not reordered into scan order
 * @param last        index, in scantable order, of the last non-zero
 *                    coefficient
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int n;

    if (last < 1)
        return;
    //FIXME it is ok but not clean and might fail for some permutations
    // if (permutation[1] == 1)
    // return;

    /* first pass: stash every visited coefficient and clear its old slot */
    for (n = 0; n <= last; n++) {
        const int src = scantable[n];

        saved[src] = block[src];
        block[src] = 0;
    }

    /* second pass: drop each stashed coefficient at its permuted slot */
    for (n = 0; n <= last; n++) {
        const int src = scantable[n];

        block[permutation[src]] = saved[src];
    }
}
4538
4539 int ff_dct_quantize_c(MpegEncContext *s,
4540                         int16_t *block, int n,
4541                         int qscale, int *overflow)
4542 {
4543     int i, j, level, last_non_zero, q, start_i;
4544     const int *qmat;
4545     const uint8_t *scantable= s->intra_scantable.scantable;
4546     int bias;
4547     int max=0;
4548     unsigned int threshold1, threshold2;
4549
4550     s->fdsp.fdct(block);
4551
4552     if(s->dct_error_sum)
4553         s->denoise_dct(s, block);
4554
4555     if (s->mb_intra) {
4556         if (!s->h263_aic) {
4557             if (n < 4)
4558                 q = s->y_dc_scale;
4559             else
4560                 q = s->c_dc_scale;
4561             q = q << 3;
4562         } else
4563             /* For AIC we skip quant/dequant of INTRADC */
4564             q = 1 << 3;
4565
4566         /* note: block[0] is assumed to be positive */
4567         block[0] = (block[0] + (q >> 1)) / q;
4568         start_i = 1;
4569         last_non_zero = 0;
4570         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4571         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4572     } else {
4573         start_i = 0;
4574         last_non_zero = -1;
4575         qmat = s->q_inter_matrix[qscale];
4576         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4577     }
4578     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4579     threshold2= (threshold1<<1);
4580     for(i=63;i>=start_i;i--) {
4581         j = scantable[i];
4582         level = block[j] * qmat[j];
4583
4584         if(((unsigned)(level+threshold1))>threshold2){
4585             last_non_zero = i;
4586             break;
4587         }else{
4588             block[j]=0;
4589         }
4590     }
4591     for(i=start_i; i<=last_non_zero; i++) {
4592         j = scantable[i];
4593         level = block[j] * qmat[j];
4594
4595 //        if(   bias+level >= (1<<QMAT_SHIFT)
4596 //           || bias-level >= (1<<QMAT_SHIFT)){
4597         if(((unsigned)(level+threshold1))>threshold2){
4598             if(level>0){
4599                 level= (bias + level)>>QMAT_SHIFT;
4600                 block[j]= level;
4601             }else{
4602                 level= (bias - level)>>QMAT_SHIFT;
4603                 block[j]= -level;
4604             }
4605             max |=level;
4606         }else{
4607             block[j]=0;
4608         }
4609     }
4610     *overflow= s->max_qcoeff < max; //overflow might have happened
4611
4612     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4613     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4614         block_permute(block, s->idsp.idct_permutation,
4615                       scantable, last_non_zero);
4616
4617     return last_non_zero;
4618 }
4619
/* Byte offset of field x inside MpegEncContext, for the option tables below. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags: video parameter, encoding parameter. The expansion is
 * parenthesized so that VE stays a single operand inside larger expressions. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4622 static const AVOption h263_options[] = {
4623     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4624     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4625     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4626     FF_MPV_COMMON_OPTS
4627     { NULL },
4628 };
4629
4630 static const AVClass h263_class = {
4631     .class_name = "H.263 encoder",
4632     .item_name  = av_default_item_name,
4633     .option     = h263_options,
4634     .version    = LIBAVUTIL_VERSION_INT,
4635 };
4636
4637 AVCodec ff_h263_encoder = {
4638     .name           = "h263",
4639     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4640     .type           = AVMEDIA_TYPE_VIDEO,
4641     .id             = AV_CODEC_ID_H263,
4642     .priv_data_size = sizeof(MpegEncContext),
4643     .init           = ff_mpv_encode_init,
4644     .encode2        = ff_mpv_encode_picture,
4645     .close          = ff_mpv_encode_end,
4646     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4647     .priv_class     = &h263_class,
4648 };
4649
4650 static const AVOption h263p_options[] = {
4651     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4652     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4653     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4654     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4655     FF_MPV_COMMON_OPTS
4656     { NULL },
4657 };
4658 static const AVClass h263p_class = {
4659     .class_name = "H.263p encoder",
4660     .item_name  = av_default_item_name,
4661     .option     = h263p_options,
4662     .version    = LIBAVUTIL_VERSION_INT,
4663 };
4664
4665 AVCodec ff_h263p_encoder = {
4666     .name           = "h263p",
4667     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4668     .type           = AVMEDIA_TYPE_VIDEO,
4669     .id             = AV_CODEC_ID_H263P,
4670     .priv_data_size = sizeof(MpegEncContext),
4671     .init           = ff_mpv_encode_init,
4672     .encode2        = ff_mpv_encode_picture,
4673     .close          = ff_mpv_encode_end,
4674     .capabilities   = CODEC_CAP_SLICE_THREADS,
4675     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4676     .priv_class     = &h263p_class,
4677 };
4678
4679 static const AVClass msmpeg4v2_class = {
4680     .class_name = "msmpeg4v2 encoder",
4681     .item_name  = av_default_item_name,
4682     .option     = ff_mpv_generic_options,
4683     .version    = LIBAVUTIL_VERSION_INT,
4684 };
4685
4686 AVCodec ff_msmpeg4v2_encoder = {
4687     .name           = "msmpeg4v2",
4688     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4689     .type           = AVMEDIA_TYPE_VIDEO,
4690     .id             = AV_CODEC_ID_MSMPEG4V2,
4691     .priv_data_size = sizeof(MpegEncContext),
4692     .init           = ff_mpv_encode_init,
4693     .encode2        = ff_mpv_encode_picture,
4694     .close          = ff_mpv_encode_end,
4695     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4696     .priv_class     = &msmpeg4v2_class,
4697 };
4698
4699 static const AVClass msmpeg4v3_class = {
4700     .class_name = "msmpeg4v3 encoder",
4701     .item_name  = av_default_item_name,
4702     .option     = ff_mpv_generic_options,
4703     .version    = LIBAVUTIL_VERSION_INT,
4704 };
4705
4706 AVCodec ff_msmpeg4v3_encoder = {
4707     .name           = "msmpeg4",
4708     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4709     .type           = AVMEDIA_TYPE_VIDEO,
4710     .id             = AV_CODEC_ID_MSMPEG4V3,
4711     .priv_data_size = sizeof(MpegEncContext),
4712     .init           = ff_mpv_encode_init,
4713     .encode2        = ff_mpv_encode_picture,
4714     .close          = ff_mpv_encode_end,
4715     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4716     .priv_class     = &msmpeg4v3_class,
4717 };
4718
4719 static const AVClass wmv1_class = {
4720     .class_name = "wmv1 encoder",
4721     .item_name  = av_default_item_name,
4722     .option     = ff_mpv_generic_options,
4723     .version    = LIBAVUTIL_VERSION_INT,
4724 };
4725
4726 AVCodec ff_wmv1_encoder = {
4727     .name           = "wmv1",
4728     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4729     .type           = AVMEDIA_TYPE_VIDEO,
4730     .id             = AV_CODEC_ID_WMV1,
4731     .priv_data_size = sizeof(MpegEncContext),
4732     .init           = ff_mpv_encode_init,
4733     .encode2        = ff_mpv_encode_picture,
4734     .close          = ff_mpv_encode_end,
4735     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4736     .priv_class     = &wmv1_class,
4737 };