]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit 'b5c1c16247ab7d166c84eaf4564e49a1535fdaaf'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "mpegvideodata.h"
44 #include "h261.h"
45 #include "h263.h"
46 #include "h263data.h"
47 #include "mjpegenc_common.h"
48 #include "mathops.h"
49 #include "mpegutils.h"
50 #include "mjpegenc.h"
51 #include "msmpeg4.h"
52 #include "pixblockdsp.h"
53 #include "qpeldsp.h"
54 #include "faandct.h"
55 #include "thread.h"
56 #include "aandcttab.h"
57 #include "flv.h"
58 #include "mpeg4video.h"
59 #include "internal.h"
60 #include "bytestream.h"
61 #include "wmv2.h"
62 #include "rv10.h"
63 #include <limits.h>
64 #include "sp5x.h"
65
/* Number of fractional bits carried by the quantizer bias values used in this
 * file (see "3 << (QUANT_BIAS_SHIFT - 3)" and "bias << (16 - QUANT_BIAS_SHIFT)"). */
66 #define QUANT_BIAS_SHIFT 8
67
/* Numerator shift for the 16-bit reciprocal table qmat16 built in
 * ff_convert_matrix(). */
68 #define QMAT_SHIFT_MMX 16
/* Numerator shift for the int reciprocal table qmat built in
 * ff_convert_matrix(). */
69 #define QMAT_SHIFT 21
70
/* Forward declarations of file-local helpers; presumably defined further down
 * in this file (not visible in this excerpt). */
71 static int encode_picture(MpegEncContext *s, int picture_number);
72 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
73 static int sse_mb(MpegEncContext *s);
74 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
75 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
76
/* Default tables that mpv_encode_defaults() installs into s->me.mv_penalty and
 * s->fcode_tab respectively. */
77 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
78 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
79
/* Generic option table: the entries from FF_MPV_COMMON_OPTS followed by the
 * NULL terminator entry. */
80 const AVOption ff_mpv_generic_options[] = {
81     FF_MPV_COMMON_OPTS
82     { NULL },
83 };
84
85 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
86                        uint16_t (*qmat16)[2][64],
87                        const uint16_t *quant_matrix,
88                        int bias, int qmin, int qmax, int intra)
89 {
90     FDCTDSPContext *fdsp = &s->fdsp;
91     int qscale;
92     int shift = 0;
93
94     for (qscale = qmin; qscale <= qmax; qscale++) {
95         int i;
96         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
97 #if CONFIG_FAANDCT
98             fdsp->fdct == ff_faandct            ||
99 #endif /* CONFIG_FAANDCT */
100             fdsp->fdct == ff_jpeg_fdct_islow_10) {
101             for (i = 0; i < 64; i++) {
102                 const int j = s->idsp.idct_permutation[i];
103                 int64_t den = (int64_t) qscale * quant_matrix[j];
104                 /* 16 <= qscale * quant_matrix[i] <= 7905
105                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
106                  *             19952 <=              x  <= 249205026
107                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
108                  *           3444240 >= (1 << 36) / (x) >= 275 */
109
110                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
111             }
112         } else if (fdsp->fdct == ff_fdct_ifast) {
113             for (i = 0; i < 64; i++) {
114                 const int j = s->idsp.idct_permutation[i];
115                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
116                 /* 16 <= qscale * quant_matrix[i] <= 7905
117                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
118                  *             19952 <=              x  <= 249205026
119                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
120                  *           3444240 >= (1 << 36) / (x) >= 275 */
121
122                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
123             }
124         } else {
125             for (i = 0; i < 64; i++) {
126                 const int j = s->idsp.idct_permutation[i];
127                 int64_t den = (int64_t) qscale * quant_matrix[j];
128                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
129                  * Assume x = qscale * quant_matrix[i]
130                  * So             16 <=              x  <= 7905
131                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
132                  * so          32768 >= (1 << 19) / (x) >= 67 */
133                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
134                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
135                 //                    (qscale * quant_matrix[i]);
136                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
137
138                 if (qmat16[qscale][0][i] == 0 ||
139                     qmat16[qscale][0][i] == 128 * 256)
140                     qmat16[qscale][0][i] = 128 * 256 - 1;
141                 qmat16[qscale][1][i] =
142                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
143                                 qmat16[qscale][0][i]);
144             }
145         }
146
147         for (i = intra; i < 64; i++) {
148             int64_t max = 8191;
149             if (fdsp->fdct == ff_fdct_ifast) {
150                 max = (8191LL * ff_aanscales[i]) >> 14;
151             }
152             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
153                 shift++;
154             }
155         }
156     }
157     if (shift) {
158         av_log(NULL, AV_LOG_INFO,
159                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
160                QMAT_SHIFT - shift);
161     }
162 }
163
164 static inline void update_qscale(MpegEncContext *s)
165 {
166     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
167                 (FF_LAMBDA_SHIFT + 7);
168     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
169
170     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
171                  FF_LAMBDA_SHIFT;
172 }
173
174 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
175 {
176     int i;
177
178     if (matrix) {
179         put_bits(pb, 1, 1);
180         for (i = 0; i < 64; i++) {
181             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
182         }
183     } else
184         put_bits(pb, 1, 0);
185 }
186
187 /**
188  * init s->current_picture.qscale_table from s->lambda_table
189  */
190 void ff_init_qscale_tab(MpegEncContext *s)
191 {
192     int8_t * const qscale_table = s->current_picture.qscale_table;
193     int i;
194
195     for (i = 0; i < s->mb_num; i++) {
196         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
197         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
198         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
199                                                   s->avctx->qmax);
200     }
201 }
202
203 static void update_duplicate_context_after_me(MpegEncContext *dst,
204                                               MpegEncContext *src)
205 {
206 #define COPY(a) dst->a= src->a
207     COPY(pict_type);
208     COPY(current_picture);
209     COPY(f_code);
210     COPY(b_code);
211     COPY(qscale);
212     COPY(lambda);
213     COPY(lambda2);
214     COPY(picture_in_gop_number);
215     COPY(gop_picture_number);
216     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
217     COPY(progressive_frame);    // FIXME don't set in encode_header
218     COPY(partitioned_frame);    // FIXME don't set in encode_header
219 #undef COPY
220 }
221
222 /**
223  * Set the given MpegEncContext to defaults for encoding.
224  * the changed fields will not depend upon the prior state of the MpegEncContext.
225  */
226 static void mpv_encode_defaults(MpegEncContext *s)
227 {
228     int i;
229     ff_mpv_common_defaults(s);
230
231     for (i = -16; i < 16; i++) {
232         default_fcode_tab[i + MAX_MV] = 1;
233     }
234     s->me.mv_penalty = default_mv_penalty;
235     s->fcode_tab     = default_fcode_tab;
236
237     s->input_picture_number  = 0;
238     s->picture_in_gop_number = 0;
239 }
240
241 av_cold int ff_dct_encode_init(MpegEncContext *s) {
242     if (ARCH_X86)
243         ff_dct_encode_init_x86(s);
244
245     if (CONFIG_H263_ENCODER)
246         ff_h263dsp_init(&s->h263dsp);
247     if (!s->dct_quantize)
248         s->dct_quantize = ff_dct_quantize_c;
249     if (!s->denoise_dct)
250         s->denoise_dct  = denoise_dct_c;
251     s->fast_dct_quantize = s->dct_quantize;
252     if (s->avctx->trellis)
253         s->dct_quantize  = dct_quantize_trellis_c;
254
255     return 0;
256 }
257
258 /* init video encoder */
259 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
260 {
261     MpegEncContext *s = avctx->priv_data;
262     int i, ret, format_supported;
263
264     mpv_encode_defaults(s);
265
266     switch (avctx->codec_id) {
267     case AV_CODEC_ID_MPEG2VIDEO:
268         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
269             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
270             av_log(avctx, AV_LOG_ERROR,
271                    "only YUV420 and YUV422 are supported\n");
272             return -1;
273         }
274         break;
275     case AV_CODEC_ID_MJPEG:
276     case AV_CODEC_ID_AMV:
277         format_supported = 0;
278         /* JPEG color space */
279         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
280             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
281             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
282             (avctx->color_range == AVCOL_RANGE_JPEG &&
283              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
284               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
285               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
286             format_supported = 1;
287         /* MPEG color space */
288         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
289                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
290                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
291                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
292             format_supported = 1;
293
294         if (!format_supported) {
295             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
296             return -1;
297         }
298         break;
299     default:
300         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
301             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
302             return -1;
303         }
304     }
305
306     switch (avctx->pix_fmt) {
307     case AV_PIX_FMT_YUVJ444P:
308     case AV_PIX_FMT_YUV444P:
309         s->chroma_format = CHROMA_444;
310         break;
311     case AV_PIX_FMT_YUVJ422P:
312     case AV_PIX_FMT_YUV422P:
313         s->chroma_format = CHROMA_422;
314         break;
315     case AV_PIX_FMT_YUVJ420P:
316     case AV_PIX_FMT_YUV420P:
317     default:
318         s->chroma_format = CHROMA_420;
319         break;
320     }
321
322     s->bit_rate = avctx->bit_rate;
323     s->width    = avctx->width;
324     s->height   = avctx->height;
325     if (avctx->gop_size > 600 &&
326         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
327         av_log(avctx, AV_LOG_WARNING,
328                "keyframe interval too large!, reducing it from %d to %d\n",
329                avctx->gop_size, 600);
330         avctx->gop_size = 600;
331     }
332     s->gop_size     = avctx->gop_size;
333     s->avctx        = avctx;
334     if (avctx->max_b_frames > MAX_B_FRAMES) {
335         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
336                "is %d.\n", MAX_B_FRAMES);
337         avctx->max_b_frames = MAX_B_FRAMES;
338     }
339     s->max_b_frames = avctx->max_b_frames;
340     s->codec_id     = avctx->codec->id;
341     s->strict_std_compliance = avctx->strict_std_compliance;
342     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
343     s->mpeg_quant         = avctx->mpeg_quant;
344     s->rtp_mode           = !!avctx->rtp_payload_size;
345     s->intra_dc_precision = avctx->intra_dc_precision;
346
347     // workaround some differences between how applications specify dc precision
348     if (s->intra_dc_precision < 0) {
349         s->intra_dc_precision += 8;
350     } else if (s->intra_dc_precision >= 8)
351         s->intra_dc_precision -= 8;
352
353     if (s->intra_dc_precision < 0) {
354         av_log(avctx, AV_LOG_ERROR,
355                 "intra dc precision must be positive, note some applications use"
356                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
357         return AVERROR(EINVAL);
358     }
359
360     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
361         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
362         return AVERROR(EINVAL);
363     }
364     s->user_specified_pts = AV_NOPTS_VALUE;
365
366     if (s->gop_size <= 1) {
367         s->intra_only = 1;
368         s->gop_size   = 12;
369     } else {
370         s->intra_only = 0;
371     }
372
373 #if FF_API_MOTION_EST
374 FF_DISABLE_DEPRECATION_WARNINGS
375     s->me_method = avctx->me_method;
376 FF_ENABLE_DEPRECATION_WARNINGS
377 #endif
378
379     /* Fixed QSCALE */
380     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
381
382 #if FF_API_MPV_OPT
383     FF_DISABLE_DEPRECATION_WARNINGS
384     if (avctx->border_masking != 0.0)
385         s->border_masking = avctx->border_masking;
386     FF_ENABLE_DEPRECATION_WARNINGS
387 #endif
388
389     s->adaptive_quant = (s->avctx->lumi_masking ||
390                          s->avctx->dark_masking ||
391                          s->avctx->temporal_cplx_masking ||
392                          s->avctx->spatial_cplx_masking  ||
393                          s->avctx->p_masking      ||
394                          s->border_masking ||
395                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
396                         !s->fixed_qscale;
397
398     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
399
400     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
401         switch(avctx->codec_id) {
402         case AV_CODEC_ID_MPEG1VIDEO:
403         case AV_CODEC_ID_MPEG2VIDEO:
404             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
405             break;
406         case AV_CODEC_ID_MPEG4:
407         case AV_CODEC_ID_MSMPEG4V1:
408         case AV_CODEC_ID_MSMPEG4V2:
409         case AV_CODEC_ID_MSMPEG4V3:
410             if       (avctx->rc_max_rate >= 15000000) {
411                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
412             } else if(avctx->rc_max_rate >=  2000000) {
413                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
414             } else if(avctx->rc_max_rate >=   384000) {
415                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
416             } else
417                 avctx->rc_buffer_size = 40;
418             avctx->rc_buffer_size *= 16384;
419             break;
420         }
421         if (avctx->rc_buffer_size) {
422             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
423         }
424     }
425
426     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
427         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
428         return -1;
429     }
430
431     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
432         av_log(avctx, AV_LOG_INFO,
433                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
434     }
435
436     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
437         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
438         return -1;
439     }
440
441     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
442         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
443         return -1;
444     }
445
446     if (avctx->rc_max_rate &&
447         avctx->rc_max_rate == avctx->bit_rate &&
448         avctx->rc_max_rate != avctx->rc_min_rate) {
449         av_log(avctx, AV_LOG_INFO,
450                "impossible bitrate constraints, this will fail\n");
451     }
452
453     if (avctx->rc_buffer_size &&
454         avctx->bit_rate * (int64_t)avctx->time_base.num >
455             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
456         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
457         return -1;
458     }
459
460     if (!s->fixed_qscale &&
461         avctx->bit_rate * av_q2d(avctx->time_base) >
462             avctx->bit_rate_tolerance) {
463         av_log(avctx, AV_LOG_WARNING,
464                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
465         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
466     }
467
468     if (s->avctx->rc_max_rate &&
469         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
470         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
471          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
472         90000LL * (avctx->rc_buffer_size - 1) >
473             s->avctx->rc_max_rate * 0xFFFFLL) {
474         av_log(avctx, AV_LOG_INFO,
475                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
476                "specified vbv buffer is too large for the given bitrate!\n");
477     }
478
479     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
480         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
481         s->codec_id != AV_CODEC_ID_FLV1) {
482         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
483         return -1;
484     }
485
486     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
487         av_log(avctx, AV_LOG_ERROR,
488                "OBMC is only supported with simple mb decision\n");
489         return -1;
490     }
491
492     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
493         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
494         return -1;
495     }
496
497     if (s->max_b_frames                    &&
498         s->codec_id != AV_CODEC_ID_MPEG4      &&
499         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
500         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
501         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
502         return -1;
503     }
504     if (s->max_b_frames < 0) {
505         av_log(avctx, AV_LOG_ERROR,
506                "max b frames must be 0 or positive for mpegvideo based encoders\n");
507         return -1;
508     }
509
510     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
511          s->codec_id == AV_CODEC_ID_H263  ||
512          s->codec_id == AV_CODEC_ID_H263P) &&
513         (avctx->sample_aspect_ratio.num > 255 ||
514          avctx->sample_aspect_ratio.den > 255)) {
515         av_log(avctx, AV_LOG_WARNING,
516                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
517                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
518         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
519                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
520     }
521
522     if ((s->codec_id == AV_CODEC_ID_H263  ||
523          s->codec_id == AV_CODEC_ID_H263P) &&
524         (avctx->width  > 2048 ||
525          avctx->height > 1152 )) {
526         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
527         return -1;
528     }
529     if ((s->codec_id == AV_CODEC_ID_H263  ||
530          s->codec_id == AV_CODEC_ID_H263P) &&
531         ((avctx->width &3) ||
532          (avctx->height&3) )) {
533         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
534         return -1;
535     }
536
537     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
538         (avctx->width  > 4095 ||
539          avctx->height > 4095 )) {
540         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
541         return -1;
542     }
543
544     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
545         (avctx->width  > 16383 ||
546          avctx->height > 16383 )) {
547         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
548         return -1;
549     }
550
551     if (s->codec_id == AV_CODEC_ID_RV10 &&
552         (avctx->width &15 ||
553          avctx->height&15 )) {
554         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
555         return AVERROR(EINVAL);
556     }
557
558     if (s->codec_id == AV_CODEC_ID_RV20 &&
559         (avctx->width &3 ||
560          avctx->height&3 )) {
561         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
562         return AVERROR(EINVAL);
563     }
564
565     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
566          s->codec_id == AV_CODEC_ID_WMV2) &&
567          avctx->width & 1) {
568          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
569          return -1;
570     }
571
572     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
573         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
574         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
575         return -1;
576     }
577
578     // FIXME mpeg2 uses that too
579     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
580                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
581         av_log(avctx, AV_LOG_ERROR,
582                "mpeg2 style quantization not supported by codec\n");
583         return -1;
584     }
585
586     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
587         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
588         return -1;
589     }
590
591     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
592         s->avctx->mb_decision != FF_MB_DECISION_RD) {
593         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
594         return -1;
595     }
596
597     if (s->avctx->scenechange_threshold < 1000000000 &&
598         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
599         av_log(avctx, AV_LOG_ERROR,
600                "closed gop with scene change detection are not supported yet, "
601                "set threshold to 1000000000\n");
602         return -1;
603     }
604
605     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
606         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
607             av_log(avctx, AV_LOG_ERROR,
608                   "low delay forcing is only available for mpeg2\n");
609             return -1;
610         }
611         if (s->max_b_frames != 0) {
612             av_log(avctx, AV_LOG_ERROR,
613                    "b frames cannot be used with low delay\n");
614             return -1;
615         }
616     }
617
618     if (s->q_scale_type == 1) {
619         if (avctx->qmax > 12) {
620             av_log(avctx, AV_LOG_ERROR,
621                    "non linear quant only supports qmax <= 12 currently\n");
622             return -1;
623         }
624     }
625
626     if (s->avctx->thread_count > 1         &&
627         s->codec_id != AV_CODEC_ID_MPEG4      &&
628         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
629         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
630         s->codec_id != AV_CODEC_ID_MJPEG      &&
631         (s->codec_id != AV_CODEC_ID_H263P)) {
632         av_log(avctx, AV_LOG_ERROR,
633                "multi threaded encoding not supported by codec\n");
634         return -1;
635     }
636
637     if (s->avctx->thread_count < 1) {
638         av_log(avctx, AV_LOG_ERROR,
639                "automatic thread number detection not supported by codec, "
640                "patch welcome\n");
641         return -1;
642     }
643
644     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
645         s->rtp_mode = 1;
646
647     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
648         s->h263_slice_structured = 1;
649
650     if (!avctx->time_base.den || !avctx->time_base.num) {
651         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
652         return -1;
653     }
654
655     if (avctx->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
656         av_log(avctx, AV_LOG_INFO,
657                "notice: b_frame_strategy only affects the first pass\n");
658         avctx->b_frame_strategy = 0;
659     }
660
661     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
662     if (i > 1) {
663         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
664         avctx->time_base.den /= i;
665         avctx->time_base.num /= i;
666         //return -1;
667     }
668
669     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
670         // (a + x * 3 / 8) / x
671         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
672         s->inter_quant_bias = 0;
673     } else {
674         s->intra_quant_bias = 0;
675         // (a - x / 4) / x
676         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
677     }
678
679     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
680         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
681         return AVERROR(EINVAL);
682     }
683
684 #if FF_API_QUANT_BIAS
685 FF_DISABLE_DEPRECATION_WARNINGS
686     if (s->intra_quant_bias == FF_DEFAULT_QUANT_BIAS &&
687         avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
688         s->intra_quant_bias = avctx->intra_quant_bias;
689     if (s->inter_quant_bias == FF_DEFAULT_QUANT_BIAS &&
690         avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
691         s->inter_quant_bias = avctx->inter_quant_bias;
692 FF_ENABLE_DEPRECATION_WARNINGS
693 #endif
694
695     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
696
697     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
698         s->avctx->time_base.den > (1 << 16) - 1) {
699         av_log(avctx, AV_LOG_ERROR,
700                "timebase %d/%d not supported by MPEG 4 standard, "
701                "the maximum admitted value for the timebase denominator "
702                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
703                (1 << 16) - 1);
704         return -1;
705     }
706     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
707
708     switch (avctx->codec->id) {
709     case AV_CODEC_ID_MPEG1VIDEO:
710         s->out_format = FMT_MPEG1;
711         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
712         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
713         break;
714     case AV_CODEC_ID_MPEG2VIDEO:
715         s->out_format = FMT_MPEG1;
716         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
717         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
718         s->rtp_mode   = 1;
719         break;
720     case AV_CODEC_ID_MJPEG:
721     case AV_CODEC_ID_AMV:
722         s->out_format = FMT_MJPEG;
723         s->intra_only = 1; /* force intra only for jpeg */
724         if (!CONFIG_MJPEG_ENCODER ||
725             ff_mjpeg_encode_init(s) < 0)
726             return -1;
727         avctx->delay = 0;
728         s->low_delay = 1;
729         break;
730     case AV_CODEC_ID_H261:
731         if (!CONFIG_H261_ENCODER)
732             return -1;
733         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
734             av_log(avctx, AV_LOG_ERROR,
735                    "The specified picture size of %dx%d is not valid for the "
736                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
737                     s->width, s->height);
738             return -1;
739         }
740         s->out_format = FMT_H261;
741         avctx->delay  = 0;
742         s->low_delay  = 1;
743         s->rtp_mode   = 0; /* Sliced encoding not supported */
744         break;
745     case AV_CODEC_ID_H263:
746         if (!CONFIG_H263_ENCODER)
747             return -1;
748         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
749                              s->width, s->height) == 8) {
750             av_log(avctx, AV_LOG_ERROR,
751                    "The specified picture size of %dx%d is not valid for "
752                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
753                    "352x288, 704x576, and 1408x1152. "
754                    "Try H.263+.\n", s->width, s->height);
755             return -1;
756         }
757         s->out_format = FMT_H263;
758         avctx->delay  = 0;
759         s->low_delay  = 1;
760         break;
761     case AV_CODEC_ID_H263P:
762         s->out_format = FMT_H263;
763         s->h263_plus  = 1;
764         /* Fx */
765         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
766         s->modified_quant  = s->h263_aic;
767         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
768         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
769
770         /* /Fx */
771         /* These are just to be sure */
772         avctx->delay = 0;
773         s->low_delay = 1;
774         break;
775     case AV_CODEC_ID_FLV1:
776         s->out_format      = FMT_H263;
777         s->h263_flv        = 2; /* format = 1; 11-bit codes */
778         s->unrestricted_mv = 1;
779         s->rtp_mode  = 0; /* don't allow GOB */
780         avctx->delay = 0;
781         s->low_delay = 1;
782         break;
783     case AV_CODEC_ID_RV10:
784         s->out_format = FMT_H263;
785         avctx->delay  = 0;
786         s->low_delay  = 1;
787         break;
788     case AV_CODEC_ID_RV20:
789         s->out_format      = FMT_H263;
790         avctx->delay       = 0;
791         s->low_delay       = 1;
792         s->modified_quant  = 1;
793         s->h263_aic        = 1;
794         s->h263_plus       = 1;
795         s->loop_filter     = 1;
796         s->unrestricted_mv = 0;
797         break;
798     case AV_CODEC_ID_MPEG4:
799         s->out_format      = FMT_H263;
800         s->h263_pred       = 1;
801         s->unrestricted_mv = 1;
802         s->low_delay       = s->max_b_frames ? 0 : 1;
803         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
804         break;
805     case AV_CODEC_ID_MSMPEG4V2:
806         s->out_format      = FMT_H263;
807         s->h263_pred       = 1;
808         s->unrestricted_mv = 1;
809         s->msmpeg4_version = 2;
810         avctx->delay       = 0;
811         s->low_delay       = 1;
812         break;
813     case AV_CODEC_ID_MSMPEG4V3:
814         s->out_format        = FMT_H263;
815         s->h263_pred         = 1;
816         s->unrestricted_mv   = 1;
817         s->msmpeg4_version   = 3;
818         s->flipflop_rounding = 1;
819         avctx->delay         = 0;
820         s->low_delay         = 1;
821         break;
822     case AV_CODEC_ID_WMV1:
823         s->out_format        = FMT_H263;
824         s->h263_pred         = 1;
825         s->unrestricted_mv   = 1;
826         s->msmpeg4_version   = 4;
827         s->flipflop_rounding = 1;
828         avctx->delay         = 0;
829         s->low_delay         = 1;
830         break;
831     case AV_CODEC_ID_WMV2:
832         s->out_format        = FMT_H263;
833         s->h263_pred         = 1;
834         s->unrestricted_mv   = 1;
835         s->msmpeg4_version   = 5;
836         s->flipflop_rounding = 1;
837         avctx->delay         = 0;
838         s->low_delay         = 1;
839         break;
840     default:
841         return -1;
842     }
843
844     avctx->has_b_frames = !s->low_delay;
845
846     s->encoding = 1;
847
848     s->progressive_frame    =
849     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
850                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
851                                 s->alternate_scan);
852
853     /* init */
854     ff_mpv_idct_init(s);
855     if (ff_mpv_common_init(s) < 0)
856         return -1;
857
858     ff_fdctdsp_init(&s->fdsp, avctx);
859     ff_me_cmp_init(&s->mecc, avctx);
860     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
861     ff_pixblockdsp_init(&s->pdsp, avctx);
862     ff_qpeldsp_init(&s->qdsp);
863
864     if (s->msmpeg4_version) {
865         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
866                           2 * 2 * (MAX_LEVEL + 1) *
867                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
868     }
869     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
870
871     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
872     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
873     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
874     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
875     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
876     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
877     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
878                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
879     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
880                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
881
882     if (s->avctx->noise_reduction) {
883         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
884                           2 * 64 * sizeof(uint16_t), fail);
885     }
886
887     ff_dct_encode_init(s);
888
889     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
890         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
891
892     s->quant_precision = 5;
893
894     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
895     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
896
897     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
898         ff_h261_encode_init(s);
899     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
900         ff_h263_encode_init(s);
901     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
902         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
903             return ret;
904     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
905         && s->out_format == FMT_MPEG1)
906         ff_mpeg1_encode_init(s);
907
908     /* init q matrix */
909     for (i = 0; i < 64; i++) {
910         int j = s->idsp.idct_permutation[i];
911         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
912             s->mpeg_quant) {
913             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
914             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
915         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
916             s->intra_matrix[j] =
917             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
918         } else {
919             /* mpeg1/2 */
920             s->chroma_intra_matrix[j] =
921             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
922             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
923         }
924         if (s->avctx->intra_matrix)
925             s->intra_matrix[j] = s->avctx->intra_matrix[i];
926         if (s->avctx->inter_matrix)
927             s->inter_matrix[j] = s->avctx->inter_matrix[i];
928     }
929
930     /* precompute matrix */
931     /* for mjpeg, we do include qscale in the matrix */
932     if (s->out_format != FMT_MJPEG) {
933         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
934                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
935                           31, 1);
936         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
937                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
938                           31, 0);
939     }
940
941     if (ff_rate_control_init(s) < 0)
942         return -1;
943
944 #if FF_API_ERROR_RATE
945     FF_DISABLE_DEPRECATION_WARNINGS
946     if (avctx->error_rate)
947         s->error_rate = avctx->error_rate;
948     FF_ENABLE_DEPRECATION_WARNINGS;
949 #endif
950
951 #if FF_API_NORMALIZE_AQP
952     FF_DISABLE_DEPRECATION_WARNINGS
953     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
954         s->mpv_flags |= FF_MPV_FLAG_NAQ;
955     FF_ENABLE_DEPRECATION_WARNINGS;
956 #endif
957
958 #if FF_API_MV0
959     FF_DISABLE_DEPRECATION_WARNINGS
960     if (avctx->flags & CODEC_FLAG_MV0)
961         s->mpv_flags |= FF_MPV_FLAG_MV0;
962     FF_ENABLE_DEPRECATION_WARNINGS
963 #endif
964
965 #if FF_API_MPV_OPT
966     FF_DISABLE_DEPRECATION_WARNINGS
967     if (avctx->rc_qsquish != 0.0)
968         s->rc_qsquish = avctx->rc_qsquish;
969     if (avctx->rc_qmod_amp != 0.0)
970         s->rc_qmod_amp = avctx->rc_qmod_amp;
971     if (avctx->rc_qmod_freq)
972         s->rc_qmod_freq = avctx->rc_qmod_freq;
973     if (avctx->rc_buffer_aggressivity != 1.0)
974         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
975     if (avctx->rc_initial_cplx != 0.0)
976         s->rc_initial_cplx = avctx->rc_initial_cplx;
977     if (avctx->lmin)
978         s->lmin = avctx->lmin;
979     if (avctx->lmax)
980         s->lmax = avctx->lmax;
981
982     if (avctx->rc_eq) {
983         av_freep(&s->rc_eq);
984         s->rc_eq = av_strdup(avctx->rc_eq);
985         if (!s->rc_eq)
986             return AVERROR(ENOMEM);
987     }
988     FF_ENABLE_DEPRECATION_WARNINGS
989 #endif
990
991     if (avctx->b_frame_strategy == 2) {
992         for (i = 0; i < s->max_b_frames + 2; i++) {
993             s->tmp_frames[i] = av_frame_alloc();
994             if (!s->tmp_frames[i])
995                 return AVERROR(ENOMEM);
996
997             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
998             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
999             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
1000
1001             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
1002             if (ret < 0)
1003                 return ret;
1004         }
1005     }
1006
1007     return 0;
1008 fail:
1009     ff_mpv_encode_end(avctx);
1010     return AVERROR_UNKNOWN;
1011 }
1012
1013 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1014 {
1015     MpegEncContext *s = avctx->priv_data;
1016     int i;
1017
1018     ff_rate_control_uninit(s);
1019
1020     ff_mpv_common_end(s);
1021     if (CONFIG_MJPEG_ENCODER &&
1022         s->out_format == FMT_MJPEG)
1023         ff_mjpeg_encode_close(s);
1024
1025     av_freep(&avctx->extradata);
1026
1027     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1028         av_frame_free(&s->tmp_frames[i]);
1029
1030     ff_free_picture_tables(&s->new_picture);
1031     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1032
1033     av_freep(&s->avctx->stats_out);
1034     av_freep(&s->ac_stats);
1035
1036     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1037     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1038     s->q_chroma_intra_matrix=   NULL;
1039     s->q_chroma_intra_matrix16= NULL;
1040     av_freep(&s->q_intra_matrix);
1041     av_freep(&s->q_inter_matrix);
1042     av_freep(&s->q_intra_matrix16);
1043     av_freep(&s->q_inter_matrix16);
1044     av_freep(&s->input_picture);
1045     av_freep(&s->reordered_input_picture);
1046     av_freep(&s->dct_offset);
1047
1048     return 0;
1049 }
1050
/**
 * Sum of absolute differences between a 16x16 block and a constant.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++)
            total += FFABS(line[col] - ref);
    }

    return total;
}
1064
1065 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1066                            uint8_t *ref, int stride)
1067 {
1068     int x, y, w, h;
1069     int acc = 0;
1070
1071     w = s->width  & ~15;
1072     h = s->height & ~15;
1073
1074     for (y = 0; y < h; y += 16) {
1075         for (x = 0; x < w; x += 16) {
1076             int offset = x + y * stride;
1077             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1078                                       stride, 16);
1079             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1080             int sae  = get_sae(src + offset, mean, stride);
1081
1082             acc += sae + 500 < sad;
1083         }
1084     }
1085     return acc;
1086 }
1087
1088 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1089 {
1090     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1091                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1092                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1093                             &s->linesize, &s->uvlinesize);
1094 }
1095
1096 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1097 {
1098     Picture *pic = NULL;
1099     int64_t pts;
1100     int i, display_picture_number = 0, ret;
1101     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1102                                                  (s->low_delay ? 0 : 1);
1103     int direct = 1;
1104
1105     if (pic_arg) {
1106         pts = pic_arg->pts;
1107         display_picture_number = s->input_picture_number++;
1108
1109         if (pts != AV_NOPTS_VALUE) {
1110             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1111                 int64_t last = s->user_specified_pts;
1112
1113                 if (pts <= last) {
1114                     av_log(s->avctx, AV_LOG_ERROR,
1115                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1116                            pts, last);
1117                     return AVERROR(EINVAL);
1118                 }
1119
1120                 if (!s->low_delay && display_picture_number == 1)
1121                     s->dts_delta = pts - last;
1122             }
1123             s->user_specified_pts = pts;
1124         } else {
1125             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1126                 s->user_specified_pts =
1127                 pts = s->user_specified_pts + 1;
1128                 av_log(s->avctx, AV_LOG_INFO,
1129                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1130                        pts);
1131             } else {
1132                 pts = display_picture_number;
1133             }
1134         }
1135     }
1136
1137     if (pic_arg) {
1138         if (!pic_arg->buf[0] ||
1139             pic_arg->linesize[0] != s->linesize ||
1140             pic_arg->linesize[1] != s->uvlinesize ||
1141             pic_arg->linesize[2] != s->uvlinesize)
1142             direct = 0;
1143         if ((s->width & 15) || (s->height & 15))
1144             direct = 0;
1145         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1146             direct = 0;
1147         if (s->linesize & (STRIDE_ALIGN-1))
1148             direct = 0;
1149
1150         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1151                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1152
1153         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1154         if (i < 0)
1155             return i;
1156
1157         pic = &s->picture[i];
1158         pic->reference = 3;
1159
1160         if (direct) {
1161             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1162                 return ret;
1163         }
1164         ret = alloc_picture(s, pic, direct);
1165         if (ret < 0)
1166             return ret;
1167
1168         if (!direct) {
1169             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1170                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1171                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1172                 // empty
1173             } else {
1174                 int h_chroma_shift, v_chroma_shift;
1175                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1176                                                  &h_chroma_shift,
1177                                                  &v_chroma_shift);
1178
1179                 for (i = 0; i < 3; i++) {
1180                     int src_stride = pic_arg->linesize[i];
1181                     int dst_stride = i ? s->uvlinesize : s->linesize;
1182                     int h_shift = i ? h_chroma_shift : 0;
1183                     int v_shift = i ? v_chroma_shift : 0;
1184                     int w = s->width  >> h_shift;
1185                     int h = s->height >> v_shift;
1186                     uint8_t *src = pic_arg->data[i];
1187                     uint8_t *dst = pic->f->data[i];
1188                     int vpad = 16;
1189
1190                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1191                         && !s->progressive_sequence
1192                         && FFALIGN(s->height, 32) - s->height > 16)
1193                         vpad = 32;
1194
1195                     if (!s->avctx->rc_buffer_size)
1196                         dst += INPLACE_OFFSET;
1197
1198                     if (src_stride == dst_stride)
1199                         memcpy(dst, src, src_stride * h);
1200                     else {
1201                         int h2 = h;
1202                         uint8_t *dst2 = dst;
1203                         while (h2--) {
1204                             memcpy(dst2, src, w);
1205                             dst2 += dst_stride;
1206                             src += src_stride;
1207                         }
1208                     }
1209                     if ((s->width & 15) || (s->height & (vpad-1))) {
1210                         s->mpvencdsp.draw_edges(dst, dst_stride,
1211                                                 w, h,
1212                                                 16 >> h_shift,
1213                                                 vpad >> v_shift,
1214                                                 EDGE_BOTTOM);
1215                     }
1216                 }
1217             }
1218         }
1219         ret = av_frame_copy_props(pic->f, pic_arg);
1220         if (ret < 0)
1221             return ret;
1222
1223         pic->f->display_picture_number = display_picture_number;
1224         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1225     }
1226
1227     /* shift buffer entries */
1228     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1229         s->input_picture[i - 1] = s->input_picture[i];
1230
1231     s->input_picture[encoding_delay] = (Picture*) pic;
1232
1233     return 0;
1234 }
1235
1236 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1237 {
1238     int x, y, plane;
1239     int score = 0;
1240     int64_t score64 = 0;
1241
1242     for (plane = 0; plane < 3; plane++) {
1243         const int stride = p->f->linesize[plane];
1244         const int bw = plane ? 1 : 2;
1245         for (y = 0; y < s->mb_height * bw; y++) {
1246             for (x = 0; x < s->mb_width * bw; x++) {
1247                 int off = p->shared ? 0 : 16;
1248                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1249                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1250                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1251
1252                 switch (FFABS(s->avctx->frame_skip_exp)) {
1253                 case 0: score    =  FFMAX(score, v);          break;
1254                 case 1: score   += FFABS(v);                  break;
1255                 case 2: score64 += v * (int64_t)v;                       break;
1256                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1257                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1258                 }
1259             }
1260         }
1261     }
1262     emms_c();
1263
1264     if (score)
1265         score64 = score;
1266     if (s->avctx->frame_skip_exp < 0)
1267         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1268                       -1.0/s->avctx->frame_skip_exp);
1269
1270     if (score64 < s->avctx->frame_skip_threshold)
1271         return 1;
1272     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1273         return 1;
1274     return 0;
1275 }
1276
1277 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1278 {
1279     AVPacket pkt = { 0 };
1280     int ret, got_output;
1281
1282     av_init_packet(&pkt);
1283     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1284     if (ret < 0)
1285         return ret;
1286
1287     ret = pkt.size;
1288     av_free_packet(&pkt);
1289     return ret;
1290 }
1291
1292 static int estimate_best_b_count(MpegEncContext *s)
1293 {
1294     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1295     AVCodecContext *c = avcodec_alloc_context3(NULL);
1296     const int scale = s->avctx->brd_scale;
1297     int i, j, out_size, p_lambda, b_lambda, lambda2;
1298     int64_t best_rd  = INT64_MAX;
1299     int best_b_count = -1;
1300
1301     if (!c)
1302         return AVERROR(ENOMEM);
1303     av_assert0(scale >= 0 && scale <= 3);
1304
1305     //emms_c();
1306     //s->next_picture_ptr->quality;
1307     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1308     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1309     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1310     if (!b_lambda) // FIXME we should do this somewhere else
1311         b_lambda = p_lambda;
1312     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1313                FF_LAMBDA_SHIFT;
1314
1315     c->width        = s->width  >> scale;
1316     c->height       = s->height >> scale;
1317     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1318     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1319     c->mb_decision  = s->avctx->mb_decision;
1320     c->me_cmp       = s->avctx->me_cmp;
1321     c->mb_cmp       = s->avctx->mb_cmp;
1322     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1323     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1324     c->time_base    = s->avctx->time_base;
1325     c->max_b_frames = s->max_b_frames;
1326
1327     if (avcodec_open2(c, codec, NULL) < 0)
1328         return -1;
1329
1330     for (i = 0; i < s->max_b_frames + 2; i++) {
1331         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1332                                                 s->next_picture_ptr;
1333         uint8_t *data[4];
1334
1335         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1336             pre_input = *pre_input_ptr;
1337             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1338
1339             if (!pre_input.shared && i) {
1340                 data[0] += INPLACE_OFFSET;
1341                 data[1] += INPLACE_OFFSET;
1342                 data[2] += INPLACE_OFFSET;
1343             }
1344
1345             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1346                                        s->tmp_frames[i]->linesize[0],
1347                                        data[0],
1348                                        pre_input.f->linesize[0],
1349                                        c->width, c->height);
1350             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1351                                        s->tmp_frames[i]->linesize[1],
1352                                        data[1],
1353                                        pre_input.f->linesize[1],
1354                                        c->width >> 1, c->height >> 1);
1355             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1356                                        s->tmp_frames[i]->linesize[2],
1357                                        data[2],
1358                                        pre_input.f->linesize[2],
1359                                        c->width >> 1, c->height >> 1);
1360         }
1361     }
1362
1363     for (j = 0; j < s->max_b_frames + 1; j++) {
1364         int64_t rd = 0;
1365
1366         if (!s->input_picture[j])
1367             break;
1368
1369         c->error[0] = c->error[1] = c->error[2] = 0;
1370
1371         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1372         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1373
1374         out_size = encode_frame(c, s->tmp_frames[0]);
1375
1376         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1377
1378         for (i = 0; i < s->max_b_frames + 1; i++) {
1379             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1380
1381             s->tmp_frames[i + 1]->pict_type = is_p ?
1382                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1383             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1384
1385             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1386
1387             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1388         }
1389
1390         /* get the delayed frames */
1391         while (out_size) {
1392             out_size = encode_frame(c, NULL);
1393             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1394         }
1395
1396         rd += c->error[0] + c->error[1] + c->error[2];
1397
1398         if (rd < best_rd) {
1399             best_rd = rd;
1400             best_b_count = j;
1401         }
1402     }
1403
1404     avcodec_close(c);
1405     av_freep(&c);
1406
1407     return best_b_count;
1408 }
1409
1410 static int select_input_picture(MpegEncContext *s)
1411 {
1412     int i, ret;
1413
1414     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1415         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1416     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1417
1418     /* set next picture type & ordering */
1419     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1420         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1421             if (s->picture_in_gop_number < s->gop_size &&
1422                 s->next_picture_ptr &&
1423                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1424                 // FIXME check that te gop check above is +-1 correct
1425                 av_frame_unref(s->input_picture[0]->f);
1426
1427                 ff_vbv_update(s, 0);
1428
1429                 goto no_output_pic;
1430             }
1431         }
1432
1433         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1434             !s->next_picture_ptr || s->intra_only) {
1435             s->reordered_input_picture[0] = s->input_picture[0];
1436             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1437             s->reordered_input_picture[0]->f->coded_picture_number =
1438                 s->coded_picture_number++;
1439         } else {
1440             int b_frames;
1441
1442             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1443                 for (i = 0; i < s->max_b_frames + 1; i++) {
1444                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1445
1446                     if (pict_num >= s->rc_context.num_entries)
1447                         break;
1448                     if (!s->input_picture[i]) {
1449                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1450                         break;
1451                     }
1452
1453                     s->input_picture[i]->f->pict_type =
1454                         s->rc_context.entry[pict_num].new_pict_type;
1455                 }
1456             }
1457
1458             if (s->avctx->b_frame_strategy == 0) {
1459                 b_frames = s->max_b_frames;
1460                 while (b_frames && !s->input_picture[b_frames])
1461                     b_frames--;
1462             } else if (s->avctx->b_frame_strategy == 1) {
1463                 for (i = 1; i < s->max_b_frames + 1; i++) {
1464                     if (s->input_picture[i] &&
1465                         s->input_picture[i]->b_frame_score == 0) {
1466                         s->input_picture[i]->b_frame_score =
1467                             get_intra_count(s,
1468                                             s->input_picture[i    ]->f->data[0],
1469                                             s->input_picture[i - 1]->f->data[0],
1470                                             s->linesize) + 1;
1471                     }
1472                 }
1473                 for (i = 0; i < s->max_b_frames + 1; i++) {
1474                     if (!s->input_picture[i] ||
1475                         s->input_picture[i]->b_frame_score - 1 >
1476                             s->mb_num / s->avctx->b_sensitivity)
1477                         break;
1478                 }
1479
1480                 b_frames = FFMAX(0, i - 1);
1481
1482                 /* reset scores */
1483                 for (i = 0; i < b_frames + 1; i++) {
1484                     s->input_picture[i]->b_frame_score = 0;
1485                 }
1486             } else if (s->avctx->b_frame_strategy == 2) {
1487                 b_frames = estimate_best_b_count(s);
1488             } else {
1489                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1490                 b_frames = 0;
1491             }
1492
1493             emms_c();
1494
1495             for (i = b_frames - 1; i >= 0; i--) {
1496                 int type = s->input_picture[i]->f->pict_type;
1497                 if (type && type != AV_PICTURE_TYPE_B)
1498                     b_frames = i;
1499             }
1500             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1501                 b_frames == s->max_b_frames) {
1502                 av_log(s->avctx, AV_LOG_ERROR,
1503                        "warning, too many b frames in a row\n");
1504             }
1505
1506             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1507                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1508                     s->gop_size > s->picture_in_gop_number) {
1509                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1510                 } else {
1511                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1512                         b_frames = 0;
1513                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1514                 }
1515             }
1516
1517             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1518                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1519                 b_frames--;
1520
1521             s->reordered_input_picture[0] = s->input_picture[b_frames];
1522             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1523                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1524             s->reordered_input_picture[0]->f->coded_picture_number =
1525                 s->coded_picture_number++;
1526             for (i = 0; i < b_frames; i++) {
1527                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1528                 s->reordered_input_picture[i + 1]->f->pict_type =
1529                     AV_PICTURE_TYPE_B;
1530                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1531                     s->coded_picture_number++;
1532             }
1533         }
1534     }
1535 no_output_pic:
1536     if (s->reordered_input_picture[0]) {
1537         s->reordered_input_picture[0]->reference =
1538            s->reordered_input_picture[0]->f->pict_type !=
1539                AV_PICTURE_TYPE_B ? 3 : 0;
1540
1541         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1542         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1543             return ret;
1544
1545         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1546             // input is a shared pix, so we can't modifiy it -> alloc a new
1547             // one & ensure that the shared one is reuseable
1548
1549             Picture *pic;
1550             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1551             if (i < 0)
1552                 return i;
1553             pic = &s->picture[i];
1554
1555             pic->reference = s->reordered_input_picture[0]->reference;
1556             if (alloc_picture(s, pic, 0) < 0) {
1557                 return -1;
1558             }
1559
1560             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1561             if (ret < 0)
1562                 return ret;
1563
1564             /* mark us unused / free shared pic */
1565             av_frame_unref(s->reordered_input_picture[0]->f);
1566             s->reordered_input_picture[0]->shared = 0;
1567
1568             s->current_picture_ptr = pic;
1569         } else {
1570             // input is not a shared pix -> reuse buffer for current_pix
1571             s->current_picture_ptr = s->reordered_input_picture[0];
1572             for (i = 0; i < 4; i++) {
1573                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1574             }
1575         }
1576         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1577         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1578                                        s->current_picture_ptr)) < 0)
1579             return ret;
1580
1581         s->picture_number = s->new_picture.f->display_picture_number;
1582     } else {
1583         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1584     }
1585     return 0;
1586 }
1587
1588 static void frame_end(MpegEncContext *s)
1589 {
1590     if (s->unrestricted_mv &&
1591         s->current_picture.reference &&
1592         !s->intra_only) {
1593         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1594         int hshift = desc->log2_chroma_w;
1595         int vshift = desc->log2_chroma_h;
1596         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1597                                 s->current_picture.f->linesize[0],
1598                                 s->h_edge_pos, s->v_edge_pos,
1599                                 EDGE_WIDTH, EDGE_WIDTH,
1600                                 EDGE_TOP | EDGE_BOTTOM);
1601         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1602                                 s->current_picture.f->linesize[1],
1603                                 s->h_edge_pos >> hshift,
1604                                 s->v_edge_pos >> vshift,
1605                                 EDGE_WIDTH >> hshift,
1606                                 EDGE_WIDTH >> vshift,
1607                                 EDGE_TOP | EDGE_BOTTOM);
1608         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1609                                 s->current_picture.f->linesize[2],
1610                                 s->h_edge_pos >> hshift,
1611                                 s->v_edge_pos >> vshift,
1612                                 EDGE_WIDTH >> hshift,
1613                                 EDGE_WIDTH >> vshift,
1614                                 EDGE_TOP | EDGE_BOTTOM);
1615     }
1616
1617     emms_c();
1618
1619     s->last_pict_type                 = s->pict_type;
1620     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1621     if (s->pict_type!= AV_PICTURE_TYPE_B)
1622         s->last_non_b_pict_type = s->pict_type;
1623
1624 #if FF_API_CODED_FRAME
1625 FF_DISABLE_DEPRECATION_WARNINGS
1626     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1627 FF_ENABLE_DEPRECATION_WARNINGS
1628 #endif
1629 }
1630
1631 static void update_noise_reduction(MpegEncContext *s)
1632 {
1633     int intra, i;
1634
1635     for (intra = 0; intra < 2; intra++) {
1636         if (s->dct_count[intra] > (1 << 16)) {
1637             for (i = 0; i < 64; i++) {
1638                 s->dct_error_sum[intra][i] >>= 1;
1639             }
1640             s->dct_count[intra] >>= 1;
1641         }
1642
1643         for (i = 0; i < 64; i++) {
1644             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1645                                        s->dct_count[intra] +
1646                                        s->dct_error_sum[intra][i] / 2) /
1647                                       (s->dct_error_sum[intra][i] + 1);
1648         }
1649     }
1650 }
1651
1652 static int frame_start(MpegEncContext *s)
1653 {
1654     int ret;
1655
1656     /* mark & release old frames */
1657     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1658         s->last_picture_ptr != s->next_picture_ptr &&
1659         s->last_picture_ptr->f->buf[0]) {
1660         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1661     }
1662
1663     s->current_picture_ptr->f->pict_type = s->pict_type;
1664     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1665
1666     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1667     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1668                                    s->current_picture_ptr)) < 0)
1669         return ret;
1670
1671     if (s->pict_type != AV_PICTURE_TYPE_B) {
1672         s->last_picture_ptr = s->next_picture_ptr;
1673         if (!s->droppable)
1674             s->next_picture_ptr = s->current_picture_ptr;
1675     }
1676
1677     if (s->last_picture_ptr) {
1678         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1679         if (s->last_picture_ptr->f->buf[0] &&
1680             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1681                                        s->last_picture_ptr)) < 0)
1682             return ret;
1683     }
1684     if (s->next_picture_ptr) {
1685         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1686         if (s->next_picture_ptr->f->buf[0] &&
1687             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1688                                        s->next_picture_ptr)) < 0)
1689             return ret;
1690     }
1691
1692     if (s->picture_structure!= PICT_FRAME) {
1693         int i;
1694         for (i = 0; i < 4; i++) {
1695             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1696                 s->current_picture.f->data[i] +=
1697                     s->current_picture.f->linesize[i];
1698             }
1699             s->current_picture.f->linesize[i] *= 2;
1700             s->last_picture.f->linesize[i]    *= 2;
1701             s->next_picture.f->linesize[i]    *= 2;
1702         }
1703     }
1704
1705     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1706         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1707         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1708     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1709         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1710         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1711     } else {
1712         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1713         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1714     }
1715
1716     if (s->dct_error_sum) {
1717         av_assert2(s->avctx->noise_reduction && s->encoding);
1718         update_noise_reduction(s);
1719     }
1720
1721     return 0;
1722 }
1723
1724 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1725                           const AVFrame *pic_arg, int *got_packet)
1726 {
1727     MpegEncContext *s = avctx->priv_data;
1728     int i, stuffing_count, ret;
1729     int context_count = s->slice_context_count;
1730
1731     s->picture_in_gop_number++;
1732
1733     if (load_input_picture(s, pic_arg) < 0)
1734         return -1;
1735
1736     if (select_input_picture(s) < 0) {
1737         return -1;
1738     }
1739
1740     /* output? */
1741     if (s->new_picture.f->data[0]) {
1742         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1743         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1744                                               :
1745                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1746         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1747             return ret;
1748         if (s->mb_info) {
1749             s->mb_info_ptr = av_packet_new_side_data(pkt,
1750                                  AV_PKT_DATA_H263_MB_INFO,
1751                                  s->mb_width*s->mb_height*12);
1752             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1753         }
1754
1755         for (i = 0; i < context_count; i++) {
1756             int start_y = s->thread_context[i]->start_mb_y;
1757             int   end_y = s->thread_context[i]->  end_mb_y;
1758             int h       = s->mb_height;
1759             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1760             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1761
1762             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1763         }
1764
1765         s->pict_type = s->new_picture.f->pict_type;
1766         //emms_c();
1767         ret = frame_start(s);
1768         if (ret < 0)
1769             return ret;
1770 vbv_retry:
1771         ret = encode_picture(s, s->picture_number);
1772         if (growing_buffer) {
1773             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1774             pkt->data = s->pb.buf;
1775             pkt->size = avctx->internal->byte_buffer_size;
1776         }
1777         if (ret < 0)
1778             return -1;
1779
1780         avctx->header_bits = s->header_bits;
1781         avctx->mv_bits     = s->mv_bits;
1782         avctx->misc_bits   = s->misc_bits;
1783         avctx->i_tex_bits  = s->i_tex_bits;
1784         avctx->p_tex_bits  = s->p_tex_bits;
1785         avctx->i_count     = s->i_count;
1786         // FIXME f/b_count in avctx
1787         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1788         avctx->skip_count  = s->skip_count;
1789
1790         frame_end(s);
1791
1792         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality, NULL, 0, s->pict_type);
1793
1794         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1795             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1796
1797         if (avctx->rc_buffer_size) {
1798             RateControlContext *rcc = &s->rc_context;
1799             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1800
1801             if (put_bits_count(&s->pb) > max_size &&
1802                 s->lambda < s->lmax) {
1803                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1804                                        (s->qscale + 1) / s->qscale);
1805                 if (s->adaptive_quant) {
1806                     int i;
1807                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1808                         s->lambda_table[i] =
1809                             FFMAX(s->lambda_table[i] + 1,
1810                                   s->lambda_table[i] * (s->qscale + 1) /
1811                                   s->qscale);
1812                 }
1813                 s->mb_skipped = 0;        // done in frame_start()
1814                 // done in encode_picture() so we must undo it
1815                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1816                     if (s->flipflop_rounding          ||
1817                         s->codec_id == AV_CODEC_ID_H263P ||
1818                         s->codec_id == AV_CODEC_ID_MPEG4)
1819                         s->no_rounding ^= 1;
1820                 }
1821                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1822                     s->time_base       = s->last_time_base;
1823                     s->last_non_b_time = s->time - s->pp_time;
1824                 }
1825                 for (i = 0; i < context_count; i++) {
1826                     PutBitContext *pb = &s->thread_context[i]->pb;
1827                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1828                 }
1829                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1830                 goto vbv_retry;
1831             }
1832
1833             av_assert0(s->avctx->rc_max_rate);
1834         }
1835
1836         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1837             ff_write_pass1_stats(s);
1838
1839         for (i = 0; i < 4; i++) {
1840             s->current_picture_ptr->f->error[i] =
1841             s->current_picture.f->error[i] =
1842                 s->current_picture.error[i];
1843             avctx->error[i] += s->current_picture_ptr->f->error[i];
1844         }
1845
1846         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1847             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1848                    avctx->i_tex_bits + avctx->p_tex_bits ==
1849                        put_bits_count(&s->pb));
1850         flush_put_bits(&s->pb);
1851         s->frame_bits  = put_bits_count(&s->pb);
1852
1853         stuffing_count = ff_vbv_update(s, s->frame_bits);
1854         s->stuffing_bits = 8*stuffing_count;
1855         if (stuffing_count) {
1856             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1857                     stuffing_count + 50) {
1858                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1859                 return -1;
1860             }
1861
1862             switch (s->codec_id) {
1863             case AV_CODEC_ID_MPEG1VIDEO:
1864             case AV_CODEC_ID_MPEG2VIDEO:
1865                 while (stuffing_count--) {
1866                     put_bits(&s->pb, 8, 0);
1867                 }
1868             break;
1869             case AV_CODEC_ID_MPEG4:
1870                 put_bits(&s->pb, 16, 0);
1871                 put_bits(&s->pb, 16, 0x1C3);
1872                 stuffing_count -= 4;
1873                 while (stuffing_count--) {
1874                     put_bits(&s->pb, 8, 0xFF);
1875                 }
1876             break;
1877             default:
1878                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1879             }
1880             flush_put_bits(&s->pb);
1881             s->frame_bits  = put_bits_count(&s->pb);
1882         }
1883
1884         /* update mpeg1/2 vbv_delay for CBR */
1885         if (s->avctx->rc_max_rate                          &&
1886             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1887             s->out_format == FMT_MPEG1                     &&
1888             90000LL * (avctx->rc_buffer_size - 1) <=
1889                 s->avctx->rc_max_rate * 0xFFFFLL) {
1890             int vbv_delay, min_delay;
1891             double inbits  = s->avctx->rc_max_rate *
1892                              av_q2d(s->avctx->time_base);
1893             int    minbits = s->frame_bits - 8 *
1894                              (s->vbv_delay_ptr - s->pb.buf - 1);
1895             double bits    = s->rc_context.buffer_index + minbits - inbits;
1896
1897             if (bits < 0)
1898                 av_log(s->avctx, AV_LOG_ERROR,
1899                        "Internal error, negative bits\n");
1900
1901             assert(s->repeat_first_field == 0);
1902
1903             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1904             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1905                         s->avctx->rc_max_rate;
1906
1907             vbv_delay = FFMAX(vbv_delay, min_delay);
1908
1909             av_assert0(vbv_delay < 0xFFFF);
1910
1911             s->vbv_delay_ptr[0] &= 0xF8;
1912             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1913             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1914             s->vbv_delay_ptr[2] &= 0x07;
1915             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1916             avctx->vbv_delay     = vbv_delay * 300;
1917         }
1918         s->total_bits     += s->frame_bits;
1919         avctx->frame_bits  = s->frame_bits;
1920
1921         pkt->pts = s->current_picture.f->pts;
1922         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1923             if (!s->current_picture.f->coded_picture_number)
1924                 pkt->dts = pkt->pts - s->dts_delta;
1925             else
1926                 pkt->dts = s->reordered_pts;
1927             s->reordered_pts = pkt->pts;
1928         } else
1929             pkt->dts = pkt->pts;
1930         if (s->current_picture.f->key_frame)
1931             pkt->flags |= AV_PKT_FLAG_KEY;
1932         if (s->mb_info)
1933             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1934     } else {
1935         s->frame_bits = 0;
1936     }
1937
1938     /* release non-reference frames */
1939     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1940         if (!s->picture[i].reference)
1941             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1942     }
1943
1944     av_assert1((s->frame_bits & 7) == 0);
1945
1946     pkt->size = s->frame_bits / 8;
1947     *got_packet = !!pkt->size;
1948     return 0;
1949 }
1950
1951 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1952                                                 int n, int threshold)
1953 {
1954     static const char tab[64] = {
1955         3, 2, 2, 1, 1, 1, 1, 1,
1956         1, 1, 1, 1, 1, 1, 1, 1,
1957         1, 1, 1, 1, 1, 1, 1, 1,
1958         0, 0, 0, 0, 0, 0, 0, 0,
1959         0, 0, 0, 0, 0, 0, 0, 0,
1960         0, 0, 0, 0, 0, 0, 0, 0,
1961         0, 0, 0, 0, 0, 0, 0, 0,
1962         0, 0, 0, 0, 0, 0, 0, 0
1963     };
1964     int score = 0;
1965     int run = 0;
1966     int i;
1967     int16_t *block = s->block[n];
1968     const int last_index = s->block_last_index[n];
1969     int skip_dc;
1970
1971     if (threshold < 0) {
1972         skip_dc = 0;
1973         threshold = -threshold;
1974     } else
1975         skip_dc = 1;
1976
1977     /* Are all we could set to zero already zero? */
1978     if (last_index <= skip_dc - 1)
1979         return;
1980
1981     for (i = 0; i <= last_index; i++) {
1982         const int j = s->intra_scantable.permutated[i];
1983         const int level = FFABS(block[j]);
1984         if (level == 1) {
1985             if (skip_dc && i == 0)
1986                 continue;
1987             score += tab[run];
1988             run = 0;
1989         } else if (level > 1) {
1990             return;
1991         } else {
1992             run++;
1993         }
1994     }
1995     if (score >= threshold)
1996         return;
1997     for (i = skip_dc; i <= last_index; i++) {
1998         const int j = s->intra_scantable.permutated[i];
1999         block[j] = 0;
2000     }
2001     if (block[0])
2002         s->block_last_index[n] = 0;
2003     else
2004         s->block_last_index[n] = -1;
2005 }
2006
2007 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2008                                int last_index)
2009 {
2010     int i;
2011     const int maxlevel = s->max_qcoeff;
2012     const int minlevel = s->min_qcoeff;
2013     int overflow = 0;
2014
2015     if (s->mb_intra) {
2016         i = 1; // skip clipping of intra dc
2017     } else
2018         i = 0;
2019
2020     for (; i <= last_index; i++) {
2021         const int j = s->intra_scantable.permutated[i];
2022         int level = block[j];
2023
2024         if (level > maxlevel) {
2025             level = maxlevel;
2026             overflow++;
2027         } else if (level < minlevel) {
2028             level = minlevel;
2029             overflow++;
2030         }
2031
2032         block[j] = level;
2033     }
2034
2035     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2036         av_log(s->avctx, AV_LOG_INFO,
2037                "warning, clipping %d dct coefficients to %d..%d\n",
2038                overflow, minlevel, maxlevel);
2039 }
2040
/**
 * Compute a weight for each pixel of an 8x8 block from the local activity.
 *
 * For every pixel the (up to) 3x3 neighbourhood, cropped to the 8x8 block,
 * is examined; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count,
 * where count is the number of neighbourhood pixels.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int y_begin = FFMAX(row - 1, 0);
        const int y_end   = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int x_begin = FFMAX(col - 1, 0);
            const int x_end   = FFMIN(8, col + 2);
            /* number of pixels in the cropped neighbourhood */
            const int count   = (y_end - y_begin) * (x_end - x_begin);
            int sum = 0;
            int sqr = 0;
            int nx, ny;

            for (ny = y_begin; ny < y_end; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = x_begin; nx < x_end; nx++) {
                    const int v = line[nx];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2064
2065 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2066                                                 int motion_x, int motion_y,
2067                                                 int mb_block_height,
2068                                                 int mb_block_width,
2069                                                 int mb_block_count)
2070 {
2071     int16_t weight[12][64];
2072     int16_t orig[12][64];
2073     const int mb_x = s->mb_x;
2074     const int mb_y = s->mb_y;
2075     int i;
2076     int skip_dct[12];
2077     int dct_offset = s->linesize * 8; // default for progressive frames
2078     int uv_dct_offset = s->uvlinesize * 8;
2079     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2080     ptrdiff_t wrap_y, wrap_c;
2081
2082     for (i = 0; i < mb_block_count; i++)
2083         skip_dct[i] = s->skipdct;
2084
2085     if (s->adaptive_quant) {
2086         const int last_qp = s->qscale;
2087         const int mb_xy = mb_x + mb_y * s->mb_stride;
2088
2089         s->lambda = s->lambda_table[mb_xy];
2090         update_qscale(s);
2091
2092         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2093             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2094             s->dquant = s->qscale - last_qp;
2095
2096             if (s->out_format == FMT_H263) {
2097                 s->dquant = av_clip(s->dquant, -2, 2);
2098
2099                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2100                     if (!s->mb_intra) {
2101                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2102                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2103                                 s->dquant = 0;
2104                         }
2105                         if (s->mv_type == MV_TYPE_8X8)
2106                             s->dquant = 0;
2107                     }
2108                 }
2109             }
2110         }
2111         ff_set_qscale(s, last_qp + s->dquant);
2112     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2113         ff_set_qscale(s, s->qscale + s->dquant);
2114
2115     wrap_y = s->linesize;
2116     wrap_c = s->uvlinesize;
2117     ptr_y  = s->new_picture.f->data[0] +
2118              (mb_y * 16 * wrap_y)              + mb_x * 16;
2119     ptr_cb = s->new_picture.f->data[1] +
2120              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2121     ptr_cr = s->new_picture.f->data[2] +
2122              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2123
2124     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2125         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2126         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2127         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2128         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2129                                  wrap_y, wrap_y,
2130                                  16, 16, mb_x * 16, mb_y * 16,
2131                                  s->width, s->height);
2132         ptr_y = ebuf;
2133         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2134                                  wrap_c, wrap_c,
2135                                  mb_block_width, mb_block_height,
2136                                  mb_x * mb_block_width, mb_y * mb_block_height,
2137                                  cw, ch);
2138         ptr_cb = ebuf + 16 * wrap_y;
2139         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2140                                  wrap_c, wrap_c,
2141                                  mb_block_width, mb_block_height,
2142                                  mb_x * mb_block_width, mb_y * mb_block_height,
2143                                  cw, ch);
2144         ptr_cr = ebuf + 16 * wrap_y + 16;
2145     }
2146
2147     if (s->mb_intra) {
2148         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2149             int progressive_score, interlaced_score;
2150
2151             s->interlaced_dct = 0;
2152             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2153                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2154                                                      NULL, wrap_y, 8) - 400;
2155
2156             if (progressive_score > 0) {
2157                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2158                                                         NULL, wrap_y * 2, 8) +
2159                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2160                                                         NULL, wrap_y * 2, 8);
2161                 if (progressive_score > interlaced_score) {
2162                     s->interlaced_dct = 1;
2163
2164                     dct_offset = wrap_y;
2165                     uv_dct_offset = wrap_c;
2166                     wrap_y <<= 1;
2167                     if (s->chroma_format == CHROMA_422 ||
2168                         s->chroma_format == CHROMA_444)
2169                         wrap_c <<= 1;
2170                 }
2171             }
2172         }
2173
2174         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2175         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2176         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2177         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2178
2179         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2180             skip_dct[4] = 1;
2181             skip_dct[5] = 1;
2182         } else {
2183             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2184             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2185             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2186                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2187                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2188             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2189                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2190                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2191                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2192                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2193                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2194                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2195             }
2196         }
2197     } else {
2198         op_pixels_func (*op_pix)[4];
2199         qpel_mc_func (*op_qpix)[16];
2200         uint8_t *dest_y, *dest_cb, *dest_cr;
2201
2202         dest_y  = s->dest[0];
2203         dest_cb = s->dest[1];
2204         dest_cr = s->dest[2];
2205
2206         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2207             op_pix  = s->hdsp.put_pixels_tab;
2208             op_qpix = s->qdsp.put_qpel_pixels_tab;
2209         } else {
2210             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2211             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2212         }
2213
2214         if (s->mv_dir & MV_DIR_FORWARD) {
2215             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2216                           s->last_picture.f->data,
2217                           op_pix, op_qpix);
2218             op_pix  = s->hdsp.avg_pixels_tab;
2219             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2220         }
2221         if (s->mv_dir & MV_DIR_BACKWARD) {
2222             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2223                           s->next_picture.f->data,
2224                           op_pix, op_qpix);
2225         }
2226
2227         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2228             int progressive_score, interlaced_score;
2229
2230             s->interlaced_dct = 0;
2231             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2232                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2233                                                      ptr_y + wrap_y * 8,
2234                                                      wrap_y, 8) - 400;
2235
2236             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2237                 progressive_score -= 400;
2238
2239             if (progressive_score > 0) {
2240                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2241                                                         wrap_y * 2, 8) +
2242                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2243                                                         ptr_y + wrap_y,
2244                                                         wrap_y * 2, 8);
2245
2246                 if (progressive_score > interlaced_score) {
2247                     s->interlaced_dct = 1;
2248
2249                     dct_offset = wrap_y;
2250                     uv_dct_offset = wrap_c;
2251                     wrap_y <<= 1;
2252                     if (s->chroma_format == CHROMA_422)
2253                         wrap_c <<= 1;
2254                 }
2255             }
2256         }
2257
2258         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2259         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2260         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2261                             dest_y + dct_offset, wrap_y);
2262         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2263                             dest_y + dct_offset + 8, wrap_y);
2264
2265         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2266             skip_dct[4] = 1;
2267             skip_dct[5] = 1;
2268         } else {
2269             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2270             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2271             if (!s->chroma_y_shift) { /* 422 */
2272                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2273                                     dest_cb + uv_dct_offset, wrap_c);
2274                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2275                                     dest_cr + uv_dct_offset, wrap_c);
2276             }
2277         }
2278         /* pre quantization */
2279         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2280                 2 * s->qscale * s->qscale) {
2281             // FIXME optimize
2282             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2283                 skip_dct[0] = 1;
2284             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2285                 skip_dct[1] = 1;
2286             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2287                                wrap_y, 8) < 20 * s->qscale)
2288                 skip_dct[2] = 1;
2289             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2290                                wrap_y, 8) < 20 * s->qscale)
2291                 skip_dct[3] = 1;
2292             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2293                 skip_dct[4] = 1;
2294             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2295                 skip_dct[5] = 1;
2296             if (!s->chroma_y_shift) { /* 422 */
2297                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2298                                    dest_cb + uv_dct_offset,
2299                                    wrap_c, 8) < 20 * s->qscale)
2300                     skip_dct[6] = 1;
2301                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2302                                    dest_cr + uv_dct_offset,
2303                                    wrap_c, 8) < 20 * s->qscale)
2304                     skip_dct[7] = 1;
2305             }
2306         }
2307     }
2308
2309     if (s->quantizer_noise_shaping) {
2310         if (!skip_dct[0])
2311             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2312         if (!skip_dct[1])
2313             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2314         if (!skip_dct[2])
2315             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2316         if (!skip_dct[3])
2317             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2318         if (!skip_dct[4])
2319             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2320         if (!skip_dct[5])
2321             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2322         if (!s->chroma_y_shift) { /* 422 */
2323             if (!skip_dct[6])
2324                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2325                                   wrap_c);
2326             if (!skip_dct[7])
2327                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2328                                   wrap_c);
2329         }
2330         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2331     }
2332
2333     /* DCT & quantize */
2334     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2335     {
2336         for (i = 0; i < mb_block_count; i++) {
2337             if (!skip_dct[i]) {
2338                 int overflow;
2339                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2340                 // FIXME we could decide to change to quantizer instead of
2341                 // clipping
2342                 // JS: I don't think that would be a good idea it could lower
2343                 //     quality instead of improve it. Just INTRADC clipping
2344                 //     deserves changes in quantizer
2345                 if (overflow)
2346                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2347             } else
2348                 s->block_last_index[i] = -1;
2349         }
2350         if (s->quantizer_noise_shaping) {
2351             for (i = 0; i < mb_block_count; i++) {
2352                 if (!skip_dct[i]) {
2353                     s->block_last_index[i] =
2354                         dct_quantize_refine(s, s->block[i], weight[i],
2355                                             orig[i], i, s->qscale);
2356                 }
2357             }
2358         }
2359
2360         if (s->luma_elim_threshold && !s->mb_intra)
2361             for (i = 0; i < 4; i++)
2362                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2363         if (s->chroma_elim_threshold && !s->mb_intra)
2364             for (i = 4; i < mb_block_count; i++)
2365                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2366
2367         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2368             for (i = 0; i < mb_block_count; i++) {
2369                 if (s->block_last_index[i] == -1)
2370                     s->coded_score[i] = INT_MAX / 256;
2371             }
2372         }
2373     }
2374
2375     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2376         s->block_last_index[4] =
2377         s->block_last_index[5] = 0;
2378         s->block[4][0] =
2379         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2380         if (!s->chroma_y_shift) { /* 422 / 444 */
2381             for (i=6; i<12; i++) {
2382                 s->block_last_index[i] = 0;
2383                 s->block[i][0] = s->block[4][0];
2384             }
2385         }
2386     }
2387
2388     // non c quantize code returns incorrect block_last_index FIXME
2389     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2390         for (i = 0; i < mb_block_count; i++) {
2391             int j;
2392             if (s->block_last_index[i] > 0) {
2393                 for (j = 63; j > 0; j--) {
2394                     if (s->block[i][s->intra_scantable.permutated[j]])
2395                         break;
2396                 }
2397                 s->block_last_index[i] = j;
2398             }
2399         }
2400     }
2401
2402     /* huffman encode */
2403     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2404     case AV_CODEC_ID_MPEG1VIDEO:
2405     case AV_CODEC_ID_MPEG2VIDEO:
2406         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2407             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2408         break;
2409     case AV_CODEC_ID_MPEG4:
2410         if (CONFIG_MPEG4_ENCODER)
2411             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2412         break;
2413     case AV_CODEC_ID_MSMPEG4V2:
2414     case AV_CODEC_ID_MSMPEG4V3:
2415     case AV_CODEC_ID_WMV1:
2416         if (CONFIG_MSMPEG4_ENCODER)
2417             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2418         break;
2419     case AV_CODEC_ID_WMV2:
2420         if (CONFIG_WMV2_ENCODER)
2421             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2422         break;
2423     case AV_CODEC_ID_H261:
2424         if (CONFIG_H261_ENCODER)
2425             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2426         break;
2427     case AV_CODEC_ID_H263:
2428     case AV_CODEC_ID_H263P:
2429     case AV_CODEC_ID_FLV1:
2430     case AV_CODEC_ID_RV10:
2431     case AV_CODEC_ID_RV20:
2432         if (CONFIG_H263_ENCODER)
2433             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2434         break;
2435     case AV_CODEC_ID_MJPEG:
2436     case AV_CODEC_ID_AMV:
2437         if (CONFIG_MJPEG_ENCODER)
2438             ff_mjpeg_encode_mb(s, s->block);
2439         break;
2440     default:
2441         av_assert1(0);
2442     }
2443 }
2444
2445 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2446 {
2447     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2448     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2449     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2450 }
2451
2452 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2453     int i;
2454
2455     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2456
2457     /* mpeg1 */
2458     d->mb_skip_run= s->mb_skip_run;
2459     for(i=0; i<3; i++)
2460         d->last_dc[i] = s->last_dc[i];
2461
2462     /* statistics */
2463     d->mv_bits= s->mv_bits;
2464     d->i_tex_bits= s->i_tex_bits;
2465     d->p_tex_bits= s->p_tex_bits;
2466     d->i_count= s->i_count;
2467     d->f_count= s->f_count;
2468     d->b_count= s->b_count;
2469     d->skip_count= s->skip_count;
2470     d->misc_bits= s->misc_bits;
2471     d->last_bits= 0;
2472
2473     d->mb_skipped= 0;
2474     d->qscale= s->qscale;
2475     d->dquant= s->dquant;
2476
2477     d->esc3_level_length= s->esc3_level_length;
2478 }
2479
2480 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2481     int i;
2482
2483     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2484     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2485
2486     /* mpeg1 */
2487     d->mb_skip_run= s->mb_skip_run;
2488     for(i=0; i<3; i++)
2489         d->last_dc[i] = s->last_dc[i];
2490
2491     /* statistics */
2492     d->mv_bits= s->mv_bits;
2493     d->i_tex_bits= s->i_tex_bits;
2494     d->p_tex_bits= s->p_tex_bits;
2495     d->i_count= s->i_count;
2496     d->f_count= s->f_count;
2497     d->b_count= s->b_count;
2498     d->skip_count= s->skip_count;
2499     d->misc_bits= s->misc_bits;
2500
2501     d->mb_intra= s->mb_intra;
2502     d->mb_skipped= s->mb_skipped;
2503     d->mv_type= s->mv_type;
2504     d->mv_dir= s->mv_dir;
2505     d->pb= s->pb;
2506     if(s->data_partitioning){
2507         d->pb2= s->pb2;
2508         d->tex_pb= s->tex_pb;
2509     }
2510     d->block= s->block;
2511     for(i=0; i<8; i++)
2512         d->block_last_index[i]= s->block_last_index[i];
2513     d->interlaced_dct= s->interlaced_dct;
2514     d->qscale= s->qscale;
2515
2516     d->esc3_level_length= s->esc3_level_length;
2517 }
2518
2519 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2520                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2521                            int *dmin, int *next_block, int motion_x, int motion_y)
2522 {
2523     int score;
2524     uint8_t *dest_backup[3];
2525
2526     copy_context_before_encode(s, backup, type);
2527
2528     s->block= s->blocks[*next_block];
2529     s->pb= pb[*next_block];
2530     if(s->data_partitioning){
2531         s->pb2   = pb2   [*next_block];
2532         s->tex_pb= tex_pb[*next_block];
2533     }
2534
2535     if(*next_block){
2536         memcpy(dest_backup, s->dest, sizeof(s->dest));
2537         s->dest[0] = s->sc.rd_scratchpad;
2538         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2539         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2540         av_assert0(s->linesize >= 32); //FIXME
2541     }
2542
2543     encode_mb(s, motion_x, motion_y);
2544
2545     score= put_bits_count(&s->pb);
2546     if(s->data_partitioning){
2547         score+= put_bits_count(&s->pb2);
2548         score+= put_bits_count(&s->tex_pb);
2549     }
2550
2551     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2552         ff_mpv_decode_mb(s, s->block);
2553
2554         score *= s->lambda2;
2555         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2556     }
2557
2558     if(*next_block){
2559         memcpy(s->dest, dest_backup, sizeof(s->dest));
2560     }
2561
2562     if(score<*dmin){
2563         *dmin= score;
2564         *next_block^=1;
2565
2566         copy_context_after_encode(best, s, type);
2567     }
2568 }
2569
2570 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2571     uint32_t *sq = ff_square_tab + 256;
2572     int acc=0;
2573     int x,y;
2574
2575     if(w==16 && h==16)
2576         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2577     else if(w==8 && h==8)
2578         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2579
2580     for(y=0; y<h; y++){
2581         for(x=0; x<w; x++){
2582             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2583         }
2584     }
2585
2586     av_assert2(acc>=0);
2587
2588     return acc;
2589 }
2590
2591 static int sse_mb(MpegEncContext *s){
2592     int w= 16;
2593     int h= 16;
2594
2595     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2596     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2597
2598     if(w==16 && h==16)
2599       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2600         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2601                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2602                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2603       }else{
2604         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2605                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2606                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2607       }
2608     else
2609         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2610                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2611                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2612 }
2613
2614 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2615     MpegEncContext *s= *(void**)arg;
2616
2617
2618     s->me.pre_pass=1;
2619     s->me.dia_size= s->avctx->pre_dia_size;
2620     s->first_slice_line=1;
2621     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2622         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2623             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2624         }
2625         s->first_slice_line=0;
2626     }
2627
2628     s->me.pre_pass=0;
2629
2630     return 0;
2631 }
2632
2633 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2634     MpegEncContext *s= *(void**)arg;
2635
2636     ff_check_alignment();
2637
2638     s->me.dia_size= s->avctx->dia_size;
2639     s->first_slice_line=1;
2640     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2641         s->mb_x=0; //for block init below
2642         ff_init_block_index(s);
2643         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2644             s->block_index[0]+=2;
2645             s->block_index[1]+=2;
2646             s->block_index[2]+=2;
2647             s->block_index[3]+=2;
2648
2649             /* compute motion vector & mb_type and store in context */
2650             if(s->pict_type==AV_PICTURE_TYPE_B)
2651                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2652             else
2653                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2654         }
2655         s->first_slice_line=0;
2656     }
2657     return 0;
2658 }
2659
2660 static int mb_var_thread(AVCodecContext *c, void *arg){
2661     MpegEncContext *s= *(void**)arg;
2662     int mb_x, mb_y;
2663
2664     ff_check_alignment();
2665
2666     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2667         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2668             int xx = mb_x * 16;
2669             int yy = mb_y * 16;
2670             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2671             int varc;
2672             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2673
2674             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2675                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2676
2677             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2678             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2679             s->me.mb_var_sum_temp    += varc;
2680         }
2681     }
2682     return 0;
2683 }
2684
2685 static void write_slice_end(MpegEncContext *s){
2686     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2687         if(s->partitioned_frame){
2688             ff_mpeg4_merge_partitions(s);
2689         }
2690
2691         ff_mpeg4_stuffing(&s->pb);
2692     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2693         ff_mjpeg_encode_stuffing(s);
2694     }
2695
2696     avpriv_align_put_bits(&s->pb);
2697     flush_put_bits(&s->pb);
2698
2699     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2700         s->misc_bits+= get_bits_diff(s);
2701 }
2702
2703 static void write_mb_info(MpegEncContext *s)
2704 {
2705     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2706     int offset = put_bits_count(&s->pb);
2707     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2708     int gobn = s->mb_y / s->gob_index;
2709     int pred_x, pred_y;
2710     if (CONFIG_H263_ENCODER)
2711         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2712     bytestream_put_le32(&ptr, offset);
2713     bytestream_put_byte(&ptr, s->qscale);
2714     bytestream_put_byte(&ptr, gobn);
2715     bytestream_put_le16(&ptr, mba);
2716     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2717     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2718     /* 4MV not implemented */
2719     bytestream_put_byte(&ptr, 0); /* hmv2 */
2720     bytestream_put_byte(&ptr, 0); /* vmv2 */
2721 }
2722
2723 static void update_mb_info(MpegEncContext *s, int startcode)
2724 {
2725     if (!s->mb_info)
2726         return;
2727     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2728         s->mb_info_size += 12;
2729         s->prev_mb_info = s->last_mb_info;
2730     }
2731     if (startcode) {
2732         s->prev_mb_info = put_bits_count(&s->pb)/8;
2733         /* This might have incremented mb_info_size above, and we return without
2734          * actually writing any info into that slot yet. But in that case,
2735          * this will be called again at the start of the after writing the
2736          * start code, actually writing the mb info. */
2737         return;
2738     }
2739
2740     s->last_mb_info = put_bits_count(&s->pb)/8;
2741     if (!s->mb_info_size)
2742         s->mb_info_size += 12;
2743     write_mb_info(s);
2744 }
2745
2746 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2747 {
2748     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2749         && s->slice_context_count == 1
2750         && s->pb.buf == s->avctx->internal->byte_buffer) {
2751         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2752         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2753
2754         uint8_t *new_buffer = NULL;
2755         int new_buffer_size = 0;
2756
2757         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2758                               s->avctx->internal->byte_buffer_size + size_increase);
2759         if (!new_buffer)
2760             return AVERROR(ENOMEM);
2761
2762         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2763         av_free(s->avctx->internal->byte_buffer);
2764         s->avctx->internal->byte_buffer      = new_buffer;
2765         s->avctx->internal->byte_buffer_size = new_buffer_size;
2766         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2767         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2768         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2769     }
2770     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2771         return AVERROR(EINVAL);
2772     return 0;
2773 }
2774
2775 static int encode_thread(AVCodecContext *c, void *arg){
2776     MpegEncContext *s= *(void**)arg;
2777     int mb_x, mb_y, pdif = 0;
2778     int chr_h= 16>>s->chroma_y_shift;
2779     int i, j;
2780     MpegEncContext best_s = { 0 }, backup_s;
2781     uint8_t bit_buf[2][MAX_MB_BYTES];
2782     uint8_t bit_buf2[2][MAX_MB_BYTES];
2783     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2784     PutBitContext pb[2], pb2[2], tex_pb[2];
2785
2786     ff_check_alignment();
2787
2788     for(i=0; i<2; i++){
2789         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2790         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2791         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2792     }
2793
2794     s->last_bits= put_bits_count(&s->pb);
2795     s->mv_bits=0;
2796     s->misc_bits=0;
2797     s->i_tex_bits=0;
2798     s->p_tex_bits=0;
2799     s->i_count=0;
2800     s->f_count=0;
2801     s->b_count=0;
2802     s->skip_count=0;
2803
2804     for(i=0; i<3; i++){
2805         /* init last dc values */
2806         /* note: quant matrix value (8) is implied here */
2807         s->last_dc[i] = 128 << s->intra_dc_precision;
2808
2809         s->current_picture.error[i] = 0;
2810     }
2811     if(s->codec_id==AV_CODEC_ID_AMV){
2812         s->last_dc[0] = 128*8/13;
2813         s->last_dc[1] = 128*8/14;
2814         s->last_dc[2] = 128*8/14;
2815     }
2816     s->mb_skip_run = 0;
2817     memset(s->last_mv, 0, sizeof(s->last_mv));
2818
2819     s->last_mv_dir = 0;
2820
2821     switch(s->codec_id){
2822     case AV_CODEC_ID_H263:
2823     case AV_CODEC_ID_H263P:
2824     case AV_CODEC_ID_FLV1:
2825         if (CONFIG_H263_ENCODER)
2826             s->gob_index = H263_GOB_HEIGHT(s->height);
2827         break;
2828     case AV_CODEC_ID_MPEG4:
2829         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2830             ff_mpeg4_init_partitions(s);
2831         break;
2832     }
2833
2834     s->resync_mb_x=0;
2835     s->resync_mb_y=0;
2836     s->first_slice_line = 1;
2837     s->ptr_lastgob = s->pb.buf;
2838     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2839         s->mb_x=0;
2840         s->mb_y= mb_y;
2841
2842         ff_set_qscale(s, s->qscale);
2843         ff_init_block_index(s);
2844
2845         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2846             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2847             int mb_type= s->mb_type[xy];
2848 //            int d;
2849             int dmin= INT_MAX;
2850             int dir;
2851             int size_increase =  s->avctx->internal->byte_buffer_size/4
2852                                + s->mb_width*MAX_MB_BYTES;
2853
2854             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2855             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2856                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2857                 return -1;
2858             }
2859             if(s->data_partitioning){
2860                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2861                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2862                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2863                     return -1;
2864                 }
2865             }
2866
2867             s->mb_x = mb_x;
2868             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2869             ff_update_block_index(s);
2870
2871             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2872                 ff_h261_reorder_mb_index(s);
2873                 xy= s->mb_y*s->mb_stride + s->mb_x;
2874                 mb_type= s->mb_type[xy];
2875             }
2876
2877             /* write gob / video packet header  */
2878             if(s->rtp_mode){
2879                 int current_packet_size, is_gob_start;
2880
2881                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2882
2883                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2884
2885                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2886
2887                 switch(s->codec_id){
2888                 case AV_CODEC_ID_H263:
2889                 case AV_CODEC_ID_H263P:
2890                     if(!s->h263_slice_structured)
2891                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2892                     break;
2893                 case AV_CODEC_ID_MPEG2VIDEO:
2894                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2895                 case AV_CODEC_ID_MPEG1VIDEO:
2896                     if(s->mb_skip_run) is_gob_start=0;
2897                     break;
2898                 case AV_CODEC_ID_MJPEG:
2899                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2900                     break;
2901                 }
2902
2903                 if(is_gob_start){
2904                     if(s->start_mb_y != mb_y || mb_x!=0){
2905                         write_slice_end(s);
2906
2907                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2908                             ff_mpeg4_init_partitions(s);
2909                         }
2910                     }
2911
2912                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2913                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2914
2915                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2916                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2917                         int d = 100 / s->error_rate;
2918                         if(r % d == 0){
2919                             current_packet_size=0;
2920                             s->pb.buf_ptr= s->ptr_lastgob;
2921                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2922                         }
2923                     }
2924
2925                     if (s->avctx->rtp_callback){
2926                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2927                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2928                     }
2929                     update_mb_info(s, 1);
2930
2931                     switch(s->codec_id){
2932                     case AV_CODEC_ID_MPEG4:
2933                         if (CONFIG_MPEG4_ENCODER) {
2934                             ff_mpeg4_encode_video_packet_header(s);
2935                             ff_mpeg4_clean_buffers(s);
2936                         }
2937                     break;
2938                     case AV_CODEC_ID_MPEG1VIDEO:
2939                     case AV_CODEC_ID_MPEG2VIDEO:
2940                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2941                             ff_mpeg1_encode_slice_header(s);
2942                             ff_mpeg1_clean_buffers(s);
2943                         }
2944                     break;
2945                     case AV_CODEC_ID_H263:
2946                     case AV_CODEC_ID_H263P:
2947                         if (CONFIG_H263_ENCODER)
2948                             ff_h263_encode_gob_header(s, mb_y);
2949                     break;
2950                     }
2951
2952                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2953                         int bits= put_bits_count(&s->pb);
2954                         s->misc_bits+= bits - s->last_bits;
2955                         s->last_bits= bits;
2956                     }
2957
2958                     s->ptr_lastgob += current_packet_size;
2959                     s->first_slice_line=1;
2960                     s->resync_mb_x=mb_x;
2961                     s->resync_mb_y=mb_y;
2962                 }
2963             }
2964
2965             if(  (s->resync_mb_x   == s->mb_x)
2966                && s->resync_mb_y+1 == s->mb_y){
2967                 s->first_slice_line=0;
2968             }
2969
2970             s->mb_skipped=0;
2971             s->dquant=0; //only for QP_RD
2972
2973             update_mb_info(s, 0);
2974
2975             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2976                 int next_block=0;
2977                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2978
2979                 copy_context_before_encode(&backup_s, s, -1);
2980                 backup_s.pb= s->pb;
2981                 best_s.data_partitioning= s->data_partitioning;
2982                 best_s.partitioned_frame= s->partitioned_frame;
2983                 if(s->data_partitioning){
2984                     backup_s.pb2= s->pb2;
2985                     backup_s.tex_pb= s->tex_pb;
2986                 }
2987
2988                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2989                     s->mv_dir = MV_DIR_FORWARD;
2990                     s->mv_type = MV_TYPE_16X16;
2991                     s->mb_intra= 0;
2992                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2993                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2994                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2995                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2996                 }
2997                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2998                     s->mv_dir = MV_DIR_FORWARD;
2999                     s->mv_type = MV_TYPE_FIELD;
3000                     s->mb_intra= 0;
3001                     for(i=0; i<2; i++){
3002                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3003                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3004                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3005                     }
3006                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3007                                  &dmin, &next_block, 0, 0);
3008                 }
3009                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3010                     s->mv_dir = MV_DIR_FORWARD;
3011                     s->mv_type = MV_TYPE_16X16;
3012                     s->mb_intra= 0;
3013                     s->mv[0][0][0] = 0;
3014                     s->mv[0][0][1] = 0;
3015                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3016                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3017                 }
3018                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3019                     s->mv_dir = MV_DIR_FORWARD;
3020                     s->mv_type = MV_TYPE_8X8;
3021                     s->mb_intra= 0;
3022                     for(i=0; i<4; i++){
3023                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3024                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3025                     }
3026                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3027                                  &dmin, &next_block, 0, 0);
3028                 }
3029                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3030                     s->mv_dir = MV_DIR_FORWARD;
3031                     s->mv_type = MV_TYPE_16X16;
3032                     s->mb_intra= 0;
3033                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3034                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3035                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3036                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3037                 }
3038                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3039                     s->mv_dir = MV_DIR_BACKWARD;
3040                     s->mv_type = MV_TYPE_16X16;
3041                     s->mb_intra= 0;
3042                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3043                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3044                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3045                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3046                 }
3047                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3048                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3049                     s->mv_type = MV_TYPE_16X16;
3050                     s->mb_intra= 0;
3051                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3052                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3053                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3054                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3055                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3056                                  &dmin, &next_block, 0, 0);
3057                 }
3058                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3059                     s->mv_dir = MV_DIR_FORWARD;
3060                     s->mv_type = MV_TYPE_FIELD;
3061                     s->mb_intra= 0;
3062                     for(i=0; i<2; i++){
3063                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3064                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3065                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3066                     }
3067                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3068                                  &dmin, &next_block, 0, 0);
3069                 }
3070                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3071                     s->mv_dir = MV_DIR_BACKWARD;
3072                     s->mv_type = MV_TYPE_FIELD;
3073                     s->mb_intra= 0;
3074                     for(i=0; i<2; i++){
3075                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3076                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3077                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3078                     }
3079                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3080                                  &dmin, &next_block, 0, 0);
3081                 }
3082                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3083                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3084                     s->mv_type = MV_TYPE_FIELD;
3085                     s->mb_intra= 0;
3086                     for(dir=0; dir<2; dir++){
3087                         for(i=0; i<2; i++){
3088                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3089                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3090                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3091                         }
3092                     }
3093                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3094                                  &dmin, &next_block, 0, 0);
3095                 }
3096                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3097                     s->mv_dir = 0;
3098                     s->mv_type = MV_TYPE_16X16;
3099                     s->mb_intra= 1;
3100                     s->mv[0][0][0] = 0;
3101                     s->mv[0][0][1] = 0;
3102                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3103                                  &dmin, &next_block, 0, 0);
3104                     if(s->h263_pred || s->h263_aic){
3105                         if(best_s.mb_intra)
3106                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3107                         else
3108                             ff_clean_intra_table_entries(s); //old mode?
3109                     }
3110                 }
3111
3112                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3113                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3114                         const int last_qp= backup_s.qscale;
3115                         int qpi, qp, dc[6];
3116                         int16_t ac[6][16];
3117                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3118                         static const int dquant_tab[4]={-1,1,-2,2};
3119                         int storecoefs = s->mb_intra && s->dc_val[0];
3120
3121                         av_assert2(backup_s.dquant == 0);
3122
3123                         //FIXME intra
3124                         s->mv_dir= best_s.mv_dir;
3125                         s->mv_type = MV_TYPE_16X16;
3126                         s->mb_intra= best_s.mb_intra;
3127                         s->mv[0][0][0] = best_s.mv[0][0][0];
3128                         s->mv[0][0][1] = best_s.mv[0][0][1];
3129                         s->mv[1][0][0] = best_s.mv[1][0][0];
3130                         s->mv[1][0][1] = best_s.mv[1][0][1];
3131
3132                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3133                         for(; qpi<4; qpi++){
3134                             int dquant= dquant_tab[qpi];
3135                             qp= last_qp + dquant;
3136                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3137                                 continue;
3138                             backup_s.dquant= dquant;
3139                             if(storecoefs){
3140                                 for(i=0; i<6; i++){
3141                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3142                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3143                                 }
3144                             }
3145
3146                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3147                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3148                             if(best_s.qscale != qp){
3149                                 if(storecoefs){
3150                                     for(i=0; i<6; i++){
3151                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3152                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3153                                     }
3154                                 }
3155                             }
3156                         }
3157                     }
3158                 }
3159                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3160                     int mx= s->b_direct_mv_table[xy][0];
3161                     int my= s->b_direct_mv_table[xy][1];
3162
3163                     backup_s.dquant = 0;
3164                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3165                     s->mb_intra= 0;
3166                     ff_mpeg4_set_direct_mv(s, mx, my);
3167                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3168                                  &dmin, &next_block, mx, my);
3169                 }
3170                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3171                     backup_s.dquant = 0;
3172                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3173                     s->mb_intra= 0;
3174                     ff_mpeg4_set_direct_mv(s, 0, 0);
3175                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3176                                  &dmin, &next_block, 0, 0);
3177                 }
3178                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3179                     int coded=0;
3180                     for(i=0; i<6; i++)
3181                         coded |= s->block_last_index[i];
3182                     if(coded){
3183                         int mx,my;
3184                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3185                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3186                             mx=my=0; //FIXME find the one we actually used
3187                             ff_mpeg4_set_direct_mv(s, mx, my);
3188                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3189                             mx= s->mv[1][0][0];
3190                             my= s->mv[1][0][1];
3191                         }else{
3192                             mx= s->mv[0][0][0];
3193                             my= s->mv[0][0][1];
3194                         }
3195
3196                         s->mv_dir= best_s.mv_dir;
3197                         s->mv_type = best_s.mv_type;
3198                         s->mb_intra= 0;
3199 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3200                         s->mv[0][0][1] = best_s.mv[0][0][1];
3201                         s->mv[1][0][0] = best_s.mv[1][0][0];
3202                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3203                         backup_s.dquant= 0;
3204                         s->skipdct=1;
3205                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3206                                         &dmin, &next_block, mx, my);
3207                         s->skipdct=0;
3208                     }
3209                 }
3210
3211                 s->current_picture.qscale_table[xy] = best_s.qscale;
3212
3213                 copy_context_after_encode(s, &best_s, -1);
3214
3215                 pb_bits_count= put_bits_count(&s->pb);
3216                 flush_put_bits(&s->pb);
3217                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3218                 s->pb= backup_s.pb;
3219
3220                 if(s->data_partitioning){
3221                     pb2_bits_count= put_bits_count(&s->pb2);
3222                     flush_put_bits(&s->pb2);
3223                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3224                     s->pb2= backup_s.pb2;
3225
3226                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3227                     flush_put_bits(&s->tex_pb);
3228                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3229                     s->tex_pb= backup_s.tex_pb;
3230                 }
3231                 s->last_bits= put_bits_count(&s->pb);
3232
3233                 if (CONFIG_H263_ENCODER &&
3234                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3235                     ff_h263_update_motion_val(s);
3236
3237                 if(next_block==0){ //FIXME 16 vs linesize16
3238                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3239                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3240                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3241                 }
3242
3243                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3244                     ff_mpv_decode_mb(s, s->block);
3245             } else {
3246                 int motion_x = 0, motion_y = 0;
3247                 s->mv_type=MV_TYPE_16X16;
3248                 // only one MB-Type possible
3249
3250                 switch(mb_type){
3251                 case CANDIDATE_MB_TYPE_INTRA:
3252                     s->mv_dir = 0;
3253                     s->mb_intra= 1;
3254                     motion_x= s->mv[0][0][0] = 0;
3255                     motion_y= s->mv[0][0][1] = 0;
3256                     break;
3257                 case CANDIDATE_MB_TYPE_INTER:
3258                     s->mv_dir = MV_DIR_FORWARD;
3259                     s->mb_intra= 0;
3260                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3261                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3262                     break;
3263                 case CANDIDATE_MB_TYPE_INTER_I:
3264                     s->mv_dir = MV_DIR_FORWARD;
3265                     s->mv_type = MV_TYPE_FIELD;
3266                     s->mb_intra= 0;
3267                     for(i=0; i<2; i++){
3268                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3269                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3270                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3271                     }
3272                     break;
3273                 case CANDIDATE_MB_TYPE_INTER4V:
3274                     s->mv_dir = MV_DIR_FORWARD;
3275                     s->mv_type = MV_TYPE_8X8;
3276                     s->mb_intra= 0;
3277                     for(i=0; i<4; i++){
3278                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3279                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3280                     }
3281                     break;
3282                 case CANDIDATE_MB_TYPE_DIRECT:
3283                     if (CONFIG_MPEG4_ENCODER) {
3284                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3285                         s->mb_intra= 0;
3286                         motion_x=s->b_direct_mv_table[xy][0];
3287                         motion_y=s->b_direct_mv_table[xy][1];
3288                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3289                     }
3290                     break;
3291                 case CANDIDATE_MB_TYPE_DIRECT0:
3292                     if (CONFIG_MPEG4_ENCODER) {
3293                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3294                         s->mb_intra= 0;
3295                         ff_mpeg4_set_direct_mv(s, 0, 0);
3296                     }
3297                     break;
3298                 case CANDIDATE_MB_TYPE_BIDIR:
3299                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3300                     s->mb_intra= 0;
3301                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3302                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3303                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3304                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3305                     break;
3306                 case CANDIDATE_MB_TYPE_BACKWARD:
3307                     s->mv_dir = MV_DIR_BACKWARD;
3308                     s->mb_intra= 0;
3309                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3310                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3311                     break;
3312                 case CANDIDATE_MB_TYPE_FORWARD:
3313                     s->mv_dir = MV_DIR_FORWARD;
3314                     s->mb_intra= 0;
3315                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3316                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3317                     break;
3318                 case CANDIDATE_MB_TYPE_FORWARD_I:
3319                     s->mv_dir = MV_DIR_FORWARD;
3320                     s->mv_type = MV_TYPE_FIELD;
3321                     s->mb_intra= 0;
3322                     for(i=0; i<2; i++){
3323                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3324                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3325                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3326                     }
3327                     break;
3328                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3329                     s->mv_dir = MV_DIR_BACKWARD;
3330                     s->mv_type = MV_TYPE_FIELD;
3331                     s->mb_intra= 0;
3332                     for(i=0; i<2; i++){
3333                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3334                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3335                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3336                     }
3337                     break;
3338                 case CANDIDATE_MB_TYPE_BIDIR_I:
3339                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3340                     s->mv_type = MV_TYPE_FIELD;
3341                     s->mb_intra= 0;
3342                     for(dir=0; dir<2; dir++){
3343                         for(i=0; i<2; i++){
3344                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3345                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3346                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3347                         }
3348                     }
3349                     break;
3350                 default:
3351                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3352                 }
3353
3354                 encode_mb(s, motion_x, motion_y);
3355
3356                 // RAL: Update last macroblock type
3357                 s->last_mv_dir = s->mv_dir;
3358
3359                 if (CONFIG_H263_ENCODER &&
3360                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3361                     ff_h263_update_motion_val(s);
3362
3363                 ff_mpv_decode_mb(s, s->block);
3364             }
3365
3366             /* clean the MV table in IPS frames for direct mode in B frames */
3367             if(s->mb_intra /* && I,P,S_TYPE */){
3368                 s->p_mv_table[xy][0]=0;
3369                 s->p_mv_table[xy][1]=0;
3370             }
3371
3372             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3373                 int w= 16;
3374                 int h= 16;
3375
3376                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3377                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3378
3379                 s->current_picture.error[0] += sse(
3380                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3381                     s->dest[0], w, h, s->linesize);
3382                 s->current_picture.error[1] += sse(
3383                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3384                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3385                 s->current_picture.error[2] += sse(
3386                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3387                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3388             }
3389             if(s->loop_filter){
3390                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3391                     ff_h263_loop_filter(s);
3392             }
3393             ff_dlog(s->avctx, "MB %d %d bits\n",
3394                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3395         }
3396     }
3397
3398     //not beautiful here but we must write it before flushing so it has to be here
3399     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3400         ff_msmpeg4_encode_ext_header(s);
3401
3402     write_slice_end(s);
3403
3404     /* Send the last GOB if RTP */
3405     if (s->avctx->rtp_callback) {
3406         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3407         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3408         /* Call the RTP callback to send the last GOB */
3409         emms_c();
3410         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3411     }
3412
3413     return 0;
3414 }
3415
/* Add src->field to dst->field, then reset src->field to 0.
 * Relies on pointers named dst and src being in scope at the call site.
 * Wrapped in do { } while (0) so that it expands to exactly one statement
 * and stays correct under an unbraced if/else/for. */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3417 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3418     MERGE(me.scene_change_score);
3419     MERGE(me.mc_mb_var_sum_temp);
3420     MERGE(me.mb_var_sum_temp);
3421 }
3422
3423 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3424     int i;
3425
3426     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3427     MERGE(dct_count[1]);
3428     MERGE(mv_bits);
3429     MERGE(i_tex_bits);
3430     MERGE(p_tex_bits);
3431     MERGE(i_count);
3432     MERGE(f_count);
3433     MERGE(b_count);
3434     MERGE(skip_count);
3435     MERGE(misc_bits);
3436     MERGE(er.error_count);
3437     MERGE(padding_bug_score);
3438     MERGE(current_picture.error[0]);
3439     MERGE(current_picture.error[1]);
3440     MERGE(current_picture.error[2]);
3441
3442     if(dst->avctx->noise_reduction){
3443         for(i=0; i<64; i++){
3444             MERGE(dct_error_sum[0][i]);
3445             MERGE(dct_error_sum[1][i]);
3446         }
3447     }
3448
3449     assert(put_bits_count(&src->pb) % 8 ==0);
3450     assert(put_bits_count(&dst->pb) % 8 ==0);
3451     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3452     flush_put_bits(&dst->pb);
3453 }
3454
3455 static int estimate_qp(MpegEncContext *s, int dry_run){
3456     if (s->next_lambda){
3457         s->current_picture_ptr->f->quality =
3458         s->current_picture.f->quality = s->next_lambda;
3459         if(!dry_run) s->next_lambda= 0;
3460     } else if (!s->fixed_qscale) {
3461         s->current_picture_ptr->f->quality =
3462         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3463         if (s->current_picture.f->quality < 0)
3464             return -1;
3465     }
3466
3467     if(s->adaptive_quant){
3468         switch(s->codec_id){
3469         case AV_CODEC_ID_MPEG4:
3470             if (CONFIG_MPEG4_ENCODER)
3471                 ff_clean_mpeg4_qscales(s);
3472             break;
3473         case AV_CODEC_ID_H263:
3474         case AV_CODEC_ID_H263P:
3475         case AV_CODEC_ID_FLV1:
3476             if (CONFIG_H263_ENCODER)
3477                 ff_clean_h263_qscales(s);
3478             break;
3479         default:
3480             ff_init_qscale_tab(s);
3481         }
3482
3483         s->lambda= s->lambda_table[0];
3484         //FIXME broken
3485     }else
3486         s->lambda = s->current_picture.f->quality;
3487     update_qscale(s);
3488     return 0;
3489 }
3490
3491 /* must be called before writing the header */
3492 static void set_frame_distances(MpegEncContext * s){
3493     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3494     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3495
3496     if(s->pict_type==AV_PICTURE_TYPE_B){
3497         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3498         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3499     }else{
3500         s->pp_time= s->time - s->last_non_b_time;
3501         s->last_non_b_time= s->time;
3502         assert(s->picture_number==0 || s->pp_time > 0);
3503     }
3504 }
3505
3506 static int encode_picture(MpegEncContext *s, int picture_number)
3507 {
3508     int i, ret;
3509     int bits;
3510     int context_count = s->slice_context_count;
3511
3512     s->picture_number = picture_number;
3513
3514     /* Reset the average MB variance */
3515     s->me.mb_var_sum_temp    =
3516     s->me.mc_mb_var_sum_temp = 0;
3517
3518     /* we need to initialize some time vars before we can encode b-frames */
3519     // RAL: Condition added for MPEG1VIDEO
3520     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3521         set_frame_distances(s);
3522     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3523         ff_set_mpeg4_time(s);
3524
3525     s->me.scene_change_score=0;
3526
3527 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3528
3529     if(s->pict_type==AV_PICTURE_TYPE_I){
3530         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3531         else                        s->no_rounding=0;
3532     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3533         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3534             s->no_rounding ^= 1;
3535     }
3536
3537     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3538         if (estimate_qp(s,1) < 0)
3539             return -1;
3540         ff_get_2pass_fcode(s);
3541     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3542         if(s->pict_type==AV_PICTURE_TYPE_B)
3543             s->lambda= s->last_lambda_for[s->pict_type];
3544         else
3545             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3546         update_qscale(s);
3547     }
3548
3549     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3550         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3551         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3552         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3553         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3554     }
3555
3556     s->mb_intra=0; //for the rate distortion & bit compare functions
3557     for(i=1; i<context_count; i++){
3558         ret = ff_update_duplicate_context(s->thread_context[i], s);
3559         if (ret < 0)
3560             return ret;
3561     }
3562
3563     if(ff_init_me(s)<0)
3564         return -1;
3565
3566     /* Estimate motion for every MB */
3567     if(s->pict_type != AV_PICTURE_TYPE_I){
3568         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3569         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3570         if (s->pict_type != AV_PICTURE_TYPE_B) {
3571             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3572                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3573             }
3574         }
3575
3576         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3577     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3578         /* I-Frame */
3579         for(i=0; i<s->mb_stride*s->mb_height; i++)
3580             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3581
3582         if(!s->fixed_qscale){
3583             /* finding spatial complexity for I-frame rate control */
3584             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3585         }
3586     }
3587     for(i=1; i<context_count; i++){
3588         merge_context_after_me(s, s->thread_context[i]);
3589     }
3590     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3591     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3592     emms_c();
3593
3594     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3595         s->pict_type= AV_PICTURE_TYPE_I;
3596         for(i=0; i<s->mb_stride*s->mb_height; i++)
3597             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3598         if(s->msmpeg4_version >= 3)
3599             s->no_rounding=1;
3600         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3601                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3602     }
3603
3604     if(!s->umvplus){
3605         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3606             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3607
3608             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3609                 int a,b;
3610                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3611                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3612                 s->f_code= FFMAX3(s->f_code, a, b);
3613             }
3614
3615             ff_fix_long_p_mvs(s);
3616             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3617             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3618                 int j;
3619                 for(i=0; i<2; i++){
3620                     for(j=0; j<2; j++)
3621                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3622                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3623                 }
3624             }
3625         }
3626
3627         if(s->pict_type==AV_PICTURE_TYPE_B){
3628             int a, b;
3629
3630             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3631             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3632             s->f_code = FFMAX(a, b);
3633
3634             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3635             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3636             s->b_code = FFMAX(a, b);
3637
3638             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3639             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3640             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3641             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3642             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3643                 int dir, j;
3644                 for(dir=0; dir<2; dir++){
3645                     for(i=0; i<2; i++){
3646                         for(j=0; j<2; j++){
3647                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3648                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3649                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3650                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3651                         }
3652                     }
3653                 }
3654             }
3655         }
3656     }
3657
3658     if (estimate_qp(s, 0) < 0)
3659         return -1;
3660
3661     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3662         s->pict_type == AV_PICTURE_TYPE_I &&
3663         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3664         s->qscale= 3; //reduce clipping problems
3665
3666     if (s->out_format == FMT_MJPEG) {
3667         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3668         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3669
3670         if (s->avctx->intra_matrix) {
3671             chroma_matrix =
3672             luma_matrix = s->avctx->intra_matrix;
3673         }
3674         if (s->avctx->chroma_intra_matrix)
3675             chroma_matrix = s->avctx->chroma_intra_matrix;
3676
3677         /* for mjpeg, we do include qscale in the matrix */
3678         for(i=1;i<64;i++){
3679             int j = s->idsp.idct_permutation[i];
3680
3681             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3682             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3683         }
3684         s->y_dc_scale_table=
3685         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3686         s->chroma_intra_matrix[0] =
3687         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3688         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3689                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3690         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3691                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3692         s->qscale= 8;
3693     }
3694     if(s->codec_id == AV_CODEC_ID_AMV){
3695         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3696         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3697         for(i=1;i<64;i++){
3698             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3699
3700             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3701             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3702         }
3703         s->y_dc_scale_table= y;
3704         s->c_dc_scale_table= c;
3705         s->intra_matrix[0] = 13;
3706         s->chroma_intra_matrix[0] = 14;
3707         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3708                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3709         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3710                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3711         s->qscale= 8;
3712     }
3713
3714     //FIXME var duplication
3715     s->current_picture_ptr->f->key_frame =
3716     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3717     s->current_picture_ptr->f->pict_type =
3718     s->current_picture.f->pict_type = s->pict_type;
3719
3720     if (s->current_picture.f->key_frame)
3721         s->picture_in_gop_number=0;
3722
3723     s->mb_x = s->mb_y = 0;
3724     s->last_bits= put_bits_count(&s->pb);
3725     switch(s->out_format) {
3726     case FMT_MJPEG:
3727         if (CONFIG_MJPEG_ENCODER)
3728             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3729                                            s->intra_matrix, s->chroma_intra_matrix);
3730         break;
3731     case FMT_H261:
3732         if (CONFIG_H261_ENCODER)
3733             ff_h261_encode_picture_header(s, picture_number);
3734         break;
3735     case FMT_H263:
3736         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3737             ff_wmv2_encode_picture_header(s, picture_number);
3738         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3739             ff_msmpeg4_encode_picture_header(s, picture_number);
3740         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3741             ff_mpeg4_encode_picture_header(s, picture_number);
3742         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3743             ret = ff_rv10_encode_picture_header(s, picture_number);
3744             if (ret < 0)
3745                 return ret;
3746         }
3747         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3748             ff_rv20_encode_picture_header(s, picture_number);
3749         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3750             ff_flv_encode_picture_header(s, picture_number);
3751         else if (CONFIG_H263_ENCODER)
3752             ff_h263_encode_picture_header(s, picture_number);
3753         break;
3754     case FMT_MPEG1:
3755         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3756             ff_mpeg1_encode_picture_header(s, picture_number);
3757         break;
3758     default:
3759         av_assert0(0);
3760     }
3761     bits= put_bits_count(&s->pb);
3762     s->header_bits= bits - s->last_bits;
3763
3764     for(i=1; i<context_count; i++){
3765         update_duplicate_context_after_me(s->thread_context[i], s);
3766     }
3767     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3768     for(i=1; i<context_count; i++){
3769         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3770             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3771         merge_context_after_encode(s, s->thread_context[i]);
3772     }
3773     emms_c();
3774     return 0;
3775 }
3776
3777 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3778     const int intra= s->mb_intra;
3779     int i;
3780
3781     s->dct_count[intra]++;
3782
3783     for(i=0; i<64; i++){
3784         int level= block[i];
3785
3786         if(level){
3787             if(level>0){
3788                 s->dct_error_sum[intra][i] += level;
3789                 level -= s->dct_offset[intra][i];
3790                 if(level<0) level=0;
3791             }else{
3792                 s->dct_error_sum[intra][i] -= level;
3793                 level += s->dct_offset[intra][i];
3794                 if(level>0) level=0;
3795             }
3796             block[i]= level;
3797         }
3798     }
3799 }
3800
/**
 * Quantize one block with a rate-distortion search over candidate levels.
 *
 * The block is transformed with s->fdsp.fdct() (and passed through
 * s->denoise_dct() when s->dct_error_sum is set). Every coefficient up to the
 * last one outside the dead zone gets one or two candidate quantized levels;
 * the run/level sequence with the lowest "distortion + lambda * VLC length"
 * score is then selected and written back into block.
 *
 * @param s        encoder context (matrices, VLC length tables, lambda2, ...)
 * @param block    64 input values; overwritten with the selected levels,
 *                 stored at s->intra_scantable.permutated positions
 * @param n        block number; for intra blocks n < 4 selects y_dc_scale and
 *                 the q_intra_matrix tables, otherwise c_dc_scale and the
 *                 q_chroma_intra_matrix tables
 * @param qscale   quantizer scale used to index the q_*_matrix tables
 * @param overflow set to non-zero when a level may exceed s->max_qcoeff
 * @return scan index of the last non-zero coefficient that is kept; a value
 *         below the first coded index (0 for intra, -1 for inter) means no
 *         coefficient is coded
 */
3801 static int dct_quantize_trellis_c(MpegEncContext *s,
3802                                   int16_t *block, int n,
3803                                   int qscale, int *overflow){
3804     const int *qmat;
3805     const uint16_t *matrix;
3806     const uint8_t *scantable= s->intra_scantable.scantable;
3807     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3808     int max=0;
3809     unsigned int threshold1, threshold2;
3810     int bias=0;
3811     int run_tab[65];
3812     int level_tab[65];
3813     int score_tab[65];
3814     int survivor[65];
3815     int survivor_count;
3816     int last_run=0;
3817     int last_level=0;
3818     int last_score= 0;
3819     int last_i;
3820     int coeff[2][64];
3821     int coeff_count[64];
3822     int qmul, qadd, start_i, last_non_zero, i, dc;
3823     const int esc_length= s->ac_esc_length;
3824     uint8_t * length;
3825     uint8_t * last_length;
3826     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3827
3828     s->fdsp.fdct(block);
3829
3830     if(s->dct_error_sum)
3831         s->denoise_dct(s, block);
         /* multiplier/offset used below to reconstruct a level for
          * FMT_H263 / FMT_H261 output (unquant = |level|*qmul + qadd) */
3832     qmul= qscale*16;
3833     qadd= ((qscale-1)|1)*8;
3834
3835     if (s->mb_intra) {
3836         int q;
3837         if (!s->h263_aic) {
3838             if (n < 4)
3839                 q = s->y_dc_scale;
3840             else
3841                 q = s->c_dc_scale;
3842             q = q << 3;
3843         } else{
3844             /* For AIC we skip quant/dequant of INTRADC */
3845             q = 1 << 3;
3846             qadd=0;
3847         }
3848
3849         /* note: block[0] is assumed to be positive */
3850         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC value is final; the search only covers indices >= 1 */
3851         start_i = 1;
3852         last_non_zero = 0;
3853         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3854         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3855         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3856             bias= 1<<(QMAT_SHIFT-1);
3857
3858         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3859             length     = s->intra_chroma_ac_vlc_length;
3860             last_length= s->intra_chroma_ac_vlc_last_length;
3861         } else {
3862             length     = s->intra_ac_vlc_length;
3863             last_length= s->intra_ac_vlc_last_length;
3864         }
3865     } else {
3866         start_i = 0;
3867         last_non_zero = -1;
3868         qmat = s->q_inter_matrix[qscale];
3869         matrix = s->inter_matrix;
3870         length     = s->inter_ac_vlc_length;
3871         last_length= s->inter_ac_vlc_last_length;
3872     }
3873     last_i= start_i;
3874
         /* (unsigned)(level+threshold1) > threshold2 is true exactly when
          * level > threshold1 or level < -threshold1 */
3875     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3876     threshold2= (threshold1<<1);
3877
         /* scan backwards for the last coefficient outside the dead zone */
3878     for(i=63; i>=start_i; i--) {
3879         const int j = scantable[i];
3880         int level = block[j] * qmat[j];
3881
3882         if(((unsigned)(level+threshold1))>threshold2){
3883             last_non_zero = i;
3884             break;
3885         }
3886     }
3887
         /* build the candidate list for each position: the rounded level and,
          * when its magnitude is at least 2, the level one step closer to zero;
          * dead-zone coefficients get the single candidate +1 or -1 */
3888     for(i=start_i; i<=last_non_zero; i++) {
3889         const int j = scantable[i];
3890         int level = block[j] * qmat[j];
3891
3892 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3893 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3894         if(((unsigned)(level+threshold1))>threshold2){
3895             if(level>0){
3896                 level= (bias + level)>>QMAT_SHIFT;
3897                 coeff[0][i]= level;
3898                 coeff[1][i]= level-1;
3899 //                coeff[2][k]= level-2;
3900             }else{
3901                 level= (bias - level)>>QMAT_SHIFT;
3902                 coeff[0][i]= -level;
3903                 coeff[1][i]= -level+1;
3904 //                coeff[2][k]= -level+2;
3905             }
                 /* level holds the magnitude here, so this is 1 or 2 */
3906             coeff_count[i]= FFMIN(level, 2);
3907             av_assert2(coeff_count[i]);
                 /* OR of all magnitudes, compared against max_qcoeff below */
3908             max |=level;
3909         }else{
3910             coeff[0][i]= (level>>31)|1;
3911             coeff_count[i]= 1;
3912         }
3913     }
3914
3915     *overflow= s->max_qcoeff < max; //overflow might have happened
3916
         /* nothing outside the dead zone: clear everything from start_i on */
3917     if(last_non_zero < start_i){
3918         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3919         return last_non_zero;
3920     }
3921
         /* score_tab[i] is the best score found for coding everything before
          * scan position i; survivor[] lists the positions that are still
          * considered as the start of the next run */
3922     score_tab[start_i]= 0;
3923     survivor[0]= start_i;
3924     survivor_count= 1;
3925
3926     for(i=start_i; i<=last_non_zero; i++){
3927         int level_index, j, zero_distortion;
3928         int dct_coeff= FFABS(block[ scantable[i] ]);
3929         int best_score=256*256*256*120;
3930
             /* output of ff_fdct_ifast is rescaled with ff_inv_aanscales */
3931         if (s->fdsp.fdct == ff_fdct_ifast)
3932             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3933         zero_distortion= dct_coeff*dct_coeff;
3934
3935         for(level_index=0; level_index < coeff_count[i]; level_index++){
3936             int distortion;
3937             int level= coeff[level_index][i];
3938             const int alevel= FFABS(level);
3939             int unquant_coeff;
3940
3941             av_assert2(level);
3942
                 /* reconstruct the candidate the way the selected output
                  * format would dequantize it */
3943             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3944                 unquant_coeff= alevel*qmul + qadd;
3945             } else if(s->out_format == FMT_MJPEG) {
3946                 j = s->idsp.idct_permutation[scantable[i]];
3947                 unquant_coeff = alevel * matrix[j] * 8;
3948             }else{ //MPEG1
3949                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3950                 if(s->mb_intra){
3951                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3952                         unquant_coeff =   (unquant_coeff - 1) | 1;
3953                 }else{
3954                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3955                         unquant_coeff =   (unquant_coeff - 1) | 1;
3956                 }
3957                 unquant_coeff<<= 3;
3958             }
3959
                 /* squared error of this candidate relative to coding a zero */
3960             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* levels -64..63 index the VLC length tables after the +64
                  * shift; anything else is charged esc_length instead */
3961             level+=64;
3962             if((level&(~127)) == 0){
3963                 for(j=survivor_count-1; j>=0; j--){
3964                     int run= i - survivor[j];
3965                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3966                     score += score_tab[i-run];
3967
3968                     if(score < best_score){
3969                         best_score= score;
3970                         run_tab[i+1]= run;
3971                         level_tab[i+1]= level-64;
3972                     }
3973                 }
3974
                     /* for these formats the "last coefficient" candidate is
                      * scored here with the last_length table */
3975                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3976                     for(j=survivor_count-1; j>=0; j--){
3977                         int run= i - survivor[j];
3978                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3979                         score += score_tab[i-run];
3980                         if(score < last_score){
3981                             last_score= score;
3982                             last_run= run;
3983                             last_level= level-64;
3984                             last_i= i+1;
3985                         }
3986                     }
3987                 }
3988             }else{
3989                 distortion += esc_length*lambda;
3990                 for(j=survivor_count-1; j>=0; j--){
3991                     int run= i - survivor[j];
3992                     int score= distortion + score_tab[i-run];
3993
3994                     if(score < best_score){
3995                         best_score= score;
3996                         run_tab[i+1]= run;
3997                         level_tab[i+1]= level-64;
3998                     }
3999                 }
4000
4001                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4002                   for(j=survivor_count-1; j>=0; j--){
4003                         int run= i - survivor[j];
4004                         int score= distortion + score_tab[i-run];
4005                         if(score < last_score){
4006                             last_score= score;
4007                             last_run= run;
4008                             last_level= level-64;
4009                             last_i= i+1;
4010                         }
4011                     }
4012                 }
4013             }
4014         }
4015
4016         score_tab[i+1]= best_score;
4017
             /* drop trailing survivors whose score is worse than the new best
              * (with one lambda of slack when last_non_zero > 27) */
4018         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
4019         if(last_non_zero <= 27){
4020             for(; survivor_count; survivor_count--){
4021                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4022                     break;
4023             }
4024         }else{
4025             for(; survivor_count; survivor_count--){
4026                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4027                     break;
4028             }
4029         }
4030
4031         survivor[ survivor_count++ ]= i+1;
4032     }
4033
         /* for the remaining formats the end position is picked afterwards:
          * the cheapest score_tab entry, with 2*lambda added for every i != 0 */
4034     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4035         last_score= 256*256*256*120;
4036         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4037             int score= score_tab[i];
4038             if(i) score += lambda*2; //FIXME exacter?
4039
4040             if(score < last_score){
4041                 last_score= score;
4042                 last_i= i;
4043                 last_level= level_tab[i];
4044                 last_run= run_tab[i];
4045             }
4046         }
4047     }
4048
4049     s->coded_score[n] = last_score;
4050
         /* keep |block[0]| before the coefficients are cleared; it is only
          * used by the single-coefficient case below */
4051     dc= FFABS(block[0]);
4052     last_non_zero= last_i - 1;
4053     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4054
4055     if(last_non_zero < start_i)
4056         return last_non_zero;
4057
         /* only the first coefficient of a non-intra block is left: decide
          * again between its candidates and dropping it entirely */
4058     if(last_non_zero == 0 && start_i == 0){
4059         int best_level= 0;
4060         int best_score= dc * dc;
4061
4062         for(i=0; i<coeff_count[0]; i++){
4063             int level= coeff[i][0];
4064             int alevel= FFABS(level);
4065             int unquant_coeff, score, distortion;
4066
4067             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4068                     unquant_coeff= (alevel*qmul + qadd)>>3;
4069             }else{ //MPEG1
4070                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4071                     unquant_coeff =   (unquant_coeff - 1) | 1;
4072             }
4073             unquant_coeff = (unquant_coeff + 4) >> 3;
4074             unquant_coeff<<= 3 + 3;
4075
4076             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4077             level+=64;
4078             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4079             else                    score= distortion + esc_length*lambda;
4080
4081             if(score < best_score){
4082                 best_score= score;
4083                 best_level= level - 64;
4084             }
4085         }
4086         block[0]= best_level;
4087         s->coded_score[n] = best_score - dc*dc;
4088         if(best_level == 0) return -1;
4089         else                return last_non_zero;
4090     }
4091
         /* walk the chosen path backwards through run_tab/level_tab and store
          * the levels at their permuted positions */
4092     i= last_i;
4093     av_assert2(last_level);
4094
4095     block[ perm_scantable[last_non_zero] ]= last_level;
4096     i -= last_run + 1;
4097
4098     for(; i>start_i; i -= run_tab[i] + 1){
4099         block[ perm_scantable[i-1] ]= level_tab[i];
4100     }
4101
4102     return last_non_zero;
4103 }
4104
4105 //#define REFINE_STATS 1
4106 static int16_t basis[64][64];
4107
4108 static void build_basis(uint8_t *perm){
4109     int i, j, x, y;
4110     emms_c();
4111     for(i=0; i<8; i++){
4112         for(j=0; j<8; j++){
4113             for(y=0; y<8; y++){
4114                 for(x=0; x<8; x++){
4115                     double s= 0.25*(1<<BASIS_SHIFT);
4116                     int index= 8*i + j;
4117                     int perm_index= perm[index];
4118                     if(i==0) s*= sqrt(0.5);
4119                     if(j==0) s*= sqrt(0.5);
4120                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4121                 }
4122             }
4123         }
4124     }
4125 }
4126
/**
 * Greedily refine the already quantized coefficients of one block.
 *
 * Each pass scores changing every candidate coefficient by +1 or -1: the
 * score is s->mpvencdsp.try_8x8basis() on the current residual plus lambda
 * times the change in VLC length. The single best change is applied, the
 * residual and run table are updated, and the search repeats until no change
 * beats the score of leaving the block untouched.
 *
 * @param s      encoder context
 * @param block  quantized coefficients, indexed through
 *               s->intra_scantable.permutated; modified in place
 * @param weight 64 per-sample weights; overwritten with values in 16..63
 * @param orig   64 reference samples the residual is measured against
 * @param n      block number; selects DC scale and VLC length tables and
 *               indexes s->block_last_index
 * @param qscale quantizer scale
 * @return scan index of the last non-zero coefficient after refinement
 */
4127 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4128                         int16_t *block, int16_t *weight, int16_t *orig,
4129                         int n, int qscale){
4130     int16_t rem[64];
4131     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4132     const uint8_t *scantable= s->intra_scantable.scantable;
4133     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4134 //    unsigned int threshold1, threshold2;
4135 //    int bias=0;
4136     int run_tab[65];
4137     int prev_run=0;
4138     int prev_level=0;
4139     int qmul, qadd, start_i, last_non_zero, i, dc;
4140     uint8_t * length;
4141     uint8_t * last_length;
4142     int lambda;
4143     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4144 #ifdef REFINE_STATS
4145 static int count=0;
4146 static int after_last=0;
4147 static int to_zero=0;
4148 static int from_zero=0;
4149 static int raise=0;
4150 static int lower=0;
4151 static int messed_sign=0;
4152 #endif
4153
         /* the table is built on first use; it counts as unbuilt while
          * basis[0][0] is still 0 */
4154     if(basis[0][0] == 0)
4155         build_basis(s->idsp.idct_permutation);
4156
         /* reconstruction of a non-zero level: qmul*level +/- qadd */
4157     qmul= qscale*2;
4158     qadd= (qscale-1)|1;
4159     if (s->mb_intra) {
4160         if (!s->h263_aic) {
4161             if (n < 4)
4162                 q = s->y_dc_scale;
4163             else
4164                 q = s->c_dc_scale;
4165         } else{
4166             /* For AIC we skip quant/dequant of INTRADC */
4167             q = 1;
4168             qadd=0;
4169         }
4170         q <<= RECON_SHIFT-3;
4171         /* note: block[0] is assumed to be positive */
4172         dc= block[0]*q;
4173 //        block[0] = (block[0] + (q >> 1)) / q;
4174         start_i = 1;
4175 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4176 //            bias= 1<<(QMAT_SHIFT-1);
4177         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4178             length     = s->intra_chroma_ac_vlc_length;
4179             last_length= s->intra_chroma_ac_vlc_last_length;
4180         } else {
4181             length     = s->intra_ac_vlc_length;
4182             last_length= s->intra_ac_vlc_last_length;
4183         }
4184     } else {
4185         dc= 0;
4186         start_i = 0;
4187         length     = s->inter_ac_vlc_length;
4188         last_length= s->inter_ac_vlc_last_length;
4189     }
4190     last_non_zero = s->block_last_index[n];
4191
4192 #ifdef REFINE_STATS
4193 {START_TIMER
4194 #endif
         /* rem[] starts as (dc + rounding term) minus the reference samples
          * scaled by 1 << RECON_SHIFT */
4195     dc += (1<<(RECON_SHIFT-1));
4196     for(i=0; i<64; i++){
4197         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4198     }
4199 #ifdef REFINE_STATS
4200 STOP_TIMER("memset rem[]")}
4201 #endif
         /* remap the incoming weights and accumulate their squares; the sum
          * scales lambda below */
4202     sum=0;
4203     for(i=0; i<64; i++){
4204         int one= 36;
4205         int qns=4;
4206         int w;
4207
4208         w= FFABS(weight[i]) + qns*one;
4209         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4210
4211         weight[i] = w;
4212 //        w=weight[i] = (63*qns + (w/2)) / w;
4213
4214         av_assert2(w>0);
4215         av_assert2(w<(1<<6));
4216         sum += w*w;
4217     }
4218     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4219 #ifdef REFINE_STATS
4220 {START_TIMER
4221 #endif
         /* record the zero run before each coded coefficient and add the
          * contribution of every coded coefficient to rem[] */
4222     run=0;
4223     rle_index=0;
4224     for(i=start_i; i<=last_non_zero; i++){
4225         int j= perm_scantable[i];
4226         const int level= block[j];
4227         int coeff;
4228
4229         if(level){
4230             if(level<0) coeff= qmul*level - qadd;
4231             else        coeff= qmul*level + qadd;
4232             run_tab[rle_index++]=run;
4233             run=0;
4234
4235             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4236         }else{
4237             run++;
4238         }
4239     }
4240 #ifdef REFINE_STATS
4241 if(last_non_zero>0){
4242 STOP_TIMER("init rem[]")
4243 }
4244 }
4245
4246 {START_TIMER
4247 #endif
         /* one iteration = evaluate all +/-1 changes, apply the best one;
          * the loop ends when nothing beats the unchanged block */
4248     for(;;){
4249         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4250         int best_coeff=0;
4251         int best_change=0;
4252         int run2, best_unquant_change=0, analyze_gradient;
4253 #ifdef REFINE_STATS
4254 {START_TIMER
4255 #endif
4256         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4257
             /* d1[] = fdct of the weighted residual; used further down to
              * skip turning a zero into +/-1 when d1 has the same sign */
4258         if(analyze_gradient){
4259 #ifdef REFINE_STATS
4260 {START_TIMER
4261 #endif
4262             for(i=0; i<64; i++){
4263                 int w= weight[i];
4264
4265                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4266             }
4267 #ifdef REFINE_STATS
4268 STOP_TIMER("rem*w*w")}
4269 {START_TIMER
4270 #endif
4271             s->fdsp.fdct(d1);
4272 #ifdef REFINE_STATS
4273 STOP_TIMER("dct")}
4274 #endif
4275         }
4276
             /* intra blocks: try moving the DC level by one in each direction
              * (no bit cost is added for this change) */
4277         if(start_i){
4278             const int level= block[0];
4279             int change, old_coeff;
4280
4281             av_assert2(s->mb_intra);
4282
4283             old_coeff= q*level;
4284
4285             for(change=-1; change<=1; change+=2){
4286                 int new_level= level + change;
4287                 int score, new_coeff;
4288
4289                 new_coeff= q*new_level;
4290                 if(new_coeff >= 2048 || new_coeff < 0)
4291                     continue;
4292
4293                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4294                                                   new_coeff - old_coeff);
4295                 if(score<best_score){
4296                     best_score= score;
4297                     best_coeff= 0;
4298                     best_change= change;
4299                     best_unquant_change= new_coeff - old_coeff;
4300                 }
4301             }
4302         }
4303
4304         run=0;
4305         rle_index=0;
4306         run2= run_tab[rle_index++];
4307         prev_level=0;
4308         prev_run=0;
4309
             /* run  = zeros since the previous coded coefficient,
              * run2 = zeros remaining before the next coded coefficient */
4310         for(i=start_i; i<64; i++){
4311             int j= perm_scantable[i];
4312             const int level= block[j];
4313             int change, old_coeff;
4314
                 /* below noise-shaping level 3 only one position past the
                  * current last coefficient is considered */
4315             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4316                 break;
4317
4318             if(level){
4319                 if(level<0) old_coeff= qmul*level - qadd;
4320                 else        old_coeff= qmul*level + qadd;
4321                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4322             }else{
4323                 old_coeff=0;
4324                 run2--;
4325                 av_assert2(run2>=0 || i >= last_non_zero );
4326             }
4327
4328             for(change=-1; change<=1; change+=2){
4329                 int new_level= level + change;
4330                 int score, new_coeff, unquant_change;
4331
4332                 score=0;
                     /* below noise-shaping level 2 magnitudes may only shrink */
4333                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4334                    continue;
4335
4336                 if(new_level){
4337                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4338                     else            new_coeff= qmul*new_level + qadd;
4339                     if(new_coeff >= 2048 || new_coeff <= -2048)
4340                         continue;
4341                     //FIXME check for overflow
4342
                         /* non-zero -> non-zero: same run, different level */
4343                     if(level){
4344                         if(level < 63 && level > -63){
4345                             if(i < last_non_zero)
4346                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4347                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4348                             else
4349                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4350                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4351                         }
4352                     }else{
                             /* zero -> +/-1: a new coefficient splits a run */
4353                         av_assert2(FFABS(new_level)==1);
4354
4355                         if(analyze_gradient){
4356                             int g= d1[ scantable[i] ];
4357                             if(g && (g^new_level) >= 0)
4358                                 continue;
4359                         }
4360
4361                         if(i < last_non_zero){
4362                             int next_i= i + run2 + 1;
4363                             int next_level= block[ perm_scantable[next_i] ] + 64;
4364
4365                             if(next_level&(~127))
4366                                 next_level= 0;
4367
4368                             if(next_i < last_non_zero)
4369                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4370                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4371                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4372                             else
4373                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4374                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4375                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4376                         }else{
                                 /* the new coefficient becomes the last one, so
                                  * the previous one switches from the
                                  * last_length table to the length table */
4377                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4378                             if(prev_level){
4379                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4380                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4381                             }
4382                         }
4383                     }
4384                 }else{
                         /* +/-1 -> zero: the coefficient disappears and its
                          * run merges with the following one */
4385                     new_coeff=0;
4386                     av_assert2(FFABS(level)==1);
4387
4388                     if(i < last_non_zero){
4389                         int next_i= i + run2 + 1;
4390                         int next_level= block[ perm_scantable[next_i] ] + 64;
4391
4392                         if(next_level&(~127))
4393                             next_level= 0;
4394
4395                         if(next_i < last_non_zero)
4396                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4397                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4398                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4399                         else
4400                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4401                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4402                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4403                     }else{
4404                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4405                         if(prev_level){
4406                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4407                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4408                         }
4409                     }
4410                 }
4411
                     /* score is a length difference up to here; weight it by
                      * lambda and add the error term */
4412                 score *= lambda;
4413
4414                 unquant_change= new_coeff - old_coeff;
4415                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4416
4417                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4418                                                    unquant_change);
4419                 if(score<best_score){
4420                     best_score= score;
4421                     best_coeff= i;
4422                     best_change= change;
4423                     best_unquant_change= unquant_change;
4424                 }
4425             }
4426             if(level){
4427                 prev_level= level + 64;
4428                 if(prev_level&(~127))
4429                     prev_level= 0;
4430                 prev_run= run;
4431                 run=0;
4432             }else{
4433                 run++;
4434             }
4435         }
4436 #ifdef REFINE_STATS
4437 STOP_TIMER("iterative step")}
4438 #endif
4439
             /* apply the winning change, if there is one */
4440         if(best_change){
4441             int j= perm_scantable[ best_coeff ];
4442
4443             block[j] += best_change;
4444
4445             if(best_coeff > last_non_zero){
4446                 last_non_zero= best_coeff;
4447                 av_assert2(block[j]);
4448 #ifdef REFINE_STATS
4449 after_last++;
4450 #endif
4451             }else{
4452 #ifdef REFINE_STATS
4453 if(block[j]){
4454     if(block[j] - best_change){
4455         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4456             raise++;
4457         }else{
4458             lower++;
4459         }
4460     }else{
4461         from_zero++;
4462     }
4463 }else{
4464     to_zero++;
4465 }
4466 #endif
                     /* the change may have zeroed the last coefficient */
4467                 for(; last_non_zero>=start_i; last_non_zero--){
4468                     if(block[perm_scantable[last_non_zero]])
4469                         break;
4470                 }
4471             }
4472 #ifdef REFINE_STATS
4473 count++;
4474 if(256*256*256*64 % count == 0){
4475     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4476 }
4477 #endif
                 /* rebuild the run table for the modified block */
4478             run=0;
4479             rle_index=0;
4480             for(i=start_i; i<=last_non_zero; i++){
4481                 int j= perm_scantable[i];
4482                 const int level= block[j];
4483
4484                  if(level){
4485                      run_tab[rle_index++]=run;
4486                      run=0;
4487                  }else{
4488                      run++;
4489                  }
4490             }
4491
                 /* j is the permuted index of best_coeff again here (the loop
                  * variable above was a separate, inner j) */
4492             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4493         }else{
4494             break;
4495         }
4496     }
4497 #ifdef REFINE_STATS
4498 if(last_non_zero>0){
4499 STOP_TIMER("iterative search")
4500 }
4501 }
4502 #endif
4503
4504     return last_non_zero;
4505 }
4506
/**
 * Move the coefficients of an 8x8 block to their permuted positions.
 *
 * Only the positions named by the first last + 1 scantable entries are
 * touched; nothing happens when last <= 0.
 *
 * @param block       the 64 coefficients, permuted in place
 * @param permutation maps a source position to its destination position
 * @param scantable   lists the positions to process; it only limits the work
 *                    to the first last + 1 entries, the block is not
 *                    reordered into scan order
 * @param last        index into scantable of the last entry to process
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    const uint8_t *scan, *scan_end;
    int16_t saved[64];

    if (last <= 0)
        return;
    /* FIXME: an early return for permutation[1] == 1 would be ok but not
     * clean, and might fail for some permutations */

    scan_end = scantable + last;

    /* pass 1: stash the listed coefficients and clear their slots, so that a
     * destination slot is never read after it has been overwritten */
    for (scan = scantable; scan <= scan_end; scan++) {
        const int src = *scan;

        saved[src] = block[src];
        block[src] = 0;
    }

    /* pass 2: drop every stashed coefficient at its permuted position */
    for (scan = scantable; scan <= scan_end; scan++) {
        const int src = *scan;

        block[permutation[src]] = saved[src];
    }
}
4542
4543 int ff_dct_quantize_c(MpegEncContext *s,
4544                         int16_t *block, int n,
4545                         int qscale, int *overflow)
4546 {
4547     int i, j, level, last_non_zero, q, start_i;
4548     const int *qmat;
4549     const uint8_t *scantable= s->intra_scantable.scantable;
4550     int bias;
4551     int max=0;
4552     unsigned int threshold1, threshold2;
4553
4554     s->fdsp.fdct(block);
4555
4556     if(s->dct_error_sum)
4557         s->denoise_dct(s, block);
4558
4559     if (s->mb_intra) {
4560         if (!s->h263_aic) {
4561             if (n < 4)
4562                 q = s->y_dc_scale;
4563             else
4564                 q = s->c_dc_scale;
4565             q = q << 3;
4566         } else
4567             /* For AIC we skip quant/dequant of INTRADC */
4568             q = 1 << 3;
4569
4570         /* note: block[0] is assumed to be positive */
4571         block[0] = (block[0] + (q >> 1)) / q;
4572         start_i = 1;
4573         last_non_zero = 0;
4574         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4575         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4576     } else {
4577         start_i = 0;
4578         last_non_zero = -1;
4579         qmat = s->q_inter_matrix[qscale];
4580         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4581     }
4582     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4583     threshold2= (threshold1<<1);
4584     for(i=63;i>=start_i;i--) {
4585         j = scantable[i];
4586         level = block[j] * qmat[j];
4587
4588         if(((unsigned)(level+threshold1))>threshold2){
4589             last_non_zero = i;
4590             break;
4591         }else{
4592             block[j]=0;
4593         }
4594     }
4595     for(i=start_i; i<=last_non_zero; i++) {
4596         j = scantable[i];
4597         level = block[j] * qmat[j];
4598
4599 //        if(   bias+level >= (1<<QMAT_SHIFT)
4600 //           || bias-level >= (1<<QMAT_SHIFT)){
4601         if(((unsigned)(level+threshold1))>threshold2){
4602             if(level>0){
4603                 level= (bias + level)>>QMAT_SHIFT;
4604                 block[j]= level;
4605             }else{
4606                 level= (bias - level)>>QMAT_SHIFT;
4607                 block[j]= -level;
4608             }
4609             max |=level;
4610         }else{
4611             block[j]=0;
4612         }
4613     }
4614     *overflow= s->max_qcoeff < max; //overflow might have happened
4615
4616     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4617     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4618         block_permute(block, s->idsp.idct_permutation,
4619                       scantable, last_non_zero);
4620
4621     return last_non_zero;
4622 }
4623
/* Byte offset of field x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags: video parameter + encoding parameter. The expansion is
 * parenthesized so that it behaves as one operand wherever it is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4626 static const AVOption h263_options[] = {
4627     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4628     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4629     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4630     FF_MPV_COMMON_OPTS
4631     { NULL },
4632 };
4633
4634 static const AVClass h263_class = {
4635     .class_name = "H.263 encoder",
4636     .item_name  = av_default_item_name,
4637     .option     = h263_options,
4638     .version    = LIBAVUTIL_VERSION_INT,
4639 };
4640
4641 AVCodec ff_h263_encoder = {
4642     .name           = "h263",
4643     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4644     .type           = AVMEDIA_TYPE_VIDEO,
4645     .id             = AV_CODEC_ID_H263,
4646     .priv_data_size = sizeof(MpegEncContext),
4647     .init           = ff_mpv_encode_init,
4648     .encode2        = ff_mpv_encode_picture,
4649     .close          = ff_mpv_encode_end,
4650     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4651     .priv_class     = &h263_class,
4652 };
4653
4654 static const AVOption h263p_options[] = {
4655     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4656     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4657     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4658     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4659     FF_MPV_COMMON_OPTS
4660     { NULL },
4661 };
4662 static const AVClass h263p_class = {
4663     .class_name = "H.263p encoder",
4664     .item_name  = av_default_item_name,
4665     .option     = h263p_options,
4666     .version    = LIBAVUTIL_VERSION_INT,
4667 };
4668
4669 AVCodec ff_h263p_encoder = {
4670     .name           = "h263p",
4671     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4672     .type           = AVMEDIA_TYPE_VIDEO,
4673     .id             = AV_CODEC_ID_H263P,
4674     .priv_data_size = sizeof(MpegEncContext),
4675     .init           = ff_mpv_encode_init,
4676     .encode2        = ff_mpv_encode_picture,
4677     .close          = ff_mpv_encode_end,
4678     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4679     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4680     .priv_class     = &h263p_class,
4681 };
4682
4683 static const AVClass msmpeg4v2_class = {
4684     .class_name = "msmpeg4v2 encoder",
4685     .item_name  = av_default_item_name,
4686     .option     = ff_mpv_generic_options,
4687     .version    = LIBAVUTIL_VERSION_INT,
4688 };
4689
4690 AVCodec ff_msmpeg4v2_encoder = {
4691     .name           = "msmpeg4v2",
4692     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4693     .type           = AVMEDIA_TYPE_VIDEO,
4694     .id             = AV_CODEC_ID_MSMPEG4V2,
4695     .priv_data_size = sizeof(MpegEncContext),
4696     .init           = ff_mpv_encode_init,
4697     .encode2        = ff_mpv_encode_picture,
4698     .close          = ff_mpv_encode_end,
4699     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4700     .priv_class     = &msmpeg4v2_class,
4701 };
4702
4703 static const AVClass msmpeg4v3_class = {
4704     .class_name = "msmpeg4v3 encoder",
4705     .item_name  = av_default_item_name,
4706     .option     = ff_mpv_generic_options,
4707     .version    = LIBAVUTIL_VERSION_INT,
4708 };
4709
4710 AVCodec ff_msmpeg4v3_encoder = {
4711     .name           = "msmpeg4",
4712     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4713     .type           = AVMEDIA_TYPE_VIDEO,
4714     .id             = AV_CODEC_ID_MSMPEG4V3,
4715     .priv_data_size = sizeof(MpegEncContext),
4716     .init           = ff_mpv_encode_init,
4717     .encode2        = ff_mpv_encode_picture,
4718     .close          = ff_mpv_encode_end,
4719     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4720     .priv_class     = &msmpeg4v3_class,
4721 };
4722
4723 static const AVClass wmv1_class = {
4724     .class_name = "wmv1 encoder",
4725     .item_name  = av_default_item_name,
4726     .option     = ff_mpv_generic_options,
4727     .version    = LIBAVUTIL_VERSION_INT,
4728 };
4729
4730 AVCodec ff_wmv1_encoder = {
4731     .name           = "wmv1",
4732     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4733     .type           = AVMEDIA_TYPE_VIDEO,
4734     .id             = AV_CODEC_ID_WMV1,
4735     .priv_data_size = sizeof(MpegEncContext),
4736     .init           = ff_mpv_encode_init,
4737     .encode2        = ff_mpv_encode_picture,
4738     .close          = ff_mpv_encode_end,
4739     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4740     .priv_class     = &wmv1_class,
4741 };