]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit '7cad1bf0759ada2a1fc3e80bb232a5377dd4fda4'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  *
24  * non linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
25  */
26
27 /**
28  * @file
29  * The simplest mpeg encoder (well, it was the simplest!).
30  */
31
32 #include <stdint.h>
33
34 #include "libavutil/internal.h"
35 #include "libavutil/intmath.h"
36 #include "libavutil/mathematics.h"
37 #include "libavutil/pixdesc.h"
38 #include "libavutil/opt.h"
39 #include "libavutil/timer.h"
40 #include "avcodec.h"
41 #include "dct.h"
42 #include "idctdsp.h"
43 #include "mpeg12.h"
44 #include "mpegvideo.h"
45 #include "mpegvideodata.h"
46 #include "h261.h"
47 #include "h263.h"
48 #include "h263data.h"
49 #include "mjpegenc_common.h"
50 #include "mathops.h"
51 #include "mpegutils.h"
52 #include "mjpegenc.h"
53 #include "msmpeg4.h"
54 #include "pixblockdsp.h"
55 #include "qpeldsp.h"
56 #include "faandct.h"
57 #include "thread.h"
58 #include "aandcttab.h"
59 #include "flv.h"
60 #include "mpeg4video.h"
61 #include "internal.h"
62 #include "bytestream.h"
63 #include "wmv2.h"
64 #include "rv10.h"
65 #include <limits.h>
66 #include "sp5x.h"
67
/* Number of fractional bits in the quantizer bias values
 * (e.g. an intra bias of 3/8 is stored as 3 << (QUANT_BIAS_SHIFT - 3)). */
68 #define QUANT_BIAS_SHIFT 8
69
/* Fixed-point shifts used by ff_convert_matrix(): QMAT_SHIFT_MMX for the
 * 16-bit qmat16 reciprocals, QMAT_SHIFT for the int qmat reciprocals. */
70 #define QMAT_SHIFT_MMX 16
71 #define QMAT_SHIFT 21
72
/* Forward declarations of file-local helpers; their definitions are
 * expected later in this file. */
73 static int encode_picture(MpegEncContext *s, int picture_number);
74 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
75 static int sse_mb(MpegEncContext *s);
76 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
77 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
78
/* Shared default tables that mpv_encode_defaults() installs as
 * s->me.mv_penalty and s->fcode_tab. */
79 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
80 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
81
/* Option table consisting of the entries expanded from FF_MPV_COMMON_OPTS,
 * followed by the terminating { NULL } entry. */
82 const AVOption ff_mpv_generic_options[] = {
83     FF_MPV_COMMON_OPTS
84     { NULL },
85 };
86
87 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
88                        uint16_t (*qmat16)[2][64],
89                        const uint16_t *quant_matrix,
90                        int bias, int qmin, int qmax, int intra)
91 {
92     FDCTDSPContext *fdsp = &s->fdsp;
93     int qscale;
94     int shift = 0;
95
96     for (qscale = qmin; qscale <= qmax; qscale++) {
97         int i;
98         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
99 #if CONFIG_FAANDCT
100             fdsp->fdct == ff_faandct            ||
101 #endif /* CONFIG_FAANDCT */
102             fdsp->fdct == ff_jpeg_fdct_islow_10) {
103             for (i = 0; i < 64; i++) {
104                 const int j = s->idsp.idct_permutation[i];
105                 int64_t den = (int64_t) qscale * quant_matrix[j];
106                 /* 16 <= qscale * quant_matrix[i] <= 7905
107                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
108                  *             19952 <=              x  <= 249205026
109                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
110                  *           3444240 >= (1 << 36) / (x) >= 275 */
111
112                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
113             }
114         } else if (fdsp->fdct == ff_fdct_ifast) {
115             for (i = 0; i < 64; i++) {
116                 const int j = s->idsp.idct_permutation[i];
117                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
118                 /* 16 <= qscale * quant_matrix[i] <= 7905
119                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
120                  *             19952 <=              x  <= 249205026
121                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
122                  *           3444240 >= (1 << 36) / (x) >= 275 */
123
124                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
125             }
126         } else {
127             for (i = 0; i < 64; i++) {
128                 const int j = s->idsp.idct_permutation[i];
129                 int64_t den = (int64_t) qscale * quant_matrix[j];
130                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
131                  * Assume x = qscale * quant_matrix[i]
132                  * So             16 <=              x  <= 7905
133                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
134                  * so          32768 >= (1 << 19) / (x) >= 67 */
135                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
136                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
137                 //                    (qscale * quant_matrix[i]);
138                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
139
140                 if (qmat16[qscale][0][i] == 0 ||
141                     qmat16[qscale][0][i] == 128 * 256)
142                     qmat16[qscale][0][i] = 128 * 256 - 1;
143                 qmat16[qscale][1][i] =
144                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
145                                 qmat16[qscale][0][i]);
146             }
147         }
148
149         for (i = intra; i < 64; i++) {
150             int64_t max = 8191;
151             if (fdsp->fdct == ff_fdct_ifast) {
152                 max = (8191LL * ff_aanscales[i]) >> 14;
153             }
154             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
155                 shift++;
156             }
157         }
158     }
159     if (shift) {
160         av_log(NULL, AV_LOG_INFO,
161                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
162                QMAT_SHIFT - shift);
163     }
164 }
165
166 static inline void update_qscale(MpegEncContext *s)
167 {
168     if (s->q_scale_type == 1) {
169         int i;
170         int bestdiff=INT_MAX;
171         int best = 1;
172         static const uint8_t non_linear_qscale[] = {
173             1,2,3,4,5,6,7,8,9,10,11,12,14,16,18,20,24,26,28
174         };
175
176         for (i = 0 ; i<FF_ARRAY_ELEMS(non_linear_qscale); i++) {
177             int diff = FFABS((non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 7)) - (int)s->lambda * 139);
178             if (non_linear_qscale[i] < s->avctx->qmin ||
179                 (non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
180                 continue;
181             if (diff < bestdiff) {
182                 bestdiff = diff;
183                 best = non_linear_qscale[i];
184             }
185         }
186         s->qscale = best;
187     } else {
188         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
189                     (FF_LAMBDA_SHIFT + 7);
190         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
191     }
192
193     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
194                  FF_LAMBDA_SHIFT;
195 }
196
197 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
198 {
199     int i;
200
201     if (matrix) {
202         put_bits(pb, 1, 1);
203         for (i = 0; i < 64; i++) {
204             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
205         }
206     } else
207         put_bits(pb, 1, 0);
208 }
209
210 /**
211  * init s->current_picture.qscale_table from s->lambda_table
212  */
213 void ff_init_qscale_tab(MpegEncContext *s)
214 {
215     int8_t * const qscale_table = s->current_picture.qscale_table;
216     int i;
217
218     for (i = 0; i < s->mb_num; i++) {
219         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
220         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
221         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
222                                                   s->avctx->qmax);
223     }
224 }
225
226 static void update_duplicate_context_after_me(MpegEncContext *dst,
227                                               MpegEncContext *src)
228 {
229 #define COPY(a) dst->a= src->a
230     COPY(pict_type);
231     COPY(current_picture);
232     COPY(f_code);
233     COPY(b_code);
234     COPY(qscale);
235     COPY(lambda);
236     COPY(lambda2);
237     COPY(picture_in_gop_number);
238     COPY(gop_picture_number);
239     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
240     COPY(progressive_frame);    // FIXME don't set in encode_header
241     COPY(partitioned_frame);    // FIXME don't set in encode_header
242 #undef COPY
243 }
244
245 /**
246  * Set the given MpegEncContext to defaults for encoding.
247  * the changed fields will not depend upon the prior state of the MpegEncContext.
248  */
249 static void mpv_encode_defaults(MpegEncContext *s)
250 {
251     int i;
252     ff_mpv_common_defaults(s);
253
254     for (i = -16; i < 16; i++) {
255         default_fcode_tab[i + MAX_MV] = 1;
256     }
257     s->me.mv_penalty = default_mv_penalty;
258     s->fcode_tab     = default_fcode_tab;
259
260     s->input_picture_number  = 0;
261     s->picture_in_gop_number = 0;
262 }
263
264 av_cold int ff_dct_encode_init(MpegEncContext *s) {
265     if (ARCH_X86)
266         ff_dct_encode_init_x86(s);
267
268     if (CONFIG_H263_ENCODER)
269         ff_h263dsp_init(&s->h263dsp);
270     if (!s->dct_quantize)
271         s->dct_quantize = ff_dct_quantize_c;
272     if (!s->denoise_dct)
273         s->denoise_dct  = denoise_dct_c;
274     s->fast_dct_quantize = s->dct_quantize;
275     if (s->avctx->trellis)
276         s->dct_quantize  = dct_quantize_trellis_c;
277
278     return 0;
279 }
280
281 /* init video encoder */
282 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
283 {
284     MpegEncContext *s = avctx->priv_data;
285     int i, ret, format_supported;
286
287     mpv_encode_defaults(s);
288
289     switch (avctx->codec_id) {
290     case AV_CODEC_ID_MPEG2VIDEO:
291         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
292             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
293             av_log(avctx, AV_LOG_ERROR,
294                    "only YUV420 and YUV422 are supported\n");
295             return -1;
296         }
297         break;
298     case AV_CODEC_ID_MJPEG:
299     case AV_CODEC_ID_AMV:
300         format_supported = 0;
301         /* JPEG color space */
302         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
303             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
304             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
305             (avctx->color_range == AVCOL_RANGE_JPEG &&
306              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
307               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
308               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
309             format_supported = 1;
310         /* MPEG color space */
311         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
312                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
313                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
314                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
315             format_supported = 1;
316
317         if (!format_supported) {
318             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
319             return -1;
320         }
321         break;
322     default:
323         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
324             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
325             return -1;
326         }
327     }
328
329     switch (avctx->pix_fmt) {
330     case AV_PIX_FMT_YUVJ444P:
331     case AV_PIX_FMT_YUV444P:
332         s->chroma_format = CHROMA_444;
333         break;
334     case AV_PIX_FMT_YUVJ422P:
335     case AV_PIX_FMT_YUV422P:
336         s->chroma_format = CHROMA_422;
337         break;
338     case AV_PIX_FMT_YUVJ420P:
339     case AV_PIX_FMT_YUV420P:
340     default:
341         s->chroma_format = CHROMA_420;
342         break;
343     }
344
345     s->bit_rate = avctx->bit_rate;
346     s->width    = avctx->width;
347     s->height   = avctx->height;
348     if (avctx->gop_size > 600 &&
349         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
350         av_log(avctx, AV_LOG_WARNING,
351                "keyframe interval too large!, reducing it from %d to %d\n",
352                avctx->gop_size, 600);
353         avctx->gop_size = 600;
354     }
355     s->gop_size     = avctx->gop_size;
356     s->avctx        = avctx;
357     if (avctx->max_b_frames > MAX_B_FRAMES) {
358         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
359                "is %d.\n", MAX_B_FRAMES);
360         avctx->max_b_frames = MAX_B_FRAMES;
361     }
362     s->max_b_frames = avctx->max_b_frames;
363     s->codec_id     = avctx->codec->id;
364     s->strict_std_compliance = avctx->strict_std_compliance;
365     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
366     s->mpeg_quant         = avctx->mpeg_quant;
367     s->rtp_mode           = !!avctx->rtp_payload_size;
368     s->intra_dc_precision = avctx->intra_dc_precision;
369
370     // workaround some differences between how applications specify dc precision
371     if (s->intra_dc_precision < 0) {
372         s->intra_dc_precision += 8;
373     } else if (s->intra_dc_precision >= 8)
374         s->intra_dc_precision -= 8;
375
376     if (s->intra_dc_precision < 0) {
377         av_log(avctx, AV_LOG_ERROR,
378                 "intra dc precision must be positive, note some applications use"
379                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
380         return AVERROR(EINVAL);
381     }
382
383     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
384         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
385         return AVERROR(EINVAL);
386     }
387     s->user_specified_pts = AV_NOPTS_VALUE;
388
389     if (s->gop_size <= 1) {
390         s->intra_only = 1;
391         s->gop_size   = 12;
392     } else {
393         s->intra_only = 0;
394     }
395
396 #if FF_API_MOTION_EST
397 FF_DISABLE_DEPRECATION_WARNINGS
398     s->me_method = avctx->me_method;
399 FF_ENABLE_DEPRECATION_WARNINGS
400 #endif
401
402     /* Fixed QSCALE */
403     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
404
405 #if FF_API_MPV_OPT
406     FF_DISABLE_DEPRECATION_WARNINGS
407     if (avctx->border_masking != 0.0)
408         s->border_masking = avctx->border_masking;
409     FF_ENABLE_DEPRECATION_WARNINGS
410 #endif
411
412     s->adaptive_quant = (s->avctx->lumi_masking ||
413                          s->avctx->dark_masking ||
414                          s->avctx->temporal_cplx_masking ||
415                          s->avctx->spatial_cplx_masking  ||
416                          s->avctx->p_masking      ||
417                          s->border_masking ||
418                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
419                         !s->fixed_qscale;
420
421     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
422
423     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
424         switch(avctx->codec_id) {
425         case AV_CODEC_ID_MPEG1VIDEO:
426         case AV_CODEC_ID_MPEG2VIDEO:
427             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
428             break;
429         case AV_CODEC_ID_MPEG4:
430         case AV_CODEC_ID_MSMPEG4V1:
431         case AV_CODEC_ID_MSMPEG4V2:
432         case AV_CODEC_ID_MSMPEG4V3:
433             if       (avctx->rc_max_rate >= 15000000) {
434                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
435             } else if(avctx->rc_max_rate >=  2000000) {
436                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
437             } else if(avctx->rc_max_rate >=   384000) {
438                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
439             } else
440                 avctx->rc_buffer_size = 40;
441             avctx->rc_buffer_size *= 16384;
442             break;
443         }
444         if (avctx->rc_buffer_size) {
445             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
446         }
447     }
448
449     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
450         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
451         return -1;
452     }
453
454     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
455         av_log(avctx, AV_LOG_INFO,
456                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
457     }
458
459     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
460         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
461         return -1;
462     }
463
464     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
465         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
466         return -1;
467     }
468
469     if (avctx->rc_max_rate &&
470         avctx->rc_max_rate == avctx->bit_rate &&
471         avctx->rc_max_rate != avctx->rc_min_rate) {
472         av_log(avctx, AV_LOG_INFO,
473                "impossible bitrate constraints, this will fail\n");
474     }
475
476     if (avctx->rc_buffer_size &&
477         avctx->bit_rate * (int64_t)avctx->time_base.num >
478             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
479         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
480         return -1;
481     }
482
483     if (!s->fixed_qscale &&
484         avctx->bit_rate * av_q2d(avctx->time_base) >
485             avctx->bit_rate_tolerance) {
486         av_log(avctx, AV_LOG_WARNING,
487                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
488         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
489     }
490
491     if (s->avctx->rc_max_rate &&
492         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
493         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
494          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
495         90000LL * (avctx->rc_buffer_size - 1) >
496             s->avctx->rc_max_rate * 0xFFFFLL) {
497         av_log(avctx, AV_LOG_INFO,
498                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
499                "specified vbv buffer is too large for the given bitrate!\n");
500     }
501
502     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
503         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
504         s->codec_id != AV_CODEC_ID_FLV1) {
505         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
506         return -1;
507     }
508
509     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
510         av_log(avctx, AV_LOG_ERROR,
511                "OBMC is only supported with simple mb decision\n");
512         return -1;
513     }
514
515     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
516         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
517         return -1;
518     }
519
520     if (s->max_b_frames                    &&
521         s->codec_id != AV_CODEC_ID_MPEG4      &&
522         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
523         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
524         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
525         return -1;
526     }
527     if (s->max_b_frames < 0) {
528         av_log(avctx, AV_LOG_ERROR,
529                "max b frames must be 0 or positive for mpegvideo based encoders\n");
530         return -1;
531     }
532
533     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
534          s->codec_id == AV_CODEC_ID_H263  ||
535          s->codec_id == AV_CODEC_ID_H263P) &&
536         (avctx->sample_aspect_ratio.num > 255 ||
537          avctx->sample_aspect_ratio.den > 255)) {
538         av_log(avctx, AV_LOG_WARNING,
539                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
540                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
541         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
542                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
543     }
544
545     if ((s->codec_id == AV_CODEC_ID_H263  ||
546          s->codec_id == AV_CODEC_ID_H263P) &&
547         (avctx->width  > 2048 ||
548          avctx->height > 1152 )) {
549         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
550         return -1;
551     }
552     if ((s->codec_id == AV_CODEC_ID_H263  ||
553          s->codec_id == AV_CODEC_ID_H263P) &&
554         ((avctx->width &3) ||
555          (avctx->height&3) )) {
556         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
557         return -1;
558     }
559
560     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
561         (avctx->width  > 4095 ||
562          avctx->height > 4095 )) {
563         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
564         return -1;
565     }
566
567     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
568         (avctx->width  > 16383 ||
569          avctx->height > 16383 )) {
570         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
571         return -1;
572     }
573
574     if (s->codec_id == AV_CODEC_ID_RV10 &&
575         (avctx->width &15 ||
576          avctx->height&15 )) {
577         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
578         return AVERROR(EINVAL);
579     }
580
581     if (s->codec_id == AV_CODEC_ID_RV20 &&
582         (avctx->width &3 ||
583          avctx->height&3 )) {
584         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
585         return AVERROR(EINVAL);
586     }
587
588     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
589          s->codec_id == AV_CODEC_ID_WMV2) &&
590          avctx->width & 1) {
591          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
592          return -1;
593     }
594
595     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
596         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
597         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
598         return -1;
599     }
600
601     // FIXME mpeg2 uses that too
602     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
603                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
604         av_log(avctx, AV_LOG_ERROR,
605                "mpeg2 style quantization not supported by codec\n");
606         return -1;
607     }
608
609     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
610         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
611         return -1;
612     }
613
614     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
615         s->avctx->mb_decision != FF_MB_DECISION_RD) {
616         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
617         return -1;
618     }
619
620     if (s->avctx->scenechange_threshold < 1000000000 &&
621         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
622         av_log(avctx, AV_LOG_ERROR,
623                "closed gop with scene change detection are not supported yet, "
624                "set threshold to 1000000000\n");
625         return -1;
626     }
627
628     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
629         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
630             av_log(avctx, AV_LOG_ERROR,
631                   "low delay forcing is only available for mpeg2\n");
632             return -1;
633         }
634         if (s->max_b_frames != 0) {
635             av_log(avctx, AV_LOG_ERROR,
636                    "b frames cannot be used with low delay\n");
637             return -1;
638         }
639     }
640
641     if (s->q_scale_type == 1) {
642         if (avctx->qmax > 28) {
643             av_log(avctx, AV_LOG_ERROR,
644                    "non linear quant only supports qmax <= 28 currently\n");
645             return -1;
646         }
647     }
648
649     if (s->avctx->thread_count > 1         &&
650         s->codec_id != AV_CODEC_ID_MPEG4      &&
651         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
652         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
653         s->codec_id != AV_CODEC_ID_MJPEG      &&
654         (s->codec_id != AV_CODEC_ID_H263P)) {
655         av_log(avctx, AV_LOG_ERROR,
656                "multi threaded encoding not supported by codec\n");
657         return -1;
658     }
659
660     if (s->avctx->thread_count < 1) {
661         av_log(avctx, AV_LOG_ERROR,
662                "automatic thread number detection not supported by codec, "
663                "patch welcome\n");
664         return -1;
665     }
666
667     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
668         s->rtp_mode = 1;
669
670     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
671         s->h263_slice_structured = 1;
672
673     if (!avctx->time_base.den || !avctx->time_base.num) {
674         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
675         return -1;
676     }
677
678     if (avctx->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
679         av_log(avctx, AV_LOG_INFO,
680                "notice: b_frame_strategy only affects the first pass\n");
681         avctx->b_frame_strategy = 0;
682     }
683
684     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
685     if (i > 1) {
686         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
687         avctx->time_base.den /= i;
688         avctx->time_base.num /= i;
689         //return -1;
690     }
691
692     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
693         // (a + x * 3 / 8) / x
694         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
695         s->inter_quant_bias = 0;
696     } else {
697         s->intra_quant_bias = 0;
698         // (a - x / 4) / x
699         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
700     }
701
702     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
703         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
704         return AVERROR(EINVAL);
705     }
706
707 #if FF_API_QUANT_BIAS
708 FF_DISABLE_DEPRECATION_WARNINGS
709     if (s->intra_quant_bias == FF_DEFAULT_QUANT_BIAS &&
710         avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
711         s->intra_quant_bias = avctx->intra_quant_bias;
712     if (s->inter_quant_bias == FF_DEFAULT_QUANT_BIAS &&
713         avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
714         s->inter_quant_bias = avctx->inter_quant_bias;
715 FF_ENABLE_DEPRECATION_WARNINGS
716 #endif
717
718     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
719
720     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
721         s->avctx->time_base.den > (1 << 16) - 1) {
722         av_log(avctx, AV_LOG_ERROR,
723                "timebase %d/%d not supported by MPEG 4 standard, "
724                "the maximum admitted value for the timebase denominator "
725                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
726                (1 << 16) - 1);
727         return -1;
728     }
729     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
730
731     switch (avctx->codec->id) {
732     case AV_CODEC_ID_MPEG1VIDEO:
733         s->out_format = FMT_MPEG1;
734         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
735         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
736         break;
737     case AV_CODEC_ID_MPEG2VIDEO:
738         s->out_format = FMT_MPEG1;
739         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
740         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
741         s->rtp_mode   = 1;
742         break;
743     case AV_CODEC_ID_MJPEG:
744     case AV_CODEC_ID_AMV:
745         s->out_format = FMT_MJPEG;
746         s->intra_only = 1; /* force intra only for jpeg */
747         if (!CONFIG_MJPEG_ENCODER ||
748             ff_mjpeg_encode_init(s) < 0)
749             return -1;
750         avctx->delay = 0;
751         s->low_delay = 1;
752         break;
753     case AV_CODEC_ID_H261:
754         if (!CONFIG_H261_ENCODER)
755             return -1;
756         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
757             av_log(avctx, AV_LOG_ERROR,
758                    "The specified picture size of %dx%d is not valid for the "
759                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
760                     s->width, s->height);
761             return -1;
762         }
763         s->out_format = FMT_H261;
764         avctx->delay  = 0;
765         s->low_delay  = 1;
766         s->rtp_mode   = 0; /* Sliced encoding not supported */
767         break;
768     case AV_CODEC_ID_H263:
769         if (!CONFIG_H263_ENCODER)
770             return -1;
771         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
772                              s->width, s->height) == 8) {
773             av_log(avctx, AV_LOG_ERROR,
774                    "The specified picture size of %dx%d is not valid for "
775                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
776                    "352x288, 704x576, and 1408x1152. "
777                    "Try H.263+.\n", s->width, s->height);
778             return -1;
779         }
780         s->out_format = FMT_H263;
781         avctx->delay  = 0;
782         s->low_delay  = 1;
783         break;
784     case AV_CODEC_ID_H263P:
785         s->out_format = FMT_H263;
786         s->h263_plus  = 1;
787         /* Fx */
788         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
789         s->modified_quant  = s->h263_aic;
790         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
791         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
792
793         /* /Fx */
794         /* These are just to be sure */
795         avctx->delay = 0;
796         s->low_delay = 1;
797         break;
798     case AV_CODEC_ID_FLV1:
799         s->out_format      = FMT_H263;
800         s->h263_flv        = 2; /* format = 1; 11-bit codes */
801         s->unrestricted_mv = 1;
802         s->rtp_mode  = 0; /* don't allow GOB */
803         avctx->delay = 0;
804         s->low_delay = 1;
805         break;
806     case AV_CODEC_ID_RV10:
807         s->out_format = FMT_H263;
808         avctx->delay  = 0;
809         s->low_delay  = 1;
810         break;
811     case AV_CODEC_ID_RV20:
812         s->out_format      = FMT_H263;
813         avctx->delay       = 0;
814         s->low_delay       = 1;
815         s->modified_quant  = 1;
816         s->h263_aic        = 1;
817         s->h263_plus       = 1;
818         s->loop_filter     = 1;
819         s->unrestricted_mv = 0;
820         break;
821     case AV_CODEC_ID_MPEG4:
822         s->out_format      = FMT_H263;
823         s->h263_pred       = 1;
824         s->unrestricted_mv = 1;
825         s->low_delay       = s->max_b_frames ? 0 : 1;
826         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
827         break;
828     case AV_CODEC_ID_MSMPEG4V2:
829         s->out_format      = FMT_H263;
830         s->h263_pred       = 1;
831         s->unrestricted_mv = 1;
832         s->msmpeg4_version = 2;
833         avctx->delay       = 0;
834         s->low_delay       = 1;
835         break;
836     case AV_CODEC_ID_MSMPEG4V3:
837         s->out_format        = FMT_H263;
838         s->h263_pred         = 1;
839         s->unrestricted_mv   = 1;
840         s->msmpeg4_version   = 3;
841         s->flipflop_rounding = 1;
842         avctx->delay         = 0;
843         s->low_delay         = 1;
844         break;
845     case AV_CODEC_ID_WMV1:
846         s->out_format        = FMT_H263;
847         s->h263_pred         = 1;
848         s->unrestricted_mv   = 1;
849         s->msmpeg4_version   = 4;
850         s->flipflop_rounding = 1;
851         avctx->delay         = 0;
852         s->low_delay         = 1;
853         break;
854     case AV_CODEC_ID_WMV2:
855         s->out_format        = FMT_H263;
856         s->h263_pred         = 1;
857         s->unrestricted_mv   = 1;
858         s->msmpeg4_version   = 5;
859         s->flipflop_rounding = 1;
860         avctx->delay         = 0;
861         s->low_delay         = 1;
862         break;
863     default:
864         return -1;
865     }
866
867     avctx->has_b_frames = !s->low_delay;
868
869     s->encoding = 1;
870
871     s->progressive_frame    =
872     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
873                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
874                                 s->alternate_scan);
875
876     /* init */
877     ff_mpv_idct_init(s);
878     if (ff_mpv_common_init(s) < 0)
879         return -1;
880
881     ff_fdctdsp_init(&s->fdsp, avctx);
882     ff_me_cmp_init(&s->mecc, avctx);
883     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
884     ff_pixblockdsp_init(&s->pdsp, avctx);
885     ff_qpeldsp_init(&s->qdsp);
886
887     if (s->msmpeg4_version) {
888         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
889                           2 * 2 * (MAX_LEVEL + 1) *
890                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
891     }
892     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
893
894     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
895     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
896     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
897     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
898     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
899     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
900     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
901                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
902     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
903                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
904
905     if (s->avctx->noise_reduction) {
906         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
907                           2 * 64 * sizeof(uint16_t), fail);
908     }
909
910     ff_dct_encode_init(s);
911
912     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
913         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
914
915     s->quant_precision = 5;
916
917     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
918     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
919
920     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
921         ff_h261_encode_init(s);
922     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
923         ff_h263_encode_init(s);
924     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
925         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
926             return ret;
927     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
928         && s->out_format == FMT_MPEG1)
929         ff_mpeg1_encode_init(s);
930
931     /* init q matrix */
932     for (i = 0; i < 64; i++) {
933         int j = s->idsp.idct_permutation[i];
934         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
935             s->mpeg_quant) {
936             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
937             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
938         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
939             s->intra_matrix[j] =
940             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
941         } else {
942             /* mpeg1/2 */
943             s->chroma_intra_matrix[j] =
944             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
945             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
946         }
947         if (s->avctx->intra_matrix)
948             s->intra_matrix[j] = s->avctx->intra_matrix[i];
949         if (s->avctx->inter_matrix)
950             s->inter_matrix[j] = s->avctx->inter_matrix[i];
951     }
952
953     /* precompute matrix */
954     /* for mjpeg, we do include qscale in the matrix */
955     if (s->out_format != FMT_MJPEG) {
956         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
957                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
958                           31, 1);
959         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
960                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
961                           31, 0);
962     }
963
964     if (ff_rate_control_init(s) < 0)
965         return -1;
966
967 #if FF_API_ERROR_RATE
968     FF_DISABLE_DEPRECATION_WARNINGS
969     if (avctx->error_rate)
970         s->error_rate = avctx->error_rate;
971     FF_ENABLE_DEPRECATION_WARNINGS;
972 #endif
973
974 #if FF_API_NORMALIZE_AQP
975     FF_DISABLE_DEPRECATION_WARNINGS
976     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
977         s->mpv_flags |= FF_MPV_FLAG_NAQ;
978     FF_ENABLE_DEPRECATION_WARNINGS;
979 #endif
980
981 #if FF_API_MV0
982     FF_DISABLE_DEPRECATION_WARNINGS
983     if (avctx->flags & CODEC_FLAG_MV0)
984         s->mpv_flags |= FF_MPV_FLAG_MV0;
985     FF_ENABLE_DEPRECATION_WARNINGS
986 #endif
987
988 #if FF_API_MPV_OPT
989     FF_DISABLE_DEPRECATION_WARNINGS
990     if (avctx->rc_qsquish != 0.0)
991         s->rc_qsquish = avctx->rc_qsquish;
992     if (avctx->rc_qmod_amp != 0.0)
993         s->rc_qmod_amp = avctx->rc_qmod_amp;
994     if (avctx->rc_qmod_freq)
995         s->rc_qmod_freq = avctx->rc_qmod_freq;
996     if (avctx->rc_buffer_aggressivity != 1.0)
997         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
998     if (avctx->rc_initial_cplx != 0.0)
999         s->rc_initial_cplx = avctx->rc_initial_cplx;
1000     if (avctx->lmin)
1001         s->lmin = avctx->lmin;
1002     if (avctx->lmax)
1003         s->lmax = avctx->lmax;
1004
1005     if (avctx->rc_eq) {
1006         av_freep(&s->rc_eq);
1007         s->rc_eq = av_strdup(avctx->rc_eq);
1008         if (!s->rc_eq)
1009             return AVERROR(ENOMEM);
1010     }
1011     FF_ENABLE_DEPRECATION_WARNINGS
1012 #endif
1013
1014     if (avctx->b_frame_strategy == 2) {
1015         for (i = 0; i < s->max_b_frames + 2; i++) {
1016             s->tmp_frames[i] = av_frame_alloc();
1017             if (!s->tmp_frames[i])
1018                 return AVERROR(ENOMEM);
1019
1020             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
1021             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
1022             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
1023
1024             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
1025             if (ret < 0)
1026                 return ret;
1027         }
1028     }
1029
1030     return 0;
1031 fail:
1032     ff_mpv_encode_end(avctx);
1033     return AVERROR_UNKNOWN;
1034 }
1035
1036 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1037 {
1038     MpegEncContext *s = avctx->priv_data;
1039     int i;
1040
1041     ff_rate_control_uninit(s);
1042
1043     ff_mpv_common_end(s);
1044     if (CONFIG_MJPEG_ENCODER &&
1045         s->out_format == FMT_MJPEG)
1046         ff_mjpeg_encode_close(s);
1047
1048     av_freep(&avctx->extradata);
1049
1050     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1051         av_frame_free(&s->tmp_frames[i]);
1052
1053     ff_free_picture_tables(&s->new_picture);
1054     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1055
1056     av_freep(&s->avctx->stats_out);
1057     av_freep(&s->ac_stats);
1058
1059     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1060     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1061     s->q_chroma_intra_matrix=   NULL;
1062     s->q_chroma_intra_matrix16= NULL;
1063     av_freep(&s->q_intra_matrix);
1064     av_freep(&s->q_inter_matrix);
1065     av_freep(&s->q_intra_matrix16);
1066     av_freep(&s->q_inter_matrix16);
1067     av_freep(&s->input_picture);
1068     av_freep(&s->reordered_input_picture);
1069     av_freep(&s->dct_offset);
1070
1071     return 0;
1072 }
1073
/**
 * Sum of absolute errors between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block; only read, never written
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between vertically adjacent samples
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(const uint8_t *src, int ref, int stride)
{
    int x, y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        const uint8_t *row = src + y * stride;

        for (x = 0; x < 16; x++) {
            /* the difference is computed once, then folded to its magnitude */
            const int diff = row[x] - ref;

            acc += diff < 0 ? -diff : diff;
        }
    }

    return acc;
}
1087
1088 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1089                            uint8_t *ref, int stride)
1090 {
1091     int x, y, w, h;
1092     int acc = 0;
1093
1094     w = s->width  & ~15;
1095     h = s->height & ~15;
1096
1097     for (y = 0; y < h; y += 16) {
1098         for (x = 0; x < w; x += 16) {
1099             int offset = x + y * stride;
1100             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1101                                       stride, 16);
1102             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1103             int sae  = get_sae(src + offset, mean, stride);
1104
1105             acc += sae + 500 < sad;
1106         }
1107     }
1108     return acc;
1109 }
1110
1111 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1112 {
1113     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1114                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1115                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1116                             &s->linesize, &s->uvlinesize);
1117 }
1118
1119 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1120 {
1121     Picture *pic = NULL;
1122     int64_t pts;
1123     int i, display_picture_number = 0, ret;
1124     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1125                                                  (s->low_delay ? 0 : 1);
1126     int direct = 1;
1127
1128     if (pic_arg) {
1129         pts = pic_arg->pts;
1130         display_picture_number = s->input_picture_number++;
1131
1132         if (pts != AV_NOPTS_VALUE) {
1133             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1134                 int64_t last = s->user_specified_pts;
1135
1136                 if (pts <= last) {
1137                     av_log(s->avctx, AV_LOG_ERROR,
1138                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1139                            pts, last);
1140                     return AVERROR(EINVAL);
1141                 }
1142
1143                 if (!s->low_delay && display_picture_number == 1)
1144                     s->dts_delta = pts - last;
1145             }
1146             s->user_specified_pts = pts;
1147         } else {
1148             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1149                 s->user_specified_pts =
1150                 pts = s->user_specified_pts + 1;
1151                 av_log(s->avctx, AV_LOG_INFO,
1152                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1153                        pts);
1154             } else {
1155                 pts = display_picture_number;
1156             }
1157         }
1158     }
1159
1160     if (pic_arg) {
1161         if (!pic_arg->buf[0] ||
1162             pic_arg->linesize[0] != s->linesize ||
1163             pic_arg->linesize[1] != s->uvlinesize ||
1164             pic_arg->linesize[2] != s->uvlinesize)
1165             direct = 0;
1166         if ((s->width & 15) || (s->height & 15))
1167             direct = 0;
1168         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1169             direct = 0;
1170         if (s->linesize & (STRIDE_ALIGN-1))
1171             direct = 0;
1172
1173         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1174                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1175
1176         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1177         if (i < 0)
1178             return i;
1179
1180         pic = &s->picture[i];
1181         pic->reference = 3;
1182
1183         if (direct) {
1184             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1185                 return ret;
1186         }
1187         ret = alloc_picture(s, pic, direct);
1188         if (ret < 0)
1189             return ret;
1190
1191         if (!direct) {
1192             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1193                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1194                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1195                 // empty
1196             } else {
1197                 int h_chroma_shift, v_chroma_shift;
1198                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1199                                                  &h_chroma_shift,
1200                                                  &v_chroma_shift);
1201
1202                 for (i = 0; i < 3; i++) {
1203                     int src_stride = pic_arg->linesize[i];
1204                     int dst_stride = i ? s->uvlinesize : s->linesize;
1205                     int h_shift = i ? h_chroma_shift : 0;
1206                     int v_shift = i ? v_chroma_shift : 0;
1207                     int w = s->width  >> h_shift;
1208                     int h = s->height >> v_shift;
1209                     uint8_t *src = pic_arg->data[i];
1210                     uint8_t *dst = pic->f->data[i];
1211                     int vpad = 16;
1212
1213                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1214                         && !s->progressive_sequence
1215                         && FFALIGN(s->height, 32) - s->height > 16)
1216                         vpad = 32;
1217
1218                     if (!s->avctx->rc_buffer_size)
1219                         dst += INPLACE_OFFSET;
1220
1221                     if (src_stride == dst_stride)
1222                         memcpy(dst, src, src_stride * h);
1223                     else {
1224                         int h2 = h;
1225                         uint8_t *dst2 = dst;
1226                         while (h2--) {
1227                             memcpy(dst2, src, w);
1228                             dst2 += dst_stride;
1229                             src += src_stride;
1230                         }
1231                     }
1232                     if ((s->width & 15) || (s->height & (vpad-1))) {
1233                         s->mpvencdsp.draw_edges(dst, dst_stride,
1234                                                 w, h,
1235                                                 16 >> h_shift,
1236                                                 vpad >> v_shift,
1237                                                 EDGE_BOTTOM);
1238                     }
1239                 }
1240             }
1241         }
1242         ret = av_frame_copy_props(pic->f, pic_arg);
1243         if (ret < 0)
1244             return ret;
1245
1246         pic->f->display_picture_number = display_picture_number;
1247         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1248     }
1249
1250     /* shift buffer entries */
1251     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1252         s->input_picture[i - 1] = s->input_picture[i];
1253
1254     s->input_picture[encoding_delay] = (Picture*) pic;
1255
1256     return 0;
1257 }
1258
1259 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1260 {
1261     int x, y, plane;
1262     int score = 0;
1263     int64_t score64 = 0;
1264
1265     for (plane = 0; plane < 3; plane++) {
1266         const int stride = p->f->linesize[plane];
1267         const int bw = plane ? 1 : 2;
1268         for (y = 0; y < s->mb_height * bw; y++) {
1269             for (x = 0; x < s->mb_width * bw; x++) {
1270                 int off = p->shared ? 0 : 16;
1271                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1272                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1273                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1274
1275                 switch (FFABS(s->avctx->frame_skip_exp)) {
1276                 case 0: score    =  FFMAX(score, v);          break;
1277                 case 1: score   += FFABS(v);                  break;
1278                 case 2: score64 += v * (int64_t)v;                       break;
1279                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1280                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1281                 }
1282             }
1283         }
1284     }
1285     emms_c();
1286
1287     if (score)
1288         score64 = score;
1289     if (s->avctx->frame_skip_exp < 0)
1290         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1291                       -1.0/s->avctx->frame_skip_exp);
1292
1293     if (score64 < s->avctx->frame_skip_threshold)
1294         return 1;
1295     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1296         return 1;
1297     return 0;
1298 }
1299
1300 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1301 {
1302     AVPacket pkt = { 0 };
1303     int ret, got_output;
1304
1305     av_init_packet(&pkt);
1306     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1307     if (ret < 0)
1308         return ret;
1309
1310     ret = pkt.size;
1311     av_free_packet(&pkt);
1312     return ret;
1313 }
1314
1315 static int estimate_best_b_count(MpegEncContext *s)
1316 {
1317     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1318     AVCodecContext *c = avcodec_alloc_context3(NULL);
1319     const int scale = s->avctx->brd_scale;
1320     int i, j, out_size, p_lambda, b_lambda, lambda2;
1321     int64_t best_rd  = INT64_MAX;
1322     int best_b_count = -1;
1323
1324     if (!c)
1325         return AVERROR(ENOMEM);
1326     av_assert0(scale >= 0 && scale <= 3);
1327
1328     //emms_c();
1329     //s->next_picture_ptr->quality;
1330     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1331     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1332     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1333     if (!b_lambda) // FIXME we should do this somewhere else
1334         b_lambda = p_lambda;
1335     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1336                FF_LAMBDA_SHIFT;
1337
1338     c->width        = s->width  >> scale;
1339     c->height       = s->height >> scale;
1340     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1341     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1342     c->mb_decision  = s->avctx->mb_decision;
1343     c->me_cmp       = s->avctx->me_cmp;
1344     c->mb_cmp       = s->avctx->mb_cmp;
1345     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1346     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1347     c->time_base    = s->avctx->time_base;
1348     c->max_b_frames = s->max_b_frames;
1349
1350     if (avcodec_open2(c, codec, NULL) < 0)
1351         return -1;
1352
1353     for (i = 0; i < s->max_b_frames + 2; i++) {
1354         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1355                                                 s->next_picture_ptr;
1356         uint8_t *data[4];
1357
1358         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1359             pre_input = *pre_input_ptr;
1360             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1361
1362             if (!pre_input.shared && i) {
1363                 data[0] += INPLACE_OFFSET;
1364                 data[1] += INPLACE_OFFSET;
1365                 data[2] += INPLACE_OFFSET;
1366             }
1367
1368             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1369                                        s->tmp_frames[i]->linesize[0],
1370                                        data[0],
1371                                        pre_input.f->linesize[0],
1372                                        c->width, c->height);
1373             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1374                                        s->tmp_frames[i]->linesize[1],
1375                                        data[1],
1376                                        pre_input.f->linesize[1],
1377                                        c->width >> 1, c->height >> 1);
1378             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1379                                        s->tmp_frames[i]->linesize[2],
1380                                        data[2],
1381                                        pre_input.f->linesize[2],
1382                                        c->width >> 1, c->height >> 1);
1383         }
1384     }
1385
1386     for (j = 0; j < s->max_b_frames + 1; j++) {
1387         int64_t rd = 0;
1388
1389         if (!s->input_picture[j])
1390             break;
1391
1392         c->error[0] = c->error[1] = c->error[2] = 0;
1393
1394         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1395         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1396
1397         out_size = encode_frame(c, s->tmp_frames[0]);
1398
1399         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1400
1401         for (i = 0; i < s->max_b_frames + 1; i++) {
1402             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1403
1404             s->tmp_frames[i + 1]->pict_type = is_p ?
1405                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1406             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1407
1408             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1409
1410             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1411         }
1412
1413         /* get the delayed frames */
1414         while (out_size) {
1415             out_size = encode_frame(c, NULL);
1416             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1417         }
1418
1419         rd += c->error[0] + c->error[1] + c->error[2];
1420
1421         if (rd < best_rd) {
1422             best_rd = rd;
1423             best_b_count = j;
1424         }
1425     }
1426
1427     avcodec_close(c);
1428     av_freep(&c);
1429
1430     return best_b_count;
1431 }
1432
1433 static int select_input_picture(MpegEncContext *s)
1434 {
1435     int i, ret;
1436
1437     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1438         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1439     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1440
1441     /* set next picture type & ordering */
1442     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1443         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1444             if (s->picture_in_gop_number < s->gop_size &&
1445                 s->next_picture_ptr &&
1446                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1447                 // FIXME check that te gop check above is +-1 correct
1448                 av_frame_unref(s->input_picture[0]->f);
1449
1450                 ff_vbv_update(s, 0);
1451
1452                 goto no_output_pic;
1453             }
1454         }
1455
1456         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1457             !s->next_picture_ptr || s->intra_only) {
1458             s->reordered_input_picture[0] = s->input_picture[0];
1459             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1460             s->reordered_input_picture[0]->f->coded_picture_number =
1461                 s->coded_picture_number++;
1462         } else {
1463             int b_frames;
1464
1465             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1466                 for (i = 0; i < s->max_b_frames + 1; i++) {
1467                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1468
1469                     if (pict_num >= s->rc_context.num_entries)
1470                         break;
1471                     if (!s->input_picture[i]) {
1472                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1473                         break;
1474                     }
1475
1476                     s->input_picture[i]->f->pict_type =
1477                         s->rc_context.entry[pict_num].new_pict_type;
1478                 }
1479             }
1480
1481             if (s->avctx->b_frame_strategy == 0) {
1482                 b_frames = s->max_b_frames;
1483                 while (b_frames && !s->input_picture[b_frames])
1484                     b_frames--;
1485             } else if (s->avctx->b_frame_strategy == 1) {
1486                 for (i = 1; i < s->max_b_frames + 1; i++) {
1487                     if (s->input_picture[i] &&
1488                         s->input_picture[i]->b_frame_score == 0) {
1489                         s->input_picture[i]->b_frame_score =
1490                             get_intra_count(s,
1491                                             s->input_picture[i    ]->f->data[0],
1492                                             s->input_picture[i - 1]->f->data[0],
1493                                             s->linesize) + 1;
1494                     }
1495                 }
1496                 for (i = 0; i < s->max_b_frames + 1; i++) {
1497                     if (!s->input_picture[i] ||
1498                         s->input_picture[i]->b_frame_score - 1 >
1499                             s->mb_num / s->avctx->b_sensitivity)
1500                         break;
1501                 }
1502
1503                 b_frames = FFMAX(0, i - 1);
1504
1505                 /* reset scores */
1506                 for (i = 0; i < b_frames + 1; i++) {
1507                     s->input_picture[i]->b_frame_score = 0;
1508                 }
1509             } else if (s->avctx->b_frame_strategy == 2) {
1510                 b_frames = estimate_best_b_count(s);
1511             } else {
1512                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1513                 b_frames = 0;
1514             }
1515
1516             emms_c();
1517
1518             for (i = b_frames - 1; i >= 0; i--) {
1519                 int type = s->input_picture[i]->f->pict_type;
1520                 if (type && type != AV_PICTURE_TYPE_B)
1521                     b_frames = i;
1522             }
1523             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1524                 b_frames == s->max_b_frames) {
1525                 av_log(s->avctx, AV_LOG_ERROR,
1526                        "warning, too many b frames in a row\n");
1527             }
1528
1529             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1530                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1531                     s->gop_size > s->picture_in_gop_number) {
1532                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1533                 } else {
1534                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1535                         b_frames = 0;
1536                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1537                 }
1538             }
1539
1540             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1541                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1542                 b_frames--;
1543
1544             s->reordered_input_picture[0] = s->input_picture[b_frames];
1545             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1546                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1547             s->reordered_input_picture[0]->f->coded_picture_number =
1548                 s->coded_picture_number++;
1549             for (i = 0; i < b_frames; i++) {
1550                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1551                 s->reordered_input_picture[i + 1]->f->pict_type =
1552                     AV_PICTURE_TYPE_B;
1553                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1554                     s->coded_picture_number++;
1555             }
1556         }
1557     }
1558 no_output_pic:
1559     if (s->reordered_input_picture[0]) {
1560         s->reordered_input_picture[0]->reference =
1561            s->reordered_input_picture[0]->f->pict_type !=
1562                AV_PICTURE_TYPE_B ? 3 : 0;
1563
1564         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1565         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1566             return ret;
1567
1568         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1569             // input is a shared pix, so we can't modifiy it -> alloc a new
1570             // one & ensure that the shared one is reuseable
1571
1572             Picture *pic;
1573             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1574             if (i < 0)
1575                 return i;
1576             pic = &s->picture[i];
1577
1578             pic->reference = s->reordered_input_picture[0]->reference;
1579             if (alloc_picture(s, pic, 0) < 0) {
1580                 return -1;
1581             }
1582
1583             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1584             if (ret < 0)
1585                 return ret;
1586
1587             /* mark us unused / free shared pic */
1588             av_frame_unref(s->reordered_input_picture[0]->f);
1589             s->reordered_input_picture[0]->shared = 0;
1590
1591             s->current_picture_ptr = pic;
1592         } else {
1593             // input is not a shared pix -> reuse buffer for current_pix
1594             s->current_picture_ptr = s->reordered_input_picture[0];
1595             for (i = 0; i < 4; i++) {
1596                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1597             }
1598         }
1599         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1600         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1601                                        s->current_picture_ptr)) < 0)
1602             return ret;
1603
1604         s->picture_number = s->new_picture.f->display_picture_number;
1605     } else {
1606         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1607     }
1608     return 0;
1609 }
1610
1611 static void frame_end(MpegEncContext *s)
1612 {
1613     if (s->unrestricted_mv &&
1614         s->current_picture.reference &&
1615         !s->intra_only) {
1616         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1617         int hshift = desc->log2_chroma_w;
1618         int vshift = desc->log2_chroma_h;
1619         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1620                                 s->current_picture.f->linesize[0],
1621                                 s->h_edge_pos, s->v_edge_pos,
1622                                 EDGE_WIDTH, EDGE_WIDTH,
1623                                 EDGE_TOP | EDGE_BOTTOM);
1624         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1625                                 s->current_picture.f->linesize[1],
1626                                 s->h_edge_pos >> hshift,
1627                                 s->v_edge_pos >> vshift,
1628                                 EDGE_WIDTH >> hshift,
1629                                 EDGE_WIDTH >> vshift,
1630                                 EDGE_TOP | EDGE_BOTTOM);
1631         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1632                                 s->current_picture.f->linesize[2],
1633                                 s->h_edge_pos >> hshift,
1634                                 s->v_edge_pos >> vshift,
1635                                 EDGE_WIDTH >> hshift,
1636                                 EDGE_WIDTH >> vshift,
1637                                 EDGE_TOP | EDGE_BOTTOM);
1638     }
1639
1640     emms_c();
1641
1642     s->last_pict_type                 = s->pict_type;
1643     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1644     if (s->pict_type!= AV_PICTURE_TYPE_B)
1645         s->last_non_b_pict_type = s->pict_type;
1646
1647 #if FF_API_CODED_FRAME
1648 FF_DISABLE_DEPRECATION_WARNINGS
1649     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1650 FF_ENABLE_DEPRECATION_WARNINGS
1651 #endif
1652 }
1653
1654 static void update_noise_reduction(MpegEncContext *s)
1655 {
1656     int intra, i;
1657
1658     for (intra = 0; intra < 2; intra++) {
1659         if (s->dct_count[intra] > (1 << 16)) {
1660             for (i = 0; i < 64; i++) {
1661                 s->dct_error_sum[intra][i] >>= 1;
1662             }
1663             s->dct_count[intra] >>= 1;
1664         }
1665
1666         for (i = 0; i < 64; i++) {
1667             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1668                                        s->dct_count[intra] +
1669                                        s->dct_error_sum[intra][i] / 2) /
1670                                       (s->dct_error_sum[intra][i] + 1);
1671         }
1672     }
1673 }
1674
1675 static int frame_start(MpegEncContext *s)
1676 {
1677     int ret;
1678
1679     /* mark & release old frames */
1680     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1681         s->last_picture_ptr != s->next_picture_ptr &&
1682         s->last_picture_ptr->f->buf[0]) {
1683         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1684     }
1685
1686     s->current_picture_ptr->f->pict_type = s->pict_type;
1687     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1688
1689     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1690     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1691                                    s->current_picture_ptr)) < 0)
1692         return ret;
1693
1694     if (s->pict_type != AV_PICTURE_TYPE_B) {
1695         s->last_picture_ptr = s->next_picture_ptr;
1696         if (!s->droppable)
1697             s->next_picture_ptr = s->current_picture_ptr;
1698     }
1699
1700     if (s->last_picture_ptr) {
1701         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1702         if (s->last_picture_ptr->f->buf[0] &&
1703             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1704                                        s->last_picture_ptr)) < 0)
1705             return ret;
1706     }
1707     if (s->next_picture_ptr) {
1708         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1709         if (s->next_picture_ptr->f->buf[0] &&
1710             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1711                                        s->next_picture_ptr)) < 0)
1712             return ret;
1713     }
1714
1715     if (s->picture_structure!= PICT_FRAME) {
1716         int i;
1717         for (i = 0; i < 4; i++) {
1718             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1719                 s->current_picture.f->data[i] +=
1720                     s->current_picture.f->linesize[i];
1721             }
1722             s->current_picture.f->linesize[i] *= 2;
1723             s->last_picture.f->linesize[i]    *= 2;
1724             s->next_picture.f->linesize[i]    *= 2;
1725         }
1726     }
1727
1728     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1729         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1730         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1731     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1732         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1733         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1734     } else {
1735         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1736         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1737     }
1738
1739     if (s->dct_error_sum) {
1740         av_assert2(s->avctx->noise_reduction && s->encoding);
1741         update_noise_reduction(s);
1742     }
1743
1744     return 0;
1745 }
1746
1747 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1748                           const AVFrame *pic_arg, int *got_packet)
1749 {
1750     MpegEncContext *s = avctx->priv_data;
1751     int i, stuffing_count, ret;
1752     int context_count = s->slice_context_count;
1753
1754     s->vbv_ignore_qmax = 0;
1755
1756     s->picture_in_gop_number++;
1757
1758     if (load_input_picture(s, pic_arg) < 0)
1759         return -1;
1760
1761     if (select_input_picture(s) < 0) {
1762         return -1;
1763     }
1764
1765     /* output? */
1766     if (s->new_picture.f->data[0]) {
1767         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1768         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1769                                               :
1770                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1771         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1772             return ret;
1773         if (s->mb_info) {
1774             s->mb_info_ptr = av_packet_new_side_data(pkt,
1775                                  AV_PKT_DATA_H263_MB_INFO,
1776                                  s->mb_width*s->mb_height*12);
1777             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1778         }
1779
1780         for (i = 0; i < context_count; i++) {
1781             int start_y = s->thread_context[i]->start_mb_y;
1782             int   end_y = s->thread_context[i]->  end_mb_y;
1783             int h       = s->mb_height;
1784             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1785             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1786
1787             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1788         }
1789
1790         s->pict_type = s->new_picture.f->pict_type;
1791         //emms_c();
1792         ret = frame_start(s);
1793         if (ret < 0)
1794             return ret;
1795 vbv_retry:
1796         ret = encode_picture(s, s->picture_number);
1797         if (growing_buffer) {
1798             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1799             pkt->data = s->pb.buf;
1800             pkt->size = avctx->internal->byte_buffer_size;
1801         }
1802         if (ret < 0)
1803             return -1;
1804
1805         avctx->header_bits = s->header_bits;
1806         avctx->mv_bits     = s->mv_bits;
1807         avctx->misc_bits   = s->misc_bits;
1808         avctx->i_tex_bits  = s->i_tex_bits;
1809         avctx->p_tex_bits  = s->p_tex_bits;
1810         avctx->i_count     = s->i_count;
1811         // FIXME f/b_count in avctx
1812         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1813         avctx->skip_count  = s->skip_count;
1814
1815         frame_end(s);
1816
1817         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1818             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1819
1820         if (avctx->rc_buffer_size) {
1821             RateControlContext *rcc = &s->rc_context;
1822             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1823
1824             if (put_bits_count(&s->pb) > max_size &&
1825                 s->lambda < s->lmax) {
1826                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1827                                        (s->qscale + 1) / s->qscale);
1828                 if (s->adaptive_quant) {
1829                     int i;
1830                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1831                         s->lambda_table[i] =
1832                             FFMAX(s->lambda_table[i] + 1,
1833                                   s->lambda_table[i] * (s->qscale + 1) /
1834                                   s->qscale);
1835                 }
1836                 s->mb_skipped = 0;        // done in frame_start()
1837                 // done in encode_picture() so we must undo it
1838                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1839                     if (s->flipflop_rounding          ||
1840                         s->codec_id == AV_CODEC_ID_H263P ||
1841                         s->codec_id == AV_CODEC_ID_MPEG4)
1842                         s->no_rounding ^= 1;
1843                 }
1844                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1845                     s->time_base       = s->last_time_base;
1846                     s->last_non_b_time = s->time - s->pp_time;
1847                 }
1848                 for (i = 0; i < context_count; i++) {
1849                     PutBitContext *pb = &s->thread_context[i]->pb;
1850                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1851                 }
1852                 s->vbv_ignore_qmax = 1;
1853                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1854                 goto vbv_retry;
1855             }
1856
1857             av_assert0(s->avctx->rc_max_rate);
1858         }
1859
1860         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1861             ff_write_pass1_stats(s);
1862
1863         for (i = 0; i < 4; i++) {
1864             s->current_picture_ptr->f->error[i] =
1865             s->current_picture.f->error[i] =
1866                 s->current_picture.error[i];
1867             avctx->error[i] += s->current_picture_ptr->f->error[i];
1868         }
1869         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1870                                        s->current_picture_ptr->f->error,
1871                                        (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1872                                        s->pict_type);
1873
1874         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1875             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1876                    avctx->i_tex_bits + avctx->p_tex_bits ==
1877                        put_bits_count(&s->pb));
1878         flush_put_bits(&s->pb);
1879         s->frame_bits  = put_bits_count(&s->pb);
1880
1881         stuffing_count = ff_vbv_update(s, s->frame_bits);
1882         s->stuffing_bits = 8*stuffing_count;
1883         if (stuffing_count) {
1884             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1885                     stuffing_count + 50) {
1886                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1887                 return -1;
1888             }
1889
1890             switch (s->codec_id) {
1891             case AV_CODEC_ID_MPEG1VIDEO:
1892             case AV_CODEC_ID_MPEG2VIDEO:
1893                 while (stuffing_count--) {
1894                     put_bits(&s->pb, 8, 0);
1895                 }
1896             break;
1897             case AV_CODEC_ID_MPEG4:
1898                 put_bits(&s->pb, 16, 0);
1899                 put_bits(&s->pb, 16, 0x1C3);
1900                 stuffing_count -= 4;
1901                 while (stuffing_count--) {
1902                     put_bits(&s->pb, 8, 0xFF);
1903                 }
1904             break;
1905             default:
1906                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1907             }
1908             flush_put_bits(&s->pb);
1909             s->frame_bits  = put_bits_count(&s->pb);
1910         }
1911
1912         /* update mpeg1/2 vbv_delay for CBR */
1913         if (s->avctx->rc_max_rate                          &&
1914             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1915             s->out_format == FMT_MPEG1                     &&
1916             90000LL * (avctx->rc_buffer_size - 1) <=
1917                 s->avctx->rc_max_rate * 0xFFFFLL) {
1918             int vbv_delay, min_delay;
1919             double inbits  = s->avctx->rc_max_rate *
1920                              av_q2d(s->avctx->time_base);
1921             int    minbits = s->frame_bits - 8 *
1922                              (s->vbv_delay_ptr - s->pb.buf - 1);
1923             double bits    = s->rc_context.buffer_index + minbits - inbits;
1924
1925             if (bits < 0)
1926                 av_log(s->avctx, AV_LOG_ERROR,
1927                        "Internal error, negative bits\n");
1928
1929             assert(s->repeat_first_field == 0);
1930
1931             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1932             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1933                         s->avctx->rc_max_rate;
1934
1935             vbv_delay = FFMAX(vbv_delay, min_delay);
1936
1937             av_assert0(vbv_delay < 0xFFFF);
1938
1939             s->vbv_delay_ptr[0] &= 0xF8;
1940             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1941             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1942             s->vbv_delay_ptr[2] &= 0x07;
1943             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1944             avctx->vbv_delay     = vbv_delay * 300;
1945         }
1946         s->total_bits     += s->frame_bits;
1947         avctx->frame_bits  = s->frame_bits;
1948
1949         pkt->pts = s->current_picture.f->pts;
1950         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1951             if (!s->current_picture.f->coded_picture_number)
1952                 pkt->dts = pkt->pts - s->dts_delta;
1953             else
1954                 pkt->dts = s->reordered_pts;
1955             s->reordered_pts = pkt->pts;
1956         } else
1957             pkt->dts = pkt->pts;
1958         if (s->current_picture.f->key_frame)
1959             pkt->flags |= AV_PKT_FLAG_KEY;
1960         if (s->mb_info)
1961             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1962     } else {
1963         s->frame_bits = 0;
1964     }
1965
1966     /* release non-reference frames */
1967     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1968         if (!s->picture[i].reference)
1969             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1970     }
1971
1972     av_assert1((s->frame_bits & 7) == 0);
1973
1974     pkt->size = s->frame_bits / 8;
1975     *got_packet = !!pkt->size;
1976     return 0;
1977 }
1978
1979 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1980                                                 int n, int threshold)
1981 {
1982     static const char tab[64] = {
1983         3, 2, 2, 1, 1, 1, 1, 1,
1984         1, 1, 1, 1, 1, 1, 1, 1,
1985         1, 1, 1, 1, 1, 1, 1, 1,
1986         0, 0, 0, 0, 0, 0, 0, 0,
1987         0, 0, 0, 0, 0, 0, 0, 0,
1988         0, 0, 0, 0, 0, 0, 0, 0,
1989         0, 0, 0, 0, 0, 0, 0, 0,
1990         0, 0, 0, 0, 0, 0, 0, 0
1991     };
1992     int score = 0;
1993     int run = 0;
1994     int i;
1995     int16_t *block = s->block[n];
1996     const int last_index = s->block_last_index[n];
1997     int skip_dc;
1998
1999     if (threshold < 0) {
2000         skip_dc = 0;
2001         threshold = -threshold;
2002     } else
2003         skip_dc = 1;
2004
2005     /* Are all we could set to zero already zero? */
2006     if (last_index <= skip_dc - 1)
2007         return;
2008
2009     for (i = 0; i <= last_index; i++) {
2010         const int j = s->intra_scantable.permutated[i];
2011         const int level = FFABS(block[j]);
2012         if (level == 1) {
2013             if (skip_dc && i == 0)
2014                 continue;
2015             score += tab[run];
2016             run = 0;
2017         } else if (level > 1) {
2018             return;
2019         } else {
2020             run++;
2021         }
2022     }
2023     if (score >= threshold)
2024         return;
2025     for (i = skip_dc; i <= last_index; i++) {
2026         const int j = s->intra_scantable.permutated[i];
2027         block[j] = 0;
2028     }
2029     if (block[0])
2030         s->block_last_index[n] = 0;
2031     else
2032         s->block_last_index[n] = -1;
2033 }
2034
2035 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2036                                int last_index)
2037 {
2038     int i;
2039     const int maxlevel = s->max_qcoeff;
2040     const int minlevel = s->min_qcoeff;
2041     int overflow = 0;
2042
2043     if (s->mb_intra) {
2044         i = 1; // skip clipping of intra dc
2045     } else
2046         i = 0;
2047
2048     for (; i <= last_index; i++) {
2049         const int j = s->intra_scantable.permutated[i];
2050         int level = block[j];
2051
2052         if (level > maxlevel) {
2053             level = maxlevel;
2054             overflow++;
2055         } else if (level < minlevel) {
2056             level = minlevel;
2057             overflow++;
2058         }
2059
2060         block[j] = level;
2061     }
2062
2063     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2064         av_log(s->avctx, AV_LOG_INFO,
2065                "warning, clipping %d dct coefficients to %d..%d\n",
2066                overflow, minlevel, maxlevel);
2067 }
2068
/**
 * Derive a per-pixel weight for an 8x8 block from local pixel activity.
 *
 * For every pixel the neighbourhood reaching one pixel in each direction
 * (cut off at the block border, so 4, 6 or 9 pixels) is examined. With
 * sum and sqr being the sum and the sum of squares of those pixels, the
 * weight is 36 * ff_sqrt(count * sqr - sum * sum) / count, i.e. 36 times
 * the standard deviation of the neighbourhood up to the rounding done by
 * ff_sqrt() and the integer division.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int y_first = FFMAX(row - 1, 0);
        const int y_end   = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int x_first = FFMAX(col - 1, 0);
            const int x_end   = FFMIN(8, col + 2);
            /* number of pixels visited by the two loops below */
            const int count   = (y_end - y_first) * (x_end - x_first);
            int sum = 0;
            int sqr = 0;
            int nx, ny;

            for (ny = y_first; ny < y_end; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = x_first; nx < x_end; nx++) {
                    const int v = line[nx];

                    sqr += v * v;
                    sum += v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2092
2093 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2094                                                 int motion_x, int motion_y,
2095                                                 int mb_block_height,
2096                                                 int mb_block_width,
2097                                                 int mb_block_count)
2098 {
2099     int16_t weight[12][64];
2100     int16_t orig[12][64];
2101     const int mb_x = s->mb_x;
2102     const int mb_y = s->mb_y;
2103     int i;
2104     int skip_dct[12];
2105     int dct_offset = s->linesize * 8; // default for progressive frames
2106     int uv_dct_offset = s->uvlinesize * 8;
2107     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2108     ptrdiff_t wrap_y, wrap_c;
2109
2110     for (i = 0; i < mb_block_count; i++)
2111         skip_dct[i] = s->skipdct;
2112
2113     if (s->adaptive_quant) {
2114         const int last_qp = s->qscale;
2115         const int mb_xy = mb_x + mb_y * s->mb_stride;
2116
2117         s->lambda = s->lambda_table[mb_xy];
2118         update_qscale(s);
2119
2120         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2121             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2122             s->dquant = s->qscale - last_qp;
2123
2124             if (s->out_format == FMT_H263) {
2125                 s->dquant = av_clip(s->dquant, -2, 2);
2126
2127                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2128                     if (!s->mb_intra) {
2129                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2130                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2131                                 s->dquant = 0;
2132                         }
2133                         if (s->mv_type == MV_TYPE_8X8)
2134                             s->dquant = 0;
2135                     }
2136                 }
2137             }
2138         }
2139         ff_set_qscale(s, last_qp + s->dquant);
2140     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2141         ff_set_qscale(s, s->qscale + s->dquant);
2142
2143     wrap_y = s->linesize;
2144     wrap_c = s->uvlinesize;
2145     ptr_y  = s->new_picture.f->data[0] +
2146              (mb_y * 16 * wrap_y)              + mb_x * 16;
2147     ptr_cb = s->new_picture.f->data[1] +
2148              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2149     ptr_cr = s->new_picture.f->data[2] +
2150              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2151
2152     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2153         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2154         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2155         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2156         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2157                                  wrap_y, wrap_y,
2158                                  16, 16, mb_x * 16, mb_y * 16,
2159                                  s->width, s->height);
2160         ptr_y = ebuf;
2161         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2162                                  wrap_c, wrap_c,
2163                                  mb_block_width, mb_block_height,
2164                                  mb_x * mb_block_width, mb_y * mb_block_height,
2165                                  cw, ch);
2166         ptr_cb = ebuf + 16 * wrap_y;
2167         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2168                                  wrap_c, wrap_c,
2169                                  mb_block_width, mb_block_height,
2170                                  mb_x * mb_block_width, mb_y * mb_block_height,
2171                                  cw, ch);
2172         ptr_cr = ebuf + 16 * wrap_y + 16;
2173     }
2174
2175     if (s->mb_intra) {
2176         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2177             int progressive_score, interlaced_score;
2178
2179             s->interlaced_dct = 0;
2180             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2181                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2182                                                      NULL, wrap_y, 8) - 400;
2183
2184             if (progressive_score > 0) {
2185                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2186                                                         NULL, wrap_y * 2, 8) +
2187                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2188                                                         NULL, wrap_y * 2, 8);
2189                 if (progressive_score > interlaced_score) {
2190                     s->interlaced_dct = 1;
2191
2192                     dct_offset = wrap_y;
2193                     uv_dct_offset = wrap_c;
2194                     wrap_y <<= 1;
2195                     if (s->chroma_format == CHROMA_422 ||
2196                         s->chroma_format == CHROMA_444)
2197                         wrap_c <<= 1;
2198                 }
2199             }
2200         }
2201
2202         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2203         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2204         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2205         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2206
2207         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2208             skip_dct[4] = 1;
2209             skip_dct[5] = 1;
2210         } else {
2211             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2212             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2213             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2214                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2215                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2216             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2217                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2218                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2219                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2220                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2221                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2222                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2223             }
2224         }
2225     } else {
2226         op_pixels_func (*op_pix)[4];
2227         qpel_mc_func (*op_qpix)[16];
2228         uint8_t *dest_y, *dest_cb, *dest_cr;
2229
2230         dest_y  = s->dest[0];
2231         dest_cb = s->dest[1];
2232         dest_cr = s->dest[2];
2233
2234         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2235             op_pix  = s->hdsp.put_pixels_tab;
2236             op_qpix = s->qdsp.put_qpel_pixels_tab;
2237         } else {
2238             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2239             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2240         }
2241
2242         if (s->mv_dir & MV_DIR_FORWARD) {
2243             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2244                           s->last_picture.f->data,
2245                           op_pix, op_qpix);
2246             op_pix  = s->hdsp.avg_pixels_tab;
2247             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2248         }
2249         if (s->mv_dir & MV_DIR_BACKWARD) {
2250             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2251                           s->next_picture.f->data,
2252                           op_pix, op_qpix);
2253         }
2254
2255         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2256             int progressive_score, interlaced_score;
2257
2258             s->interlaced_dct = 0;
2259             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2260                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2261                                                      ptr_y + wrap_y * 8,
2262                                                      wrap_y, 8) - 400;
2263
2264             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2265                 progressive_score -= 400;
2266
2267             if (progressive_score > 0) {
2268                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2269                                                         wrap_y * 2, 8) +
2270                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2271                                                         ptr_y + wrap_y,
2272                                                         wrap_y * 2, 8);
2273
2274                 if (progressive_score > interlaced_score) {
2275                     s->interlaced_dct = 1;
2276
2277                     dct_offset = wrap_y;
2278                     uv_dct_offset = wrap_c;
2279                     wrap_y <<= 1;
2280                     if (s->chroma_format == CHROMA_422)
2281                         wrap_c <<= 1;
2282                 }
2283             }
2284         }
2285
2286         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2287         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2288         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2289                             dest_y + dct_offset, wrap_y);
2290         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2291                             dest_y + dct_offset + 8, wrap_y);
2292
2293         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2294             skip_dct[4] = 1;
2295             skip_dct[5] = 1;
2296         } else {
2297             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2298             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2299             if (!s->chroma_y_shift) { /* 422 */
2300                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2301                                     dest_cb + uv_dct_offset, wrap_c);
2302                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2303                                     dest_cr + uv_dct_offset, wrap_c);
2304             }
2305         }
2306         /* pre quantization */
2307         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2308                 2 * s->qscale * s->qscale) {
2309             // FIXME optimize
2310             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2311                 skip_dct[0] = 1;
2312             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2313                 skip_dct[1] = 1;
2314             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2315                                wrap_y, 8) < 20 * s->qscale)
2316                 skip_dct[2] = 1;
2317             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2318                                wrap_y, 8) < 20 * s->qscale)
2319                 skip_dct[3] = 1;
2320             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2321                 skip_dct[4] = 1;
2322             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2323                 skip_dct[5] = 1;
2324             if (!s->chroma_y_shift) { /* 422 */
2325                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2326                                    dest_cb + uv_dct_offset,
2327                                    wrap_c, 8) < 20 * s->qscale)
2328                     skip_dct[6] = 1;
2329                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2330                                    dest_cr + uv_dct_offset,
2331                                    wrap_c, 8) < 20 * s->qscale)
2332                     skip_dct[7] = 1;
2333             }
2334         }
2335     }
2336
2337     if (s->quantizer_noise_shaping) {
2338         if (!skip_dct[0])
2339             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2340         if (!skip_dct[1])
2341             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2342         if (!skip_dct[2])
2343             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2344         if (!skip_dct[3])
2345             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2346         if (!skip_dct[4])
2347             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2348         if (!skip_dct[5])
2349             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2350         if (!s->chroma_y_shift) { /* 422 */
2351             if (!skip_dct[6])
2352                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2353                                   wrap_c);
2354             if (!skip_dct[7])
2355                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2356                                   wrap_c);
2357         }
2358         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2359     }
2360
2361     /* DCT & quantize */
2362     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2363     {
2364         for (i = 0; i < mb_block_count; i++) {
2365             if (!skip_dct[i]) {
2366                 int overflow;
2367                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2368                 // FIXME we could decide to change to quantizer instead of
2369                 // clipping
2370                 // JS: I don't think that would be a good idea it could lower
2371                 //     quality instead of improve it. Just INTRADC clipping
2372                 //     deserves changes in quantizer
2373                 if (overflow)
2374                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2375             } else
2376                 s->block_last_index[i] = -1;
2377         }
2378         if (s->quantizer_noise_shaping) {
2379             for (i = 0; i < mb_block_count; i++) {
2380                 if (!skip_dct[i]) {
2381                     s->block_last_index[i] =
2382                         dct_quantize_refine(s, s->block[i], weight[i],
2383                                             orig[i], i, s->qscale);
2384                 }
2385             }
2386         }
2387
2388         if (s->luma_elim_threshold && !s->mb_intra)
2389             for (i = 0; i < 4; i++)
2390                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2391         if (s->chroma_elim_threshold && !s->mb_intra)
2392             for (i = 4; i < mb_block_count; i++)
2393                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2394
2395         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2396             for (i = 0; i < mb_block_count; i++) {
2397                 if (s->block_last_index[i] == -1)
2398                     s->coded_score[i] = INT_MAX / 256;
2399             }
2400         }
2401     }
2402
2403     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2404         s->block_last_index[4] =
2405         s->block_last_index[5] = 0;
2406         s->block[4][0] =
2407         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2408         if (!s->chroma_y_shift) { /* 422 / 444 */
2409             for (i=6; i<12; i++) {
2410                 s->block_last_index[i] = 0;
2411                 s->block[i][0] = s->block[4][0];
2412             }
2413         }
2414     }
2415
2416     // non c quantize code returns incorrect block_last_index FIXME
2417     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2418         for (i = 0; i < mb_block_count; i++) {
2419             int j;
2420             if (s->block_last_index[i] > 0) {
2421                 for (j = 63; j > 0; j--) {
2422                     if (s->block[i][s->intra_scantable.permutated[j]])
2423                         break;
2424                 }
2425                 s->block_last_index[i] = j;
2426             }
2427         }
2428     }
2429
2430     /* huffman encode */
2431     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2432     case AV_CODEC_ID_MPEG1VIDEO:
2433     case AV_CODEC_ID_MPEG2VIDEO:
2434         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2435             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2436         break;
2437     case AV_CODEC_ID_MPEG4:
2438         if (CONFIG_MPEG4_ENCODER)
2439             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2440         break;
2441     case AV_CODEC_ID_MSMPEG4V2:
2442     case AV_CODEC_ID_MSMPEG4V3:
2443     case AV_CODEC_ID_WMV1:
2444         if (CONFIG_MSMPEG4_ENCODER)
2445             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2446         break;
2447     case AV_CODEC_ID_WMV2:
2448         if (CONFIG_WMV2_ENCODER)
2449             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2450         break;
2451     case AV_CODEC_ID_H261:
2452         if (CONFIG_H261_ENCODER)
2453             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2454         break;
2455     case AV_CODEC_ID_H263:
2456     case AV_CODEC_ID_H263P:
2457     case AV_CODEC_ID_FLV1:
2458     case AV_CODEC_ID_RV10:
2459     case AV_CODEC_ID_RV20:
2460         if (CONFIG_H263_ENCODER)
2461             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2462         break;
2463     case AV_CODEC_ID_MJPEG:
2464     case AV_CODEC_ID_AMV:
2465         if (CONFIG_MJPEG_ENCODER)
2466             ff_mjpeg_encode_mb(s, s->block);
2467         break;
2468     default:
2469         av_assert1(0);
2470     }
2471 }
2472
2473 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2474 {
2475     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2476     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2477     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2478 }
2479
2480 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2481     int i;
2482
2483     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2484
2485     /* mpeg1 */
2486     d->mb_skip_run= s->mb_skip_run;
2487     for(i=0; i<3; i++)
2488         d->last_dc[i] = s->last_dc[i];
2489
2490     /* statistics */
2491     d->mv_bits= s->mv_bits;
2492     d->i_tex_bits= s->i_tex_bits;
2493     d->p_tex_bits= s->p_tex_bits;
2494     d->i_count= s->i_count;
2495     d->f_count= s->f_count;
2496     d->b_count= s->b_count;
2497     d->skip_count= s->skip_count;
2498     d->misc_bits= s->misc_bits;
2499     d->last_bits= 0;
2500
2501     d->mb_skipped= 0;
2502     d->qscale= s->qscale;
2503     d->dquant= s->dquant;
2504
2505     d->esc3_level_length= s->esc3_level_length;
2506 }
2507
2508 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2509     int i;
2510
2511     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2512     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2513
2514     /* mpeg1 */
2515     d->mb_skip_run= s->mb_skip_run;
2516     for(i=0; i<3; i++)
2517         d->last_dc[i] = s->last_dc[i];
2518
2519     /* statistics */
2520     d->mv_bits= s->mv_bits;
2521     d->i_tex_bits= s->i_tex_bits;
2522     d->p_tex_bits= s->p_tex_bits;
2523     d->i_count= s->i_count;
2524     d->f_count= s->f_count;
2525     d->b_count= s->b_count;
2526     d->skip_count= s->skip_count;
2527     d->misc_bits= s->misc_bits;
2528
2529     d->mb_intra= s->mb_intra;
2530     d->mb_skipped= s->mb_skipped;
2531     d->mv_type= s->mv_type;
2532     d->mv_dir= s->mv_dir;
2533     d->pb= s->pb;
2534     if(s->data_partitioning){
2535         d->pb2= s->pb2;
2536         d->tex_pb= s->tex_pb;
2537     }
2538     d->block= s->block;
2539     for(i=0; i<8; i++)
2540         d->block_last_index[i]= s->block_last_index[i];
2541     d->interlaced_dct= s->interlaced_dct;
2542     d->qscale= s->qscale;
2543
2544     d->esc3_level_length= s->esc3_level_length;
2545 }
2546
2547 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2548                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2549                            int *dmin, int *next_block, int motion_x, int motion_y)
2550 {
2551     int score;
2552     uint8_t *dest_backup[3];
2553
2554     copy_context_before_encode(s, backup, type);
2555
2556     s->block= s->blocks[*next_block];
2557     s->pb= pb[*next_block];
2558     if(s->data_partitioning){
2559         s->pb2   = pb2   [*next_block];
2560         s->tex_pb= tex_pb[*next_block];
2561     }
2562
2563     if(*next_block){
2564         memcpy(dest_backup, s->dest, sizeof(s->dest));
2565         s->dest[0] = s->sc.rd_scratchpad;
2566         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2567         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2568         av_assert0(s->linesize >= 32); //FIXME
2569     }
2570
2571     encode_mb(s, motion_x, motion_y);
2572
2573     score= put_bits_count(&s->pb);
2574     if(s->data_partitioning){
2575         score+= put_bits_count(&s->pb2);
2576         score+= put_bits_count(&s->tex_pb);
2577     }
2578
2579     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2580         ff_mpv_decode_mb(s, s->block);
2581
2582         score *= s->lambda2;
2583         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2584     }
2585
2586     if(*next_block){
2587         memcpy(s->dest, dest_backup, sizeof(s->dest));
2588     }
2589
2590     if(score<*dmin){
2591         *dmin= score;
2592         *next_block^=1;
2593
2594         copy_context_after_encode(best, s, type);
2595     }
2596 }
2597
2598 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2599     uint32_t *sq = ff_square_tab + 256;
2600     int acc=0;
2601     int x,y;
2602
2603     if(w==16 && h==16)
2604         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2605     else if(w==8 && h==8)
2606         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2607
2608     for(y=0; y<h; y++){
2609         for(x=0; x<w; x++){
2610             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2611         }
2612     }
2613
2614     av_assert2(acc>=0);
2615
2616     return acc;
2617 }
2618
2619 static int sse_mb(MpegEncContext *s){
2620     int w= 16;
2621     int h= 16;
2622
2623     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2624     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2625
2626     if(w==16 && h==16)
2627       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2628         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2629                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2630                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2631       }else{
2632         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2633                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2634                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2635       }
2636     else
2637         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2638                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2639                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2640 }
2641
2642 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2643     MpegEncContext *s= *(void**)arg;
2644
2645
2646     s->me.pre_pass=1;
2647     s->me.dia_size= s->avctx->pre_dia_size;
2648     s->first_slice_line=1;
2649     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2650         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2651             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2652         }
2653         s->first_slice_line=0;
2654     }
2655
2656     s->me.pre_pass=0;
2657
2658     return 0;
2659 }
2660
2661 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2662     MpegEncContext *s= *(void**)arg;
2663
2664     ff_check_alignment();
2665
2666     s->me.dia_size= s->avctx->dia_size;
2667     s->first_slice_line=1;
2668     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2669         s->mb_x=0; //for block init below
2670         ff_init_block_index(s);
2671         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2672             s->block_index[0]+=2;
2673             s->block_index[1]+=2;
2674             s->block_index[2]+=2;
2675             s->block_index[3]+=2;
2676
2677             /* compute motion vector & mb_type and store in context */
2678             if(s->pict_type==AV_PICTURE_TYPE_B)
2679                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2680             else
2681                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2682         }
2683         s->first_slice_line=0;
2684     }
2685     return 0;
2686 }
2687
2688 static int mb_var_thread(AVCodecContext *c, void *arg){
2689     MpegEncContext *s= *(void**)arg;
2690     int mb_x, mb_y;
2691
2692     ff_check_alignment();
2693
2694     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2695         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2696             int xx = mb_x * 16;
2697             int yy = mb_y * 16;
2698             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2699             int varc;
2700             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2701
2702             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2703                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2704
2705             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2706             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2707             s->me.mb_var_sum_temp    += varc;
2708         }
2709     }
2710     return 0;
2711 }
2712
2713 static void write_slice_end(MpegEncContext *s){
2714     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2715         if(s->partitioned_frame){
2716             ff_mpeg4_merge_partitions(s);
2717         }
2718
2719         ff_mpeg4_stuffing(&s->pb);
2720     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2721         ff_mjpeg_encode_stuffing(s);
2722     }
2723
2724     avpriv_align_put_bits(&s->pb);
2725     flush_put_bits(&s->pb);
2726
2727     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2728         s->misc_bits+= get_bits_diff(s);
2729 }
2730
2731 static void write_mb_info(MpegEncContext *s)
2732 {
2733     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2734     int offset = put_bits_count(&s->pb);
2735     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2736     int gobn = s->mb_y / s->gob_index;
2737     int pred_x, pred_y;
2738     if (CONFIG_H263_ENCODER)
2739         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2740     bytestream_put_le32(&ptr, offset);
2741     bytestream_put_byte(&ptr, s->qscale);
2742     bytestream_put_byte(&ptr, gobn);
2743     bytestream_put_le16(&ptr, mba);
2744     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2745     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2746     /* 4MV not implemented */
2747     bytestream_put_byte(&ptr, 0); /* hmv2 */
2748     bytestream_put_byte(&ptr, 0); /* vmv2 */
2749 }
2750
2751 static void update_mb_info(MpegEncContext *s, int startcode)
2752 {
2753     if (!s->mb_info)
2754         return;
2755     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2756         s->mb_info_size += 12;
2757         s->prev_mb_info = s->last_mb_info;
2758     }
2759     if (startcode) {
2760         s->prev_mb_info = put_bits_count(&s->pb)/8;
2761         /* This might have incremented mb_info_size above, and we return without
2762          * actually writing any info into that slot yet. But in that case,
2763          * this will be called again at the start of the after writing the
2764          * start code, actually writing the mb info. */
2765         return;
2766     }
2767
2768     s->last_mb_info = put_bits_count(&s->pb)/8;
2769     if (!s->mb_info_size)
2770         s->mb_info_size += 12;
2771     write_mb_info(s);
2772 }
2773
2774 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2775 {
2776     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2777         && s->slice_context_count == 1
2778         && s->pb.buf == s->avctx->internal->byte_buffer) {
2779         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2780         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2781
2782         uint8_t *new_buffer = NULL;
2783         int new_buffer_size = 0;
2784
2785         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2786                               s->avctx->internal->byte_buffer_size + size_increase);
2787         if (!new_buffer)
2788             return AVERROR(ENOMEM);
2789
2790         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2791         av_free(s->avctx->internal->byte_buffer);
2792         s->avctx->internal->byte_buffer      = new_buffer;
2793         s->avctx->internal->byte_buffer_size = new_buffer_size;
2794         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2795         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2796         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2797     }
2798     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2799         return AVERROR(EINVAL);
2800     return 0;
2801 }
2802
2803 static int encode_thread(AVCodecContext *c, void *arg){
2804     MpegEncContext *s= *(void**)arg;
2805     int mb_x, mb_y, pdif = 0;
2806     int chr_h= 16>>s->chroma_y_shift;
2807     int i, j;
2808     MpegEncContext best_s = { 0 }, backup_s;
2809     uint8_t bit_buf[2][MAX_MB_BYTES];
2810     uint8_t bit_buf2[2][MAX_MB_BYTES];
2811     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2812     PutBitContext pb[2], pb2[2], tex_pb[2];
2813
2814     ff_check_alignment();
2815
2816     for(i=0; i<2; i++){
2817         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2818         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2819         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2820     }
2821
2822     s->last_bits= put_bits_count(&s->pb);
2823     s->mv_bits=0;
2824     s->misc_bits=0;
2825     s->i_tex_bits=0;
2826     s->p_tex_bits=0;
2827     s->i_count=0;
2828     s->f_count=0;
2829     s->b_count=0;
2830     s->skip_count=0;
2831
2832     for(i=0; i<3; i++){
2833         /* init last dc values */
2834         /* note: quant matrix value (8) is implied here */
2835         s->last_dc[i] = 128 << s->intra_dc_precision;
2836
2837         s->current_picture.error[i] = 0;
2838     }
2839     if(s->codec_id==AV_CODEC_ID_AMV){
2840         s->last_dc[0] = 128*8/13;
2841         s->last_dc[1] = 128*8/14;
2842         s->last_dc[2] = 128*8/14;
2843     }
2844     s->mb_skip_run = 0;
2845     memset(s->last_mv, 0, sizeof(s->last_mv));
2846
2847     s->last_mv_dir = 0;
2848
2849     switch(s->codec_id){
2850     case AV_CODEC_ID_H263:
2851     case AV_CODEC_ID_H263P:
2852     case AV_CODEC_ID_FLV1:
2853         if (CONFIG_H263_ENCODER)
2854             s->gob_index = H263_GOB_HEIGHT(s->height);
2855         break;
2856     case AV_CODEC_ID_MPEG4:
2857         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2858             ff_mpeg4_init_partitions(s);
2859         break;
2860     }
2861
2862     s->resync_mb_x=0;
2863     s->resync_mb_y=0;
2864     s->first_slice_line = 1;
2865     s->ptr_lastgob = s->pb.buf;
2866     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2867         s->mb_x=0;
2868         s->mb_y= mb_y;
2869
2870         ff_set_qscale(s, s->qscale);
2871         ff_init_block_index(s);
2872
2873         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2874             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2875             int mb_type= s->mb_type[xy];
2876 //            int d;
2877             int dmin= INT_MAX;
2878             int dir;
2879             int size_increase =  s->avctx->internal->byte_buffer_size/4
2880                                + s->mb_width*MAX_MB_BYTES;
2881
2882             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2883             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2884                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2885                 return -1;
2886             }
2887             if(s->data_partitioning){
2888                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2889                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2890                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2891                     return -1;
2892                 }
2893             }
2894
2895             s->mb_x = mb_x;
2896             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2897             ff_update_block_index(s);
2898
2899             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2900                 ff_h261_reorder_mb_index(s);
2901                 xy= s->mb_y*s->mb_stride + s->mb_x;
2902                 mb_type= s->mb_type[xy];
2903             }
2904
2905             /* write gob / video packet header  */
2906             if(s->rtp_mode){
2907                 int current_packet_size, is_gob_start;
2908
2909                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2910
2911                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2912
2913                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2914
2915                 switch(s->codec_id){
2916                 case AV_CODEC_ID_H263:
2917                 case AV_CODEC_ID_H263P:
2918                     if(!s->h263_slice_structured)
2919                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2920                     break;
2921                 case AV_CODEC_ID_MPEG2VIDEO:
2922                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2923                 case AV_CODEC_ID_MPEG1VIDEO:
2924                     if(s->mb_skip_run) is_gob_start=0;
2925                     break;
2926                 case AV_CODEC_ID_MJPEG:
2927                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2928                     break;
2929                 }
2930
2931                 if(is_gob_start){
2932                     if(s->start_mb_y != mb_y || mb_x!=0){
2933                         write_slice_end(s);
2934
2935                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2936                             ff_mpeg4_init_partitions(s);
2937                         }
2938                     }
2939
2940                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2941                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2942
2943                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2944                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2945                         int d = 100 / s->error_rate;
2946                         if(r % d == 0){
2947                             current_packet_size=0;
2948                             s->pb.buf_ptr= s->ptr_lastgob;
2949                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2950                         }
2951                     }
2952
2953                     if (s->avctx->rtp_callback){
2954                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2955                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2956                     }
2957                     update_mb_info(s, 1);
2958
2959                     switch(s->codec_id){
2960                     case AV_CODEC_ID_MPEG4:
2961                         if (CONFIG_MPEG4_ENCODER) {
2962                             ff_mpeg4_encode_video_packet_header(s);
2963                             ff_mpeg4_clean_buffers(s);
2964                         }
2965                     break;
2966                     case AV_CODEC_ID_MPEG1VIDEO:
2967                     case AV_CODEC_ID_MPEG2VIDEO:
2968                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2969                             ff_mpeg1_encode_slice_header(s);
2970                             ff_mpeg1_clean_buffers(s);
2971                         }
2972                     break;
2973                     case AV_CODEC_ID_H263:
2974                     case AV_CODEC_ID_H263P:
2975                         if (CONFIG_H263_ENCODER)
2976                             ff_h263_encode_gob_header(s, mb_y);
2977                     break;
2978                     }
2979
2980                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2981                         int bits= put_bits_count(&s->pb);
2982                         s->misc_bits+= bits - s->last_bits;
2983                         s->last_bits= bits;
2984                     }
2985
2986                     s->ptr_lastgob += current_packet_size;
2987                     s->first_slice_line=1;
2988                     s->resync_mb_x=mb_x;
2989                     s->resync_mb_y=mb_y;
2990                 }
2991             }
2992
2993             if(  (s->resync_mb_x   == s->mb_x)
2994                && s->resync_mb_y+1 == s->mb_y){
2995                 s->first_slice_line=0;
2996             }
2997
2998             s->mb_skipped=0;
2999             s->dquant=0; //only for QP_RD
3000
3001             update_mb_info(s, 0);
3002
3003             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
3004                 int next_block=0;
3005                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3006
3007                 copy_context_before_encode(&backup_s, s, -1);
3008                 backup_s.pb= s->pb;
3009                 best_s.data_partitioning= s->data_partitioning;
3010                 best_s.partitioned_frame= s->partitioned_frame;
3011                 if(s->data_partitioning){
3012                     backup_s.pb2= s->pb2;
3013                     backup_s.tex_pb= s->tex_pb;
3014                 }
3015
3016                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3017                     s->mv_dir = MV_DIR_FORWARD;
3018                     s->mv_type = MV_TYPE_16X16;
3019                     s->mb_intra= 0;
3020                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3021                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3022                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3023                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3024                 }
3025                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3026                     s->mv_dir = MV_DIR_FORWARD;
3027                     s->mv_type = MV_TYPE_FIELD;
3028                     s->mb_intra= 0;
3029                     for(i=0; i<2; i++){
3030                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3031                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3032                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3033                     }
3034                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3035                                  &dmin, &next_block, 0, 0);
3036                 }
3037                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3038                     s->mv_dir = MV_DIR_FORWARD;
3039                     s->mv_type = MV_TYPE_16X16;
3040                     s->mb_intra= 0;
3041                     s->mv[0][0][0] = 0;
3042                     s->mv[0][0][1] = 0;
3043                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3044                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3045                 }
3046                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3047                     s->mv_dir = MV_DIR_FORWARD;
3048                     s->mv_type = MV_TYPE_8X8;
3049                     s->mb_intra= 0;
3050                     for(i=0; i<4; i++){
3051                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3052                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3053                     }
3054                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3055                                  &dmin, &next_block, 0, 0);
3056                 }
3057                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3058                     s->mv_dir = MV_DIR_FORWARD;
3059                     s->mv_type = MV_TYPE_16X16;
3060                     s->mb_intra= 0;
3061                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3062                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3063                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3064                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3065                 }
3066                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3067                     s->mv_dir = MV_DIR_BACKWARD;
3068                     s->mv_type = MV_TYPE_16X16;
3069                     s->mb_intra= 0;
3070                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3071                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3072                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3073                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3074                 }
3075                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3076                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3077                     s->mv_type = MV_TYPE_16X16;
3078                     s->mb_intra= 0;
3079                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3080                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3081                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3082                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3083                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3084                                  &dmin, &next_block, 0, 0);
3085                 }
3086                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3087                     s->mv_dir = MV_DIR_FORWARD;
3088                     s->mv_type = MV_TYPE_FIELD;
3089                     s->mb_intra= 0;
3090                     for(i=0; i<2; i++){
3091                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3092                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3093                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3094                     }
3095                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3096                                  &dmin, &next_block, 0, 0);
3097                 }
3098                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3099                     s->mv_dir = MV_DIR_BACKWARD;
3100                     s->mv_type = MV_TYPE_FIELD;
3101                     s->mb_intra= 0;
3102                     for(i=0; i<2; i++){
3103                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3104                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3105                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3106                     }
3107                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3108                                  &dmin, &next_block, 0, 0);
3109                 }
3110                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3111                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3112                     s->mv_type = MV_TYPE_FIELD;
3113                     s->mb_intra= 0;
3114                     for(dir=0; dir<2; dir++){
3115                         for(i=0; i<2; i++){
3116                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3117                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3118                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3119                         }
3120                     }
3121                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3122                                  &dmin, &next_block, 0, 0);
3123                 }
3124                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3125                     s->mv_dir = 0;
3126                     s->mv_type = MV_TYPE_16X16;
3127                     s->mb_intra= 1;
3128                     s->mv[0][0][0] = 0;
3129                     s->mv[0][0][1] = 0;
3130                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3131                                  &dmin, &next_block, 0, 0);
3132                     if(s->h263_pred || s->h263_aic){
3133                         if(best_s.mb_intra)
3134                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3135                         else
3136                             ff_clean_intra_table_entries(s); //old mode?
3137                     }
3138                 }
3139
3140                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3141                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3142                         const int last_qp= backup_s.qscale;
3143                         int qpi, qp, dc[6];
3144                         int16_t ac[6][16];
3145                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3146                         static const int dquant_tab[4]={-1,1,-2,2};
3147                         int storecoefs = s->mb_intra && s->dc_val[0];
3148
3149                         av_assert2(backup_s.dquant == 0);
3150
3151                         //FIXME intra
3152                         s->mv_dir= best_s.mv_dir;
3153                         s->mv_type = MV_TYPE_16X16;
3154                         s->mb_intra= best_s.mb_intra;
3155                         s->mv[0][0][0] = best_s.mv[0][0][0];
3156                         s->mv[0][0][1] = best_s.mv[0][0][1];
3157                         s->mv[1][0][0] = best_s.mv[1][0][0];
3158                         s->mv[1][0][1] = best_s.mv[1][0][1];
3159
3160                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3161                         for(; qpi<4; qpi++){
3162                             int dquant= dquant_tab[qpi];
3163                             qp= last_qp + dquant;
3164                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3165                                 continue;
3166                             backup_s.dquant= dquant;
3167                             if(storecoefs){
3168                                 for(i=0; i<6; i++){
3169                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3170                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3171                                 }
3172                             }
3173
3174                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3175                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3176                             if(best_s.qscale != qp){
3177                                 if(storecoefs){
3178                                     for(i=0; i<6; i++){
3179                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3180                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3181                                     }
3182                                 }
3183                             }
3184                         }
3185                     }
3186                 }
3187                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3188                     int mx= s->b_direct_mv_table[xy][0];
3189                     int my= s->b_direct_mv_table[xy][1];
3190
3191                     backup_s.dquant = 0;
3192                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3193                     s->mb_intra= 0;
3194                     ff_mpeg4_set_direct_mv(s, mx, my);
3195                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3196                                  &dmin, &next_block, mx, my);
3197                 }
3198                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3199                     backup_s.dquant = 0;
3200                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3201                     s->mb_intra= 0;
3202                     ff_mpeg4_set_direct_mv(s, 0, 0);
3203                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3204                                  &dmin, &next_block, 0, 0);
3205                 }
3206                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3207                     int coded=0;
3208                     for(i=0; i<6; i++)
3209                         coded |= s->block_last_index[i];
3210                     if(coded){
3211                         int mx,my;
3212                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3213                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3214                             mx=my=0; //FIXME find the one we actually used
3215                             ff_mpeg4_set_direct_mv(s, mx, my);
3216                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3217                             mx= s->mv[1][0][0];
3218                             my= s->mv[1][0][1];
3219                         }else{
3220                             mx= s->mv[0][0][0];
3221                             my= s->mv[0][0][1];
3222                         }
3223
3224                         s->mv_dir= best_s.mv_dir;
3225                         s->mv_type = best_s.mv_type;
3226                         s->mb_intra= 0;
3227 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3228                         s->mv[0][0][1] = best_s.mv[0][0][1];
3229                         s->mv[1][0][0] = best_s.mv[1][0][0];
3230                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3231                         backup_s.dquant= 0;
3232                         s->skipdct=1;
3233                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3234                                         &dmin, &next_block, mx, my);
3235                         s->skipdct=0;
3236                     }
3237                 }
3238
3239                 s->current_picture.qscale_table[xy] = best_s.qscale;
3240
3241                 copy_context_after_encode(s, &best_s, -1);
3242
3243                 pb_bits_count= put_bits_count(&s->pb);
3244                 flush_put_bits(&s->pb);
3245                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3246                 s->pb= backup_s.pb;
3247
3248                 if(s->data_partitioning){
3249                     pb2_bits_count= put_bits_count(&s->pb2);
3250                     flush_put_bits(&s->pb2);
3251                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3252                     s->pb2= backup_s.pb2;
3253
3254                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3255                     flush_put_bits(&s->tex_pb);
3256                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3257                     s->tex_pb= backup_s.tex_pb;
3258                 }
3259                 s->last_bits= put_bits_count(&s->pb);
3260
3261                 if (CONFIG_H263_ENCODER &&
3262                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3263                     ff_h263_update_motion_val(s);
3264
3265                 if(next_block==0){ //FIXME 16 vs linesize16
3266                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3267                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3268                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3269                 }
3270
3271                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3272                     ff_mpv_decode_mb(s, s->block);
3273             } else {
3274                 int motion_x = 0, motion_y = 0;
3275                 s->mv_type=MV_TYPE_16X16;
3276                 // only one MB-Type possible
3277
3278                 switch(mb_type){
3279                 case CANDIDATE_MB_TYPE_INTRA:
3280                     s->mv_dir = 0;
3281                     s->mb_intra= 1;
3282                     motion_x= s->mv[0][0][0] = 0;
3283                     motion_y= s->mv[0][0][1] = 0;
3284                     break;
3285                 case CANDIDATE_MB_TYPE_INTER:
3286                     s->mv_dir = MV_DIR_FORWARD;
3287                     s->mb_intra= 0;
3288                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3289                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3290                     break;
3291                 case CANDIDATE_MB_TYPE_INTER_I:
3292                     s->mv_dir = MV_DIR_FORWARD;
3293                     s->mv_type = MV_TYPE_FIELD;
3294                     s->mb_intra= 0;
3295                     for(i=0; i<2; i++){
3296                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3297                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3298                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3299                     }
3300                     break;
3301                 case CANDIDATE_MB_TYPE_INTER4V:
3302                     s->mv_dir = MV_DIR_FORWARD;
3303                     s->mv_type = MV_TYPE_8X8;
3304                     s->mb_intra= 0;
3305                     for(i=0; i<4; i++){
3306                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3307                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3308                     }
3309                     break;
3310                 case CANDIDATE_MB_TYPE_DIRECT:
3311                     if (CONFIG_MPEG4_ENCODER) {
3312                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3313                         s->mb_intra= 0;
3314                         motion_x=s->b_direct_mv_table[xy][0];
3315                         motion_y=s->b_direct_mv_table[xy][1];
3316                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3317                     }
3318                     break;
3319                 case CANDIDATE_MB_TYPE_DIRECT0:
3320                     if (CONFIG_MPEG4_ENCODER) {
3321                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3322                         s->mb_intra= 0;
3323                         ff_mpeg4_set_direct_mv(s, 0, 0);
3324                     }
3325                     break;
3326                 case CANDIDATE_MB_TYPE_BIDIR:
3327                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3328                     s->mb_intra= 0;
3329                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3330                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3331                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3332                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3333                     break;
3334                 case CANDIDATE_MB_TYPE_BACKWARD:
3335                     s->mv_dir = MV_DIR_BACKWARD;
3336                     s->mb_intra= 0;
3337                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3338                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3339                     break;
3340                 case CANDIDATE_MB_TYPE_FORWARD:
3341                     s->mv_dir = MV_DIR_FORWARD;
3342                     s->mb_intra= 0;
3343                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3344                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3345                     break;
3346                 case CANDIDATE_MB_TYPE_FORWARD_I:
3347                     s->mv_dir = MV_DIR_FORWARD;
3348                     s->mv_type = MV_TYPE_FIELD;
3349                     s->mb_intra= 0;
3350                     for(i=0; i<2; i++){
3351                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3352                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3353                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3354                     }
3355                     break;
3356                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3357                     s->mv_dir = MV_DIR_BACKWARD;
3358                     s->mv_type = MV_TYPE_FIELD;
3359                     s->mb_intra= 0;
3360                     for(i=0; i<2; i++){
3361                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3362                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3363                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3364                     }
3365                     break;
3366                 case CANDIDATE_MB_TYPE_BIDIR_I:
3367                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3368                     s->mv_type = MV_TYPE_FIELD;
3369                     s->mb_intra= 0;
3370                     for(dir=0; dir<2; dir++){
3371                         for(i=0; i<2; i++){
3372                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3373                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3374                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3375                         }
3376                     }
3377                     break;
3378                 default:
3379                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3380                 }
3381
3382                 encode_mb(s, motion_x, motion_y);
3383
3384                 // RAL: Update last macroblock type
3385                 s->last_mv_dir = s->mv_dir;
3386
3387                 if (CONFIG_H263_ENCODER &&
3388                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3389                     ff_h263_update_motion_val(s);
3390
3391                 ff_mpv_decode_mb(s, s->block);
3392             }
3393
3394             /* clean the MV table in IPS frames for direct mode in B frames */
3395             if(s->mb_intra /* && I,P,S_TYPE */){
3396                 s->p_mv_table[xy][0]=0;
3397                 s->p_mv_table[xy][1]=0;
3398             }
3399
3400             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3401                 int w= 16;
3402                 int h= 16;
3403
3404                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3405                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3406
3407                 s->current_picture.error[0] += sse(
3408                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3409                     s->dest[0], w, h, s->linesize);
3410                 s->current_picture.error[1] += sse(
3411                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3412                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3413                 s->current_picture.error[2] += sse(
3414                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3415                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3416             }
3417             if(s->loop_filter){
3418                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3419                     ff_h263_loop_filter(s);
3420             }
3421             ff_dlog(s->avctx, "MB %d %d bits\n",
3422                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3423         }
3424     }
3425
3426     //not beautiful here but we must write it before flushing so it has to be here
3427     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3428         ff_msmpeg4_encode_ext_header(s);
3429
3430     write_slice_end(s);
3431
3432     /* Send the last GOB if RTP */
3433     if (s->avctx->rtp_callback) {
3434         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3435         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3436         /* Call the RTP callback to send the last GOB */
3437         emms_c();
3438         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3439     }
3440
3441     return 0;
3442 }
3443
3444 #define MERGE(field) dst->field += src->field; src->field=0
3445 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3446     MERGE(me.scene_change_score);
3447     MERGE(me.mc_mb_var_sum_temp);
3448     MERGE(me.mb_var_sum_temp);
3449 }
3450
3451 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3452     int i;
3453
3454     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3455     MERGE(dct_count[1]);
3456     MERGE(mv_bits);
3457     MERGE(i_tex_bits);
3458     MERGE(p_tex_bits);
3459     MERGE(i_count);
3460     MERGE(f_count);
3461     MERGE(b_count);
3462     MERGE(skip_count);
3463     MERGE(misc_bits);
3464     MERGE(er.error_count);
3465     MERGE(padding_bug_score);
3466     MERGE(current_picture.error[0]);
3467     MERGE(current_picture.error[1]);
3468     MERGE(current_picture.error[2]);
3469
3470     if(dst->avctx->noise_reduction){
3471         for(i=0; i<64; i++){
3472             MERGE(dct_error_sum[0][i]);
3473             MERGE(dct_error_sum[1][i]);
3474         }
3475     }
3476
3477     assert(put_bits_count(&src->pb) % 8 ==0);
3478     assert(put_bits_count(&dst->pb) % 8 ==0);
3479     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3480     flush_put_bits(&dst->pb);
3481 }
3482
3483 static int estimate_qp(MpegEncContext *s, int dry_run){
3484     if (s->next_lambda){
3485         s->current_picture_ptr->f->quality =
3486         s->current_picture.f->quality = s->next_lambda;
3487         if(!dry_run) s->next_lambda= 0;
3488     } else if (!s->fixed_qscale) {
3489         s->current_picture_ptr->f->quality =
3490         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3491         if (s->current_picture.f->quality < 0)
3492             return -1;
3493     }
3494
3495     if(s->adaptive_quant){
3496         switch(s->codec_id){
3497         case AV_CODEC_ID_MPEG4:
3498             if (CONFIG_MPEG4_ENCODER)
3499                 ff_clean_mpeg4_qscales(s);
3500             break;
3501         case AV_CODEC_ID_H263:
3502         case AV_CODEC_ID_H263P:
3503         case AV_CODEC_ID_FLV1:
3504             if (CONFIG_H263_ENCODER)
3505                 ff_clean_h263_qscales(s);
3506             break;
3507         default:
3508             ff_init_qscale_tab(s);
3509         }
3510
3511         s->lambda= s->lambda_table[0];
3512         //FIXME broken
3513     }else
3514         s->lambda = s->current_picture.f->quality;
3515     update_qscale(s);
3516     return 0;
3517 }
3518
3519 /* must be called before writing the header */
3520 static void set_frame_distances(MpegEncContext * s){
3521     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3522     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3523
3524     if(s->pict_type==AV_PICTURE_TYPE_B){
3525         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3526         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3527     }else{
3528         s->pp_time= s->time - s->last_non_b_time;
3529         s->last_non_b_time= s->time;
3530         assert(s->picture_number==0 || s->pp_time > 0);
3531     }
3532 }
3533
3534 static int encode_picture(MpegEncContext *s, int picture_number)
3535 {
3536     int i, ret;
3537     int bits;
3538     int context_count = s->slice_context_count;
3539
3540     s->picture_number = picture_number;
3541
3542     /* Reset the average MB variance */
3543     s->me.mb_var_sum_temp    =
3544     s->me.mc_mb_var_sum_temp = 0;
3545
3546     /* we need to initialize some time vars before we can encode b-frames */
3547     // RAL: Condition added for MPEG1VIDEO
3548     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3549         set_frame_distances(s);
3550     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3551         ff_set_mpeg4_time(s);
3552
3553     s->me.scene_change_score=0;
3554
3555 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3556
3557     if(s->pict_type==AV_PICTURE_TYPE_I){
3558         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3559         else                        s->no_rounding=0;
3560     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3561         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3562             s->no_rounding ^= 1;
3563     }
3564
3565     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3566         if (estimate_qp(s,1) < 0)
3567             return -1;
3568         ff_get_2pass_fcode(s);
3569     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3570         if(s->pict_type==AV_PICTURE_TYPE_B)
3571             s->lambda= s->last_lambda_for[s->pict_type];
3572         else
3573             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3574         update_qscale(s);
3575     }
3576
3577     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3578         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3579         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3580         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3581         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3582     }
3583
3584     s->mb_intra=0; //for the rate distortion & bit compare functions
3585     for(i=1; i<context_count; i++){
3586         ret = ff_update_duplicate_context(s->thread_context[i], s);
3587         if (ret < 0)
3588             return ret;
3589     }
3590
3591     if(ff_init_me(s)<0)
3592         return -1;
3593
3594     /* Estimate motion for every MB */
3595     if(s->pict_type != AV_PICTURE_TYPE_I){
3596         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3597         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3598         if (s->pict_type != AV_PICTURE_TYPE_B) {
3599             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3600                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3601             }
3602         }
3603
3604         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3605     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3606         /* I-Frame */
3607         for(i=0; i<s->mb_stride*s->mb_height; i++)
3608             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3609
3610         if(!s->fixed_qscale){
3611             /* finding spatial complexity for I-frame rate control */
3612             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3613         }
3614     }
3615     for(i=1; i<context_count; i++){
3616         merge_context_after_me(s, s->thread_context[i]);
3617     }
3618     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3619     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3620     emms_c();
3621
3622     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3623         s->pict_type= AV_PICTURE_TYPE_I;
3624         for(i=0; i<s->mb_stride*s->mb_height; i++)
3625             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3626         if(s->msmpeg4_version >= 3)
3627             s->no_rounding=1;
3628         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3629                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3630     }
3631
3632     if(!s->umvplus){
3633         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3634             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3635
3636             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3637                 int a,b;
3638                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3639                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3640                 s->f_code= FFMAX3(s->f_code, a, b);
3641             }
3642
3643             ff_fix_long_p_mvs(s);
3644             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3645             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3646                 int j;
3647                 for(i=0; i<2; i++){
3648                     for(j=0; j<2; j++)
3649                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3650                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3651                 }
3652             }
3653         }
3654
3655         if(s->pict_type==AV_PICTURE_TYPE_B){
3656             int a, b;
3657
3658             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3659             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3660             s->f_code = FFMAX(a, b);
3661
3662             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3663             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3664             s->b_code = FFMAX(a, b);
3665
3666             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3667             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3668             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3669             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3670             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3671                 int dir, j;
3672                 for(dir=0; dir<2; dir++){
3673                     for(i=0; i<2; i++){
3674                         for(j=0; j<2; j++){
3675                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3676                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3677                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3678                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3679                         }
3680                     }
3681                 }
3682             }
3683         }
3684     }
3685
3686     if (estimate_qp(s, 0) < 0)
3687         return -1;
3688
3689     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3690         s->pict_type == AV_PICTURE_TYPE_I &&
3691         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3692         s->qscale= 3; //reduce clipping problems
3693
3694     if (s->out_format == FMT_MJPEG) {
3695         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3696         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3697
3698         if (s->avctx->intra_matrix) {
3699             chroma_matrix =
3700             luma_matrix = s->avctx->intra_matrix;
3701         }
3702         if (s->avctx->chroma_intra_matrix)
3703             chroma_matrix = s->avctx->chroma_intra_matrix;
3704
3705         /* for mjpeg, we do include qscale in the matrix */
3706         for(i=1;i<64;i++){
3707             int j = s->idsp.idct_permutation[i];
3708
3709             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3710             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3711         }
3712         s->y_dc_scale_table=
3713         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3714         s->chroma_intra_matrix[0] =
3715         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3716         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3717                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3718         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3719                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3720         s->qscale= 8;
3721     }
3722     if(s->codec_id == AV_CODEC_ID_AMV){
3723         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3724         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3725         for(i=1;i<64;i++){
3726             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3727
3728             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3729             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3730         }
3731         s->y_dc_scale_table= y;
3732         s->c_dc_scale_table= c;
3733         s->intra_matrix[0] = 13;
3734         s->chroma_intra_matrix[0] = 14;
3735         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3736                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3737         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3738                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3739         s->qscale= 8;
3740     }
3741
3742     //FIXME var duplication
3743     s->current_picture_ptr->f->key_frame =
3744     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3745     s->current_picture_ptr->f->pict_type =
3746     s->current_picture.f->pict_type = s->pict_type;
3747
3748     if (s->current_picture.f->key_frame)
3749         s->picture_in_gop_number=0;
3750
3751     s->mb_x = s->mb_y = 0;
3752     s->last_bits= put_bits_count(&s->pb);
3753     switch(s->out_format) {
3754     case FMT_MJPEG:
3755         if (CONFIG_MJPEG_ENCODER)
3756             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3757                                            s->intra_matrix, s->chroma_intra_matrix);
3758         break;
3759     case FMT_H261:
3760         if (CONFIG_H261_ENCODER)
3761             ff_h261_encode_picture_header(s, picture_number);
3762         break;
3763     case FMT_H263:
3764         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3765             ff_wmv2_encode_picture_header(s, picture_number);
3766         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3767             ff_msmpeg4_encode_picture_header(s, picture_number);
3768         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3769             ff_mpeg4_encode_picture_header(s, picture_number);
3770         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3771             ret = ff_rv10_encode_picture_header(s, picture_number);
3772             if (ret < 0)
3773                 return ret;
3774         }
3775         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3776             ff_rv20_encode_picture_header(s, picture_number);
3777         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3778             ff_flv_encode_picture_header(s, picture_number);
3779         else if (CONFIG_H263_ENCODER)
3780             ff_h263_encode_picture_header(s, picture_number);
3781         break;
3782     case FMT_MPEG1:
3783         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3784             ff_mpeg1_encode_picture_header(s, picture_number);
3785         break;
3786     default:
3787         av_assert0(0);
3788     }
3789     bits= put_bits_count(&s->pb);
3790     s->header_bits= bits - s->last_bits;
3791
3792     for(i=1; i<context_count; i++){
3793         update_duplicate_context_after_me(s->thread_context[i], s);
3794     }
3795     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3796     for(i=1; i<context_count; i++){
3797         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3798             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3799         merge_context_after_encode(s, s->thread_context[i]);
3800     }
3801     emms_c();
3802     return 0;
3803 }
3804
3805 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3806     const int intra= s->mb_intra;
3807     int i;
3808
3809     s->dct_count[intra]++;
3810
3811     for(i=0; i<64; i++){
3812         int level= block[i];
3813
3814         if(level){
3815             if(level>0){
3816                 s->dct_error_sum[intra][i] += level;
3817                 level -= s->dct_offset[intra][i];
3818                 if(level<0) level=0;
3819             }else{
3820                 s->dct_error_sum[intra][i] -= level;
3821                 level += s->dct_offset[intra][i];
3822                 if(level>0) level=0;
3823             }
3824             block[i]= level;
3825         }
3826     }
3827 }
3828
/**
 * Transform and quantize one 8x8 block, choosing the quantized levels by a
 * rate-distortion search instead of plain rounding.
 *
 * block[] is passed through s->fdsp.fdct() (and s->denoise_dct() when
 * s->dct_error_sum is set). For every scan position up to the last one whose
 * scaled coefficient leaves the dead zone, up to two candidate levels are
 * kept: the rounded level and the one next closer to zero. A dynamic
 * programming pass over the scan positions then selects the run/level
 * sequence with the lowest "distortion + lambda * code length" score, using
 * the VLC length tables found in the context. The selected levels are written
 * back into block[]; every other position from start_i on is cleared.
 *
 * @param s        encoder context; s->coded_score[n] receives the score of
 *                 the selected solution
 * @param block    64 values, transformed and overwritten with quantized levels
 * @param n        block number; n < 4 selects y_dc_scale / intra_matrix /
 *                 q_intra_matrix, other values the c_dc_scale /
 *                 chroma_intra_matrix / q_chroma_intra_matrix tables
 * @param qscale   quantizer scale, also used as index into the q_*_matrix tables
 * @param overflow set to whether the OR of the primary candidate magnitudes
 *                 exceeds s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value smaller than
 *         start_i (1 for intra blocks, 0 otherwise) means that no coefficient
 *         from start_i on is coded
 */
3829 static int dct_quantize_trellis_c(MpegEncContext *s,
3830                                   int16_t *block, int n,
3831                                   int qscale, int *overflow){
3832     const int *qmat;
3833     const uint16_t *matrix;
3834     const uint8_t *scantable= s->intra_scantable.scantable;
3835     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3836     int max=0;
3837     unsigned int threshold1, threshold2;
3838     int bias=0;
3839     int run_tab[65];
3840     int level_tab[65];
3841     int score_tab[65];
3842     int survivor[65];
3843     int survivor_count;
3844     int last_run=0;
3845     int last_level=0;
3846     int last_score= 0;
3847     int last_i;
3848     int coeff[2][64];
3849     int coeff_count[64];
3850     int qmul, qadd, start_i, last_non_zero, i, dc;
3851     const int esc_length= s->ac_esc_length;
3852     uint8_t * length;
3853     uint8_t * last_length;
3854     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3855
3856     s->fdsp.fdct(block);
3857
3858     if(s->dct_error_sum)
3859         s->denoise_dct(s, block);
3860     qmul= qscale*16;
3861     qadd= ((qscale-1)|1)*8;
3862
3863     if (s->mb_intra) {
3864         int q;
3865         if (!s->h263_aic) {
3866             if (n < 4)
3867                 q = s->y_dc_scale;
3868             else
3869                 q = s->c_dc_scale;
3870             q = q << 3;
3871         } else{
3872             /* For AIC we skip quant/dequant of INTRADC */
3873             q = 1 << 3;
3874             qadd=0;
3875         }
3876
3877         /* note: block[0] is assumed to be positive */
3878         block[0] = (block[0] + (q >> 1)) / q;
         /* the DC coefficient is final at this point; the search below
          * only covers scan positions 1..63 */
3879         start_i = 1;
3880         last_non_zero = 0;
3881         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3882         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3883         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3884             bias= 1<<(QMAT_SHIFT-1);
3885
3886         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3887             length     = s->intra_chroma_ac_vlc_length;
3888             last_length= s->intra_chroma_ac_vlc_last_length;
3889         } else {
3890             length     = s->intra_ac_vlc_length;
3891             last_length= s->intra_ac_vlc_last_length;
3892         }
3893     } else {
3894         start_i = 0;
3895         last_non_zero = -1;
3896         qmat = s->q_inter_matrix[qscale];
3897         matrix = s->inter_matrix;
3898         length     = s->inter_ac_vlc_length;
3899         last_length= s->inter_ac_vlc_last_length;
3900     }
3901     last_i= start_i;
3902
     /* (unsigned)(level + threshold1) > threshold2 is a single-compare test
      * for level > threshold1 || level < -threshold1 */
3903     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3904     threshold2= (threshold1<<1);
3905
     /* find the last scan position whose scaled coefficient leaves the dead zone */
3906     for(i=63; i>=start_i; i--) {
3907         const int j = scantable[i];
3908         int level = block[j] * qmat[j];
3909
3910         if(((unsigned)(level+threshold1))>threshold2){
3911             last_non_zero = i;
3912             break;
3913         }
3914     }
3915
     /* collect the candidate levels of every position up to last_non_zero:
      * coeff[0][i] is the rounded level, coeff[1][i] the one next closer to
      * zero (only considered when coeff_count[i] is 2) */
3916     for(i=start_i; i<=last_non_zero; i++) {
3917         const int j = scantable[i];
3918         int level = block[j] * qmat[j];
3919
3920 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3921 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3922         if(((unsigned)(level+threshold1))>threshold2){
3923             if(level>0){
3924                 level= (bias + level)>>QMAT_SHIFT;
3925                 coeff[0][i]= level;
3926                 coeff[1][i]= level-1;
3927 //                coeff[2][k]= level-2;
3928             }else{
3929                 level= (bias - level)>>QMAT_SHIFT;
3930                 coeff[0][i]= -level;
3931                 coeff[1][i]= -level+1;
3932 //                coeff[2][k]= -level+2;
3933             }
             /* a rounded magnitude of 1 has no second candidate, as
              * coeff[1][i] would be 0 */
3934             coeff_count[i]= FFMIN(level, 2);
3935             av_assert2(coeff_count[i]);
3936             max |=level;
3937         }else{
             /* inside the dead zone: the only candidate is +1 or -1,
              * carrying the sign of the scaled coefficient */
3938             coeff[0][i]= (level>>31)|1;
3939             coeff_count[i]= 1;
3940         }
3941     }
3942
3943     *overflow= s->max_qcoeff < max; //overflow might have happened
3944
3945     if(last_non_zero < start_i){
3946         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3947         return last_non_zero;
3948     }
3949
     /* score_tab[k] holds the best score of a coded sequence covering the scan
      * positions before k; survivor[] lists the values of k that are still
      * tried as predecessors */
3950     score_tab[start_i]= 0;
3951     survivor[0]= start_i;
3952     survivor_count= 1;
3953
3954     for(i=start_i; i<=last_non_zero; i++){
3955         int level_index, j, zero_distortion;
3956         int dct_coeff= FFABS(block[ scantable[i] ]);
3957         int best_score=256*256*256*120;
3958
3959         if (s->fdsp.fdct == ff_fdct_ifast)
3960             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3961         zero_distortion= dct_coeff*dct_coeff;
3962
3963         for(level_index=0; level_index < coeff_count[i]; level_index++){
3964             int distortion;
3965             int level= coeff[level_index][i];
3966             const int alevel= FFABS(level);
3967             int unquant_coeff;
3968
3969             av_assert2(level);
3970
             /* dequantize the candidate with the rule of the output format */
3971             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3972                 unquant_coeff= alevel*qmul + qadd;
3973             } else if(s->out_format == FMT_MJPEG) {
3974                 j = s->idsp.idct_permutation[scantable[i]];
3975                 unquant_coeff = alevel * matrix[j] * 8;
3976             }else{ //MPEG1
3977                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3978                 if(s->mb_intra){
3979                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3980                         unquant_coeff =   (unquant_coeff - 1) | 1;
3981                 }else{
3982                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3983                         unquant_coeff =   (unquant_coeff - 1) | 1;
3984                 }
3985                 unquant_coeff<<= 3;
3986             }
3987
             /* distortion is expressed relative to coding this position as 0 */
3988             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* bias the level by 64 for UNI_AC_ENC_INDEX(); biased values
              * outside 0..127 are charged esc_length instead of a table entry */
3989             level+=64;
3990             if((level&(~127)) == 0){
3991                 for(j=survivor_count-1; j>=0; j--){
3992                     int run= i - survivor[j];
3993                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3994                     score += score_tab[i-run];
3995
3996                     if(score < best_score){
3997                         best_score= score;
3998                         run_tab[i+1]= run;
3999                         level_tab[i+1]= level-64;
4000                     }
4001                 }
4002
                 /* for H.263/H.261 output the candidate is also scored as
                  * the final coefficient, using the last_length table */
4003                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4004                     for(j=survivor_count-1; j>=0; j--){
4005                         int run= i - survivor[j];
4006                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4007                         score += score_tab[i-run];
4008                         if(score < last_score){
4009                             last_score= score;
4010                             last_run= run;
4011                             last_level= level-64;
4012                             last_i= i+1;
4013                         }
4014                     }
4015                 }
4016             }else{
4017                 distortion += esc_length*lambda;
4018                 for(j=survivor_count-1; j>=0; j--){
4019                     int run= i - survivor[j];
4020                     int score= distortion + score_tab[i-run];
4021
4022                     if(score < best_score){
4023                         best_score= score;
4024                         run_tab[i+1]= run;
4025                         level_tab[i+1]= level-64;
4026                     }
4027                 }
4028
4029                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4030                   for(j=survivor_count-1; j>=0; j--){
4031                         int run= i - survivor[j];
4032                         int score= distortion + score_tab[i-run];
4033                         if(score < last_score){
4034                             last_score= score;
4035                             last_run= run;
4036                             last_level= level-64;
4037                             last_i= i+1;
4038                         }
4039                     }
4040                 }
4041             }
4042         }
4043
4044         score_tab[i+1]= best_score;
4045
         /* drop trailing survivors whose score is worse than the new entry;
          * when last_non_zero > 27 a slack of lambda is allowed (see note) */
4046         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
4047         if(last_non_zero <= 27){
4048             for(; survivor_count; survivor_count--){
4049                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4050                     break;
4051             }
4052         }else{
4053             for(; survivor_count; survivor_count--){
4054                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4055                     break;
4056             }
4057         }
4058
4059         survivor[ survivor_count++ ]= i+1;
4060     }
4061
     /* other output formats have no separate "last" table: pick the end
      * point afterwards, adding lambda*2 to every candidate except i == 0 */
4062     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4063         last_score= 256*256*256*120;
4064         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4065             int score= score_tab[i];
4066             if(i) score += lambda*2; //FIXME exacter?
4067
4068             if(score < last_score){
4069                 last_score= score;
4070                 last_i= i;
4071                 last_level= level_tab[i];
4072                 last_run= run_tab[i];
4073             }
4074         }
4075     }
4076
4077     s->coded_score[n] = last_score;
4078
     /* dc must be read before the memset() below, which clears block[0]
      * when start_i is 0 */
4079     dc= FFABS(block[0]);
4080     last_non_zero= last_i - 1;
4081     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4082
4083     if(last_non_zero < start_i)
4084         return last_non_zero;
4085
     /* special case: only scan position 0 of a non-intra block survives;
      * its level is chosen again, comparing against coding nothing (dc*dc) */
4086     if(last_non_zero == 0 && start_i == 0){
4087         int best_level= 0;
4088         int best_score= dc * dc;
4089
4090         for(i=0; i<coeff_count[0]; i++){
4091             int level= coeff[i][0];
4092             int alevel= FFABS(level);
4093             int unquant_coeff, score, distortion;
4094
4095             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4096                     unquant_coeff= (alevel*qmul + qadd)>>3;
4097             }else{ //MPEG1
4098                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4099                     unquant_coeff =   (unquant_coeff - 1) | 1;
4100             }
4101             unquant_coeff = (unquant_coeff + 4) >> 3;
4102             unquant_coeff<<= 3 + 3;
4103
4104             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4105             level+=64;
4106             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4107             else                    score= distortion + esc_length*lambda;
4108
4109             if(score < best_score){
4110                 best_score= score;
4111                 best_level= level - 64;
4112             }
4113         }
4114         block[0]= best_level;
4115         s->coded_score[n] = best_score - dc*dc;
4116         if(best_level == 0) return -1;
4117         else                return last_non_zero;
4118     }
4119
     /* walk the chosen path backwards through run_tab[]/level_tab[] and
      * store the levels at their permuted scan positions */
4120     i= last_i;
4121     av_assert2(last_level);
4122
4123     block[ perm_scantable[last_non_zero] ]= last_level;
4124     i -= last_run + 1;
4125
4126     for(; i>start_i; i -= run_tab[i] + 1){
4127         block[ perm_scantable[i-1] ]= level_tab[i];
4128     }
4129
4130     return last_non_zero;
4131 }
4132
4133 //#define REFINE_STATS 1
4134 static int16_t basis[64][64];
4135
4136 static void build_basis(uint8_t *perm){
4137     int i, j, x, y;
4138     emms_c();
4139     for(i=0; i<8; i++){
4140         for(j=0; j<8; j++){
4141             for(y=0; y<8; y++){
4142                 for(x=0; x<8; x++){
4143                     double s= 0.25*(1<<BASIS_SHIFT);
4144                     int index= 8*i + j;
4145                     int perm_index= perm[index];
4146                     if(i==0) s*= sqrt(0.5);
4147                     if(j==0) s*= sqrt(0.5);
4148                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4149                 }
4150             }
4151         }
4152     }
4153 }
4154
/**
 * Iteratively refine the quantized levels of one block.
 *
 * Starting from the levels already present in block[], every pass evaluates
 * changing each candidate coefficient by +1 or -1. A change is scored as the
 * value returned by s->mpvencdsp.try_8x8basis() for the running remainder
 * rem[] plus lambda times the change in VLC code length. The single best
 * change of a pass is applied to block[] and rem[]; the search stops as soon
 * as a pass finds no change that lowers the score.
 *
 * @param s      encoder context; s->block_last_index[n] supplies the initial
 *               last non-zero index, s->quantizer_noise_shaping selects how
 *               aggressive the search is
 * @param block  quantized levels, modified in place
 * @param weight 64 weights; overwritten with a remapped value that the
 *               assertions below require to be in 1..63
 * @param orig   64 values subtracted, scaled by 1 << RECON_SHIFT, when
 *               seeding rem[] (presumably the samples the block should
 *               reconstruct -- confirm against the caller)
 * @param n      block number; n < 4 uses y_dc_scale, otherwise c_dc_scale
 * @param qscale quantizer scale from which qmul and qadd are derived
 * @return scan index of the last non-zero coefficient after refinement
 */
4155 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4156                         int16_t *block, int16_t *weight, int16_t *orig,
4157                         int n, int qscale){
4158     int16_t rem[64];
4159     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4160     const uint8_t *scantable= s->intra_scantable.scantable;
4161     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4162 //    unsigned int threshold1, threshold2;
4163 //    int bias=0;
4164     int run_tab[65];
4165     int prev_run=0;
4166     int prev_level=0;
4167     int qmul, qadd, start_i, last_non_zero, i, dc;
4168     uint8_t * length;
4169     uint8_t * last_length;
4170     int lambda;
4171     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4172 #ifdef REFINE_STATS
4173 static int count=0;
4174 static int after_last=0;
4175 static int to_zero=0;
4176 static int from_zero=0;
4177 static int raise=0;
4178 static int lower=0;
4179 static int messed_sign=0;
4180 #endif
4181
     /* build the shared basis table on first use; a zero first entry is
      * taken to mean "not built yet" */
4182     if(basis[0][0] == 0)
4183         build_basis(s->idsp.idct_permutation);
4184
4185     qmul= qscale*2;
4186     qadd= (qscale-1)|1;
4187     if (s->mb_intra) {
4188         if (!s->h263_aic) {
4189             if (n < 4)
4190                 q = s->y_dc_scale;
4191             else
4192                 q = s->c_dc_scale;
4193         } else{
4194             /* For AIC we skip quant/dequant of INTRADC */
4195             q = 1;
4196             qadd=0;
4197         }
4198         q <<= RECON_SHIFT-3;
4199         /* note: block[0] is assumed to be positive */
4200         dc= block[0]*q;
4201 //        block[0] = (block[0] + (q >> 1)) / q;
4202         start_i = 1;
4203 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4204 //            bias= 1<<(QMAT_SHIFT-1);
4205         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4206             length     = s->intra_chroma_ac_vlc_length;
4207             last_length= s->intra_chroma_ac_vlc_last_length;
4208         } else {
4209             length     = s->intra_ac_vlc_length;
4210             last_length= s->intra_ac_vlc_last_length;
4211         }
4212     } else {
4213         dc= 0;
4214         start_i = 0;
4215         length     = s->inter_ac_vlc_length;
4216         last_length= s->inter_ac_vlc_last_length;
4217     }
4218     last_non_zero = s->block_last_index[n];
4219
4220 #ifdef REFINE_STATS
4221 {START_TIMER
4222 #endif
     /* seed rem[] with the DC term (plus half of 1 << RECON_SHIFT for
      * rounding) minus the scaled orig[] values */
4223     dc += (1<<(RECON_SHIFT-1));
4224     for(i=0; i<64; i++){
4225         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4226     }
4227 #ifdef REFINE_STATS
4228 STOP_TIMER("memset rem[]")}
4229 #endif
     /* remap every weight and accumulate the squared weights, which scale
      * lambda below */
4230     sum=0;
4231     for(i=0; i<64; i++){
4232         int one= 36;
4233         int qns=4;
4234         int w;
4235
4236         w= FFABS(weight[i]) + qns*one;
4237         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4238
4239         weight[i] = w;
4240 //        w=weight[i] = (63*qns + (w/2)) / w;
4241
4242         av_assert2(w>0);
4243         av_assert2(w<(1<<6));
4244         sum += w*w;
4245     }
4246     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4247 #ifdef REFINE_STATS
4248 {START_TIMER
4249 #endif
     /* record the zero run in front of every non-zero level in run_tab[] and
      * pass its dequantized value (qmul*level +/- qadd) to add_8x8basis() */
4250     run=0;
4251     rle_index=0;
4252     for(i=start_i; i<=last_non_zero; i++){
4253         int j= perm_scantable[i];
4254         const int level= block[j];
4255         int coeff;
4256
4257         if(level){
4258             if(level<0) coeff= qmul*level - qadd;
4259             else        coeff= qmul*level + qadd;
4260             run_tab[rle_index++]=run;
4261             run=0;
4262
4263             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4264         }else{
4265             run++;
4266         }
4267     }
4268 #ifdef REFINE_STATS
4269 if(last_non_zero>0){
4270 STOP_TIMER("init rem[]")
4271 }
4272 }
4273
4274 {START_TIMER
4275 #endif
     /* one pass per iteration: find the single best +/-1 change, apply it,
      * and stop once no change beats the score of a zero change */
4276     for(;;){
4277         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4278         int best_coeff=0;
4279         int best_change=0;
4280         int run2, best_unquant_change=0, analyze_gradient;
4281 #ifdef REFINE_STATS
4282 {START_TIMER
4283 #endif
4284         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4285
         /* d1[] = transform of the weighted remainder; its signs are used
          * below to skip changes of zero coefficients */
4286         if(analyze_gradient){
4287 #ifdef REFINE_STATS
4288 {START_TIMER
4289 #endif
4290             for(i=0; i<64; i++){
4291                 int w= weight[i];
4292
4293                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4294             }
4295 #ifdef REFINE_STATS
4296 STOP_TIMER("rem*w*w")}
4297 {START_TIMER
4298 #endif
4299             s->fdsp.fdct(d1);
4300 #ifdef REFINE_STATS
4301 STOP_TIMER("dct")}
4302 #endif
4303         }
4304
         /* intra blocks: try the DC level +/- 1; no code length term is
          * added for it */
4305         if(start_i){
4306             const int level= block[0];
4307             int change, old_coeff;
4308
4309             av_assert2(s->mb_intra);
4310
4311             old_coeff= q*level;
4312
4313             for(change=-1; change<=1; change+=2){
4314                 int new_level= level + change;
4315                 int score, new_coeff;
4316
4317                 new_coeff= q*new_level;
4318                 if(new_coeff >= 2048 || new_coeff < 0)
4319                     continue;
4320
4321                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4322                                                   new_coeff - old_coeff);
4323                 if(score<best_score){
4324                     best_score= score;
4325                     best_coeff= 0;
4326                     best_change= change;
4327                     best_unquant_change= new_coeff - old_coeff;
4328                 }
4329             }
4330         }
4331
         /* run: zeros since the previous non-zero level;
          * run2: zeros remaining up to the next non-zero level (from run_tab[]) */
4332         run=0;
4333         rle_index=0;
4334         run2= run_tab[rle_index++];
4335         prev_level=0;
4336         prev_run=0;
4337
4338         for(i=start_i; i<64; i++){
4339             int j= perm_scantable[i];
4340             const int level= block[j];
4341             int change, old_coeff;
4342
             /* below noise shaping level 3 only the position directly after
              * the last non-zero coefficient is considered beyond it */
4343             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4344                 break;
4345
4346             if(level){
4347                 if(level<0) old_coeff= qmul*level - qadd;
4348                 else        old_coeff= qmul*level + qadd;
4349                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4350             }else{
4351                 old_coeff=0;
4352                 run2--;
4353                 av_assert2(run2>=0 || i >= last_non_zero );
4354             }
4355
4356             for(change=-1; change<=1; change+=2){
4357                 int new_level= level + change;
4358                 int score, new_coeff, unquant_change;
4359
4360                 score=0;
                 /* below noise shaping level 2 magnitudes may only shrink */
4361                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4362                    continue;
4363
                 /* score first collects the change in VLC code length for
                  * the three cases: non-zero -> non-zero, zero -> +/-1,
                  * +/-1 -> zero */
4364                 if(new_level){
4365                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4366                     else            new_coeff= qmul*new_level + qadd;
4367                     if(new_coeff >= 2048 || new_coeff <= -2048)
4368                         continue;
4369                     //FIXME check for overflow
4370
4371                     if(level){
4372                         if(level < 63 && level > -63){
4373                             if(i < last_non_zero)
4374                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4375                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4376                             else
4377                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4378                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4379                         }
4380                     }else{
4381                         av_assert2(FFABS(new_level)==1);
4382
                         /* skip the change when the gradient is non-zero and
                          * has the same sign as the new level */
4383                         if(analyze_gradient){
4384                             int g= d1[ scantable[i] ];
4385                             if(g && (g^new_level) >= 0)
4386                                 continue;
4387                         }
4388
                         /* a new coefficient splits the run in front of the
                          * next non-zero level, or becomes the new last one */
4389                         if(i < last_non_zero){
4390                             int next_i= i + run2 + 1;
4391                             int next_level= block[ perm_scantable[next_i] ] + 64;
4392
4393                             if(next_level&(~127))
4394                                 next_level= 0;
4395
4396                             if(next_i < last_non_zero)
4397                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4398                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4399                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4400                             else
4401                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4402                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4403                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4404                         }else{
4405                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4406                             if(prev_level){
4407                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4408                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4409                             }
4410                         }
4411                     }
4412                 }else{
4413                     new_coeff=0;
4414                     av_assert2(FFABS(level)==1);
4415
                     /* removing a coefficient merges its run with the next
                      * one, or makes the previous coefficient the last one */
4416                     if(i < last_non_zero){
4417                         int next_i= i + run2 + 1;
4418                         int next_level= block[ perm_scantable[next_i] ] + 64;
4419
4420                         if(next_level&(~127))
4421                             next_level= 0;
4422
4423                         if(next_i < last_non_zero)
4424                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4425                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4426                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4427                         else
4428                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4429                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4430                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4431                     }else{
4432                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4433                         if(prev_level){
4434                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4435                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4436                         }
4437                     }
4438                 }
4439
4440                 score *= lambda;
4441
4442                 unquant_change= new_coeff - old_coeff;
4443                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4444
4445                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4446                                                    unquant_change);
4447                 if(score<best_score){
4448                     best_score= score;
4449                     best_coeff= i;
4450                     best_change= change;
4451                     best_unquant_change= unquant_change;
4452                 }
4453             }
             /* remember the last non-zero level, biased by 64 and set to 0
              * when the biased value is outside 0..127 */
4454             if(level){
4455                 prev_level= level + 64;
4456                 if(prev_level&(~127))
4457                     prev_level= 0;
4458                 prev_run= run;
4459                 run=0;
4460             }else{
4461                 run++;
4462             }
4463         }
4464 #ifdef REFINE_STATS
4465 STOP_TIMER("iterative step")}
4466 #endif
4467
         /* apply the winning change, then bring last_non_zero, run_tab[] and
          * rem[] up to date for the next pass */
4468         if(best_change){
4469             int j= perm_scantable[ best_coeff ];
4470
4471             block[j] += best_change;
4472
4473             if(best_coeff > last_non_zero){
4474                 last_non_zero= best_coeff;
4475                 av_assert2(block[j]);
4476 #ifdef REFINE_STATS
4477 after_last++;
4478 #endif
4479             }else{
4480 #ifdef REFINE_STATS
4481 if(block[j]){
4482     if(block[j] - best_change){
4483         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4484             raise++;
4485         }else{
4486             lower++;
4487         }
4488     }else{
4489         from_zero++;
4490     }
4491 }else{
4492     to_zero++;
4493 }
4494 #endif
                 /* the change may have zeroed the last coefficient: move
                  * last_non_zero back to the previous non-zero one */
4495                 for(; last_non_zero>=start_i; last_non_zero--){
4496                     if(block[perm_scantable[last_non_zero]])
4497                         break;
4498                 }
4499             }
4500 #ifdef REFINE_STATS
4501 count++;
4502 if(256*256*256*64 % count == 0){
4503     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4504 }
4505 #endif
4506             run=0;
4507             rle_index=0;
4508             for(i=start_i; i<=last_non_zero; i++){
4509                 int j= perm_scantable[i];
4510                 const int level= block[j];
4511
4512                  if(level){
4513                      run_tab[rle_index++]=run;
4514                      run=0;
4515                  }else{
4516                      run++;
4517                  }
4518             }
4519
             /* j is the outer one here: the position of the changed coefficient */
4520             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4521         }else{
4522             break;
4523         }
4524     }
4525 #ifdef REFINE_STATS
4526 if(last_non_zero>0){
4527 STOP_TIMER("iterative search")
4528 }
4529 }
4530 #endif
4531
4532     return last_non_zero;
4533 }
4534
/**
 * Permute an 8x8 block according to a permutation vector.
 *
 * Only the positions scantable[0] .. scantable[last] are moved; all other
 * entries of the block are left as they are. Nothing is done when last is
 * zero or negative.
 *
 * @param block       the block whose coefficients are moved, in place
 * @param permutation the permutation vector: the value found at position j
 *                    ends up at position permutation[j]
 * @param scantable   the scantable in use; it only limits the work to the
 *                    coefficients that can be non-zero, the block is not
 *                    (inverse) permuted to scantable order
 * @param last        index, in scantable order, of the last non-zero
 *                    coefficient
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int k;

    if (last < 1)
        return;

    /* FIXME: a shortcut returning early when permutation[1] == 1 is left
     * disabled; it is ok but not clean and might fail for some permutations. */

    /* Pass 1: set aside every coefficient that may be non-zero and clear its
     * slot, so that pass 2 cannot read a value it has already moved. */
    for (k = 0; k <= last; k++) {
        const int src = scantable[k];

        saved[src] = block[src];
        block[src] = 0;
    }

    /* Pass 2: drop each saved coefficient at its permuted position. */
    k = 0;
    while (k <= last) {
        const int src = scantable[k++];

        block[permutation[src]] = saved[src];
    }
}
4570
4571 int ff_dct_quantize_c(MpegEncContext *s,
4572                         int16_t *block, int n,
4573                         int qscale, int *overflow)
4574 {
4575     int i, j, level, last_non_zero, q, start_i;
4576     const int *qmat;
4577     const uint8_t *scantable= s->intra_scantable.scantable;
4578     int bias;
4579     int max=0;
4580     unsigned int threshold1, threshold2;
4581
4582     s->fdsp.fdct(block);
4583
4584     if(s->dct_error_sum)
4585         s->denoise_dct(s, block);
4586
4587     if (s->mb_intra) {
4588         if (!s->h263_aic) {
4589             if (n < 4)
4590                 q = s->y_dc_scale;
4591             else
4592                 q = s->c_dc_scale;
4593             q = q << 3;
4594         } else
4595             /* For AIC we skip quant/dequant of INTRADC */
4596             q = 1 << 3;
4597
4598         /* note: block[0] is assumed to be positive */
4599         block[0] = (block[0] + (q >> 1)) / q;
4600         start_i = 1;
4601         last_non_zero = 0;
4602         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4603         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4604     } else {
4605         start_i = 0;
4606         last_non_zero = -1;
4607         qmat = s->q_inter_matrix[qscale];
4608         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4609     }
4610     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4611     threshold2= (threshold1<<1);
4612     for(i=63;i>=start_i;i--) {
4613         j = scantable[i];
4614         level = block[j] * qmat[j];
4615
4616         if(((unsigned)(level+threshold1))>threshold2){
4617             last_non_zero = i;
4618             break;
4619         }else{
4620             block[j]=0;
4621         }
4622     }
4623     for(i=start_i; i<=last_non_zero; i++) {
4624         j = scantable[i];
4625         level = block[j] * qmat[j];
4626
4627 //        if(   bias+level >= (1<<QMAT_SHIFT)
4628 //           || bias-level >= (1<<QMAT_SHIFT)){
4629         if(((unsigned)(level+threshold1))>threshold2){
4630             if(level>0){
4631                 level= (bias + level)>>QMAT_SHIFT;
4632                 block[j]= level;
4633             }else{
4634                 level= (bias - level)>>QMAT_SHIFT;
4635                 block[j]= -level;
4636             }
4637             max |=level;
4638         }else{
4639             block[j]=0;
4640         }
4641     }
4642     *overflow= s->max_qcoeff < max; //overflow might have happened
4643
4644     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4645     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4646         block_permute(block, s->idsp.idct_permutation,
4647                       scantable, last_non_zero);
4648
4649     return last_non_zero;
4650 }
4651
/* Byte offset of member x inside MpegEncContext. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flag combination used by the option entries that follow; the expansion is
 * parenthesized so it always behaves as a single operand. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4654 static const AVOption h263_options[] = {
4655     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4656     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4657     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4658     FF_MPV_COMMON_OPTS
4659     { NULL },
4660 };
4661
4662 static const AVClass h263_class = {
4663     .class_name = "H.263 encoder",
4664     .item_name  = av_default_item_name,
4665     .option     = h263_options,
4666     .version    = LIBAVUTIL_VERSION_INT,
4667 };
4668
4669 AVCodec ff_h263_encoder = {
4670     .name           = "h263",
4671     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4672     .type           = AVMEDIA_TYPE_VIDEO,
4673     .id             = AV_CODEC_ID_H263,
4674     .priv_data_size = sizeof(MpegEncContext),
4675     .init           = ff_mpv_encode_init,
4676     .encode2        = ff_mpv_encode_picture,
4677     .close          = ff_mpv_encode_end,
4678     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4679     .priv_class     = &h263_class,
4680 };
4681
4682 static const AVOption h263p_options[] = {
4683     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4684     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4685     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4686     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4687     FF_MPV_COMMON_OPTS
4688     { NULL },
4689 };
4690 static const AVClass h263p_class = {
4691     .class_name = "H.263p encoder",
4692     .item_name  = av_default_item_name,
4693     .option     = h263p_options,
4694     .version    = LIBAVUTIL_VERSION_INT,
4695 };
4696
4697 AVCodec ff_h263p_encoder = {
4698     .name           = "h263p",
4699     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4700     .type           = AVMEDIA_TYPE_VIDEO,
4701     .id             = AV_CODEC_ID_H263P,
4702     .priv_data_size = sizeof(MpegEncContext),
4703     .init           = ff_mpv_encode_init,
4704     .encode2        = ff_mpv_encode_picture,
4705     .close          = ff_mpv_encode_end,
4706     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4707     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4708     .priv_class     = &h263p_class,
4709 };
4710
4711 static const AVClass msmpeg4v2_class = {
4712     .class_name = "msmpeg4v2 encoder",
4713     .item_name  = av_default_item_name,
4714     .option     = ff_mpv_generic_options,
4715     .version    = LIBAVUTIL_VERSION_INT,
4716 };
4717
4718 AVCodec ff_msmpeg4v2_encoder = {
4719     .name           = "msmpeg4v2",
4720     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4721     .type           = AVMEDIA_TYPE_VIDEO,
4722     .id             = AV_CODEC_ID_MSMPEG4V2,
4723     .priv_data_size = sizeof(MpegEncContext),
4724     .init           = ff_mpv_encode_init,
4725     .encode2        = ff_mpv_encode_picture,
4726     .close          = ff_mpv_encode_end,
4727     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4728     .priv_class     = &msmpeg4v2_class,
4729 };
4730
4731 static const AVClass msmpeg4v3_class = {
4732     .class_name = "msmpeg4v3 encoder",
4733     .item_name  = av_default_item_name,
4734     .option     = ff_mpv_generic_options,
4735     .version    = LIBAVUTIL_VERSION_INT,
4736 };
4737
4738 AVCodec ff_msmpeg4v3_encoder = {
4739     .name           = "msmpeg4",
4740     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4741     .type           = AVMEDIA_TYPE_VIDEO,
4742     .id             = AV_CODEC_ID_MSMPEG4V3,
4743     .priv_data_size = sizeof(MpegEncContext),
4744     .init           = ff_mpv_encode_init,
4745     .encode2        = ff_mpv_encode_picture,
4746     .close          = ff_mpv_encode_end,
4747     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4748     .priv_class     = &msmpeg4v3_class,
4749 };
4750
4751 static const AVClass wmv1_class = {
4752     .class_name = "wmv1 encoder",
4753     .item_name  = av_default_item_name,
4754     .option     = ff_mpv_generic_options,
4755     .version    = LIBAVUTIL_VERSION_INT,
4756 };
4757
4758 AVCodec ff_wmv1_encoder = {
4759     .name           = "wmv1",
4760     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4761     .type           = AVMEDIA_TYPE_VIDEO,
4762     .id             = AV_CODEC_ID_WMV1,
4763     .priv_data_size = sizeof(MpegEncContext),
4764     .init           = ff_mpv_encode_init,
4765     .encode2        = ff_mpv_encode_picture,
4766     .close          = ff_mpv_encode_end,
4767     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4768     .priv_class     = &wmv1_class,
4769 };