]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit '22cc57da64bfd73f2206969486b0aa183ee76479'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /*
26  * non linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
27  */
28
29 /**
30  * @file
31  * The simplest mpeg encoder (well, it was the simplest!).
32  */
33
34 #include <stdint.h>
35
36 #include "libavutil/internal.h"
37 #include "libavutil/intmath.h"
38 #include "libavutil/mathematics.h"
39 #include "libavutil/pixdesc.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/timer.h"
42 #include "avcodec.h"
43 #include "dct.h"
44 #include "idctdsp.h"
45 #include "mpeg12.h"
46 #include "mpegvideo.h"
47 #include "mpegvideodata.h"
48 #include "h261.h"
49 #include "h263.h"
50 #include "h263data.h"
51 #include "mjpegenc_common.h"
52 #include "mathops.h"
53 #include "mpegutils.h"
54 #include "mjpegenc.h"
55 #include "msmpeg4.h"
56 #include "pixblockdsp.h"
57 #include "qpeldsp.h"
58 #include "faandct.h"
59 #include "thread.h"
60 #include "aandcttab.h"
61 #include "flv.h"
62 #include "mpeg4video.h"
63 #include "internal.h"
64 #include "bytestream.h"
65 #include "wmv2.h"
66 #include "rv10.h"
67 #include <limits.h>
68 #include "sp5x.h"
69
70 #define QUANT_BIAS_SHIFT 8
71
72 #define QMAT_SHIFT_MMX 16
73 #define QMAT_SHIFT 21
74
75 static int encode_picture(MpegEncContext *s, int picture_number);
76 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
77 static int sse_mb(MpegEncContext *s);
78 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
79 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
80
81 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
82 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
83
84 const AVOption ff_mpv_generic_options[] = {
85     FF_MPV_COMMON_OPTS
86     { NULL },
87 };
88
89 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
90                        uint16_t (*qmat16)[2][64],
91                        const uint16_t *quant_matrix,
92                        int bias, int qmin, int qmax, int intra)
93 {
94     FDCTDSPContext *fdsp = &s->fdsp;
95     int qscale;
96     int shift = 0;
97
98     for (qscale = qmin; qscale <= qmax; qscale++) {
99         int i;
100         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
101 #if CONFIG_FAANDCT
102             fdsp->fdct == ff_faandct            ||
103 #endif /* CONFIG_FAANDCT */
104             fdsp->fdct == ff_jpeg_fdct_islow_10) {
105             for (i = 0; i < 64; i++) {
106                 const int j = s->idsp.idct_permutation[i];
107                 int64_t den = (int64_t) qscale * quant_matrix[j];
108                 /* 16 <= qscale * quant_matrix[i] <= 7905
109                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
110                  *             19952 <=              x  <= 249205026
111                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
112                  *           3444240 >= (1 << 36) / (x) >= 275 */
113
114                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
115             }
116         } else if (fdsp->fdct == ff_fdct_ifast) {
117             for (i = 0; i < 64; i++) {
118                 const int j = s->idsp.idct_permutation[i];
119                 int64_t den = ff_aanscales[i] * (int64_t) qscale * quant_matrix[j];
120                 /* 16 <= qscale * quant_matrix[i] <= 7905
121                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
122                  *             19952 <=              x  <= 249205026
123                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
124                  *           3444240 >= (1 << 36) / (x) >= 275 */
125
126                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / den);
127             }
128         } else {
129             for (i = 0; i < 64; i++) {
130                 const int j = s->idsp.idct_permutation[i];
131                 int64_t den = (int64_t) qscale * quant_matrix[j];
132                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
133                  * Assume x = qscale * quant_matrix[i]
134                  * So             16 <=              x  <= 7905
135                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
136                  * so          32768 >= (1 << 19) / (x) >= 67 */
137                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / den);
138                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
139                 //                    (qscale * quant_matrix[i]);
140                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / den;
141
142                 if (qmat16[qscale][0][i] == 0 ||
143                     qmat16[qscale][0][i] == 128 * 256)
144                     qmat16[qscale][0][i] = 128 * 256 - 1;
145                 qmat16[qscale][1][i] =
146                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
147                                 qmat16[qscale][0][i]);
148             }
149         }
150
151         for (i = intra; i < 64; i++) {
152             int64_t max = 8191;
153             if (fdsp->fdct == ff_fdct_ifast) {
154                 max = (8191LL * ff_aanscales[i]) >> 14;
155             }
156             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
157                 shift++;
158             }
159         }
160     }
161     if (shift) {
162         av_log(NULL, AV_LOG_INFO,
163                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
164                QMAT_SHIFT - shift);
165     }
166 }
167
168 static inline void update_qscale(MpegEncContext *s)
169 {
170     if (s->q_scale_type == 1) {
171         int i;
172         int bestdiff=INT_MAX;
173         int best = 1;
174         static const uint8_t non_linear_qscale[] = {
175             1,2,3,4,5,6,7,8,9,10,11,12,14,16,18,20,24,26,28
176         };
177
178         for (i = 0 ; i<FF_ARRAY_ELEMS(non_linear_qscale); i++) {
179             int diff = FFABS((non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 7)) - (int)s->lambda * 139);
180             if (non_linear_qscale[i] < s->avctx->qmin ||
181                 (non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
182                 continue;
183             if (diff < bestdiff) {
184                 bestdiff = diff;
185                 best = non_linear_qscale[i];
186             }
187         }
188         s->qscale = best;
189     } else {
190         s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
191                     (FF_LAMBDA_SHIFT + 7);
192         s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
193     }
194
195     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
196                  FF_LAMBDA_SHIFT;
197 }
198
199 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
200 {
201     int i;
202
203     if (matrix) {
204         put_bits(pb, 1, 1);
205         for (i = 0; i < 64; i++) {
206             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
207         }
208     } else
209         put_bits(pb, 1, 0);
210 }
211
212 /**
213  * init s->current_picture.qscale_table from s->lambda_table
214  */
215 void ff_init_qscale_tab(MpegEncContext *s)
216 {
217     int8_t * const qscale_table = s->current_picture.qscale_table;
218     int i;
219
220     for (i = 0; i < s->mb_num; i++) {
221         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
222         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
223         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
224                                                   s->avctx->qmax);
225     }
226 }
227
228 static void update_duplicate_context_after_me(MpegEncContext *dst,
229                                               MpegEncContext *src)
230 {
231 #define COPY(a) dst->a= src->a
232     COPY(pict_type);
233     COPY(current_picture);
234     COPY(f_code);
235     COPY(b_code);
236     COPY(qscale);
237     COPY(lambda);
238     COPY(lambda2);
239     COPY(picture_in_gop_number);
240     COPY(gop_picture_number);
241     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
242     COPY(progressive_frame);    // FIXME don't set in encode_header
243     COPY(partitioned_frame);    // FIXME don't set in encode_header
244 #undef COPY
245 }
246
247 /**
248  * Set the given MpegEncContext to defaults for encoding.
249  * the changed fields will not depend upon the prior state of the MpegEncContext.
250  */
251 static void mpv_encode_defaults(MpegEncContext *s)
252 {
253     int i;
254     ff_mpv_common_defaults(s);
255
256     for (i = -16; i < 16; i++) {
257         default_fcode_tab[i + MAX_MV] = 1;
258     }
259     s->me.mv_penalty = default_mv_penalty;
260     s->fcode_tab     = default_fcode_tab;
261
262     s->input_picture_number  = 0;
263     s->picture_in_gop_number = 0;
264 }
265
266 av_cold int ff_dct_encode_init(MpegEncContext *s) {
267     if (ARCH_X86)
268         ff_dct_encode_init_x86(s);
269
270     if (CONFIG_H263_ENCODER)
271         ff_h263dsp_init(&s->h263dsp);
272     if (!s->dct_quantize)
273         s->dct_quantize = ff_dct_quantize_c;
274     if (!s->denoise_dct)
275         s->denoise_dct  = denoise_dct_c;
276     s->fast_dct_quantize = s->dct_quantize;
277     if (s->avctx->trellis)
278         s->dct_quantize  = dct_quantize_trellis_c;
279
280     return 0;
281 }
282
283 /* init video encoder */
284 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
285 {
286     MpegEncContext *s = avctx->priv_data;
287     int i, ret, format_supported;
288
289     mpv_encode_defaults(s);
290
291     switch (avctx->codec_id) {
292     case AV_CODEC_ID_MPEG2VIDEO:
293         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
294             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
295             av_log(avctx, AV_LOG_ERROR,
296                    "only YUV420 and YUV422 are supported\n");
297             return -1;
298         }
299         break;
300     case AV_CODEC_ID_MJPEG:
301     case AV_CODEC_ID_AMV:
302         format_supported = 0;
303         /* JPEG color space */
304         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
305             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
306             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
307             (avctx->color_range == AVCOL_RANGE_JPEG &&
308              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
309               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
310               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
311             format_supported = 1;
312         /* MPEG color space */
313         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
314                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
315                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
316                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
317             format_supported = 1;
318
319         if (!format_supported) {
320             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
321             return -1;
322         }
323         break;
324     default:
325         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
326             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
327             return -1;
328         }
329     }
330
331     switch (avctx->pix_fmt) {
332     case AV_PIX_FMT_YUVJ444P:
333     case AV_PIX_FMT_YUV444P:
334         s->chroma_format = CHROMA_444;
335         break;
336     case AV_PIX_FMT_YUVJ422P:
337     case AV_PIX_FMT_YUV422P:
338         s->chroma_format = CHROMA_422;
339         break;
340     case AV_PIX_FMT_YUVJ420P:
341     case AV_PIX_FMT_YUV420P:
342     default:
343         s->chroma_format = CHROMA_420;
344         break;
345     }
346
347     s->bit_rate = avctx->bit_rate;
348     s->width    = avctx->width;
349     s->height   = avctx->height;
350     if (avctx->gop_size > 600 &&
351         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
352         av_log(avctx, AV_LOG_WARNING,
353                "keyframe interval too large!, reducing it from %d to %d\n",
354                avctx->gop_size, 600);
355         avctx->gop_size = 600;
356     }
357     s->gop_size     = avctx->gop_size;
358     s->avctx        = avctx;
359     if (avctx->max_b_frames > MAX_B_FRAMES) {
360         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
361                "is %d.\n", MAX_B_FRAMES);
362         avctx->max_b_frames = MAX_B_FRAMES;
363     }
364     s->max_b_frames = avctx->max_b_frames;
365     s->codec_id     = avctx->codec->id;
366     s->strict_std_compliance = avctx->strict_std_compliance;
367     s->quarter_sample     = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
368     s->mpeg_quant         = avctx->mpeg_quant;
369     s->rtp_mode           = !!avctx->rtp_payload_size;
370     s->intra_dc_precision = avctx->intra_dc_precision;
371
372     // workaround some differences between how applications specify dc precision
373     if (s->intra_dc_precision < 0) {
374         s->intra_dc_precision += 8;
375     } else if (s->intra_dc_precision >= 8)
376         s->intra_dc_precision -= 8;
377
378     if (s->intra_dc_precision < 0) {
379         av_log(avctx, AV_LOG_ERROR,
380                 "intra dc precision must be positive, note some applications use"
381                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
382         return AVERROR(EINVAL);
383     }
384
385     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
386         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
387         return AVERROR(EINVAL);
388     }
389     s->user_specified_pts = AV_NOPTS_VALUE;
390
391     if (s->gop_size <= 1) {
392         s->intra_only = 1;
393         s->gop_size   = 12;
394     } else {
395         s->intra_only = 0;
396     }
397
398 #if FF_API_MOTION_EST
399 FF_DISABLE_DEPRECATION_WARNINGS
400     s->me_method = avctx->me_method;
401 FF_ENABLE_DEPRECATION_WARNINGS
402 #endif
403
404     /* Fixed QSCALE */
405     s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
406
407 #if FF_API_MPV_OPT
408     FF_DISABLE_DEPRECATION_WARNINGS
409     if (avctx->border_masking != 0.0)
410         s->border_masking = avctx->border_masking;
411     FF_ENABLE_DEPRECATION_WARNINGS
412 #endif
413
414     s->adaptive_quant = (s->avctx->lumi_masking ||
415                          s->avctx->dark_masking ||
416                          s->avctx->temporal_cplx_masking ||
417                          s->avctx->spatial_cplx_masking  ||
418                          s->avctx->p_masking      ||
419                          s->border_masking ||
420                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
421                         !s->fixed_qscale;
422
423     s->loop_filter = !!(s->avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
424
425     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
426         switch(avctx->codec_id) {
427         case AV_CODEC_ID_MPEG1VIDEO:
428         case AV_CODEC_ID_MPEG2VIDEO:
429             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
430             break;
431         case AV_CODEC_ID_MPEG4:
432         case AV_CODEC_ID_MSMPEG4V1:
433         case AV_CODEC_ID_MSMPEG4V2:
434         case AV_CODEC_ID_MSMPEG4V3:
435             if       (avctx->rc_max_rate >= 15000000) {
436                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
437             } else if(avctx->rc_max_rate >=  2000000) {
438                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000LL) * (320- 80) / (15000000 -  2000000);
439             } else if(avctx->rc_max_rate >=   384000) {
440                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000LL) * ( 80- 40) / ( 2000000 -   384000);
441             } else
442                 avctx->rc_buffer_size = 40;
443             avctx->rc_buffer_size *= 16384;
444             break;
445         }
446         if (avctx->rc_buffer_size) {
447             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
448         }
449     }
450
451     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
452         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
453         return -1;
454     }
455
456     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
457         av_log(avctx, AV_LOG_INFO,
458                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
459     }
460
461     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
462         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
463         return -1;
464     }
465
466     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
467         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
468         return -1;
469     }
470
471     if (avctx->rc_max_rate &&
472         avctx->rc_max_rate == avctx->bit_rate &&
473         avctx->rc_max_rate != avctx->rc_min_rate) {
474         av_log(avctx, AV_LOG_INFO,
475                "impossible bitrate constraints, this will fail\n");
476     }
477
478     if (avctx->rc_buffer_size &&
479         avctx->bit_rate * (int64_t)avctx->time_base.num >
480             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
481         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
482         return -1;
483     }
484
485     if (!s->fixed_qscale &&
486         avctx->bit_rate * av_q2d(avctx->time_base) >
487             avctx->bit_rate_tolerance) {
488         av_log(avctx, AV_LOG_WARNING,
489                "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, (int64_t)avctx->bit_rate);
490         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
491     }
492
493     if (s->avctx->rc_max_rate &&
494         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
495         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
496          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
497         90000LL * (avctx->rc_buffer_size - 1) >
498             s->avctx->rc_max_rate * 0xFFFFLL) {
499         av_log(avctx, AV_LOG_INFO,
500                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
501                "specified vbv buffer is too large for the given bitrate!\n");
502     }
503
504     if ((s->avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
505         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
506         s->codec_id != AV_CODEC_ID_FLV1) {
507         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
508         return -1;
509     }
510
511     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
512         av_log(avctx, AV_LOG_ERROR,
513                "OBMC is only supported with simple mb decision\n");
514         return -1;
515     }
516
517     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
518         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
519         return -1;
520     }
521
522     if (s->max_b_frames                    &&
523         s->codec_id != AV_CODEC_ID_MPEG4      &&
524         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
525         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
526         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
527         return -1;
528     }
529     if (s->max_b_frames < 0) {
530         av_log(avctx, AV_LOG_ERROR,
531                "max b frames must be 0 or positive for mpegvideo based encoders\n");
532         return -1;
533     }
534
535     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
536          s->codec_id == AV_CODEC_ID_H263  ||
537          s->codec_id == AV_CODEC_ID_H263P) &&
538         (avctx->sample_aspect_ratio.num > 255 ||
539          avctx->sample_aspect_ratio.den > 255)) {
540         av_log(avctx, AV_LOG_WARNING,
541                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
542                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
543         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
544                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
545     }
546
547     if ((s->codec_id == AV_CODEC_ID_H263  ||
548          s->codec_id == AV_CODEC_ID_H263P) &&
549         (avctx->width  > 2048 ||
550          avctx->height > 1152 )) {
551         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
552         return -1;
553     }
554     if ((s->codec_id == AV_CODEC_ID_H263  ||
555          s->codec_id == AV_CODEC_ID_H263P) &&
556         ((avctx->width &3) ||
557          (avctx->height&3) )) {
558         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
559         return -1;
560     }
561
562     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
563         (avctx->width  > 4095 ||
564          avctx->height > 4095 )) {
565         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
566         return -1;
567     }
568
569     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
570         (avctx->width  > 16383 ||
571          avctx->height > 16383 )) {
572         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
573         return -1;
574     }
575
576     if (s->codec_id == AV_CODEC_ID_RV10 &&
577         (avctx->width &15 ||
578          avctx->height&15 )) {
579         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
580         return AVERROR(EINVAL);
581     }
582
583     if (s->codec_id == AV_CODEC_ID_RV20 &&
584         (avctx->width &3 ||
585          avctx->height&3 )) {
586         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
587         return AVERROR(EINVAL);
588     }
589
590     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
591          s->codec_id == AV_CODEC_ID_WMV2) &&
592          avctx->width & 1) {
593          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
594          return -1;
595     }
596
597     if ((s->avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
598         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
599         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
600         return -1;
601     }
602
603     // FIXME mpeg2 uses that too
604     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
605                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
606         av_log(avctx, AV_LOG_ERROR,
607                "mpeg2 style quantization not supported by codec\n");
608         return -1;
609     }
610
611     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
612         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
613         return -1;
614     }
615
616     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
617         s->avctx->mb_decision != FF_MB_DECISION_RD) {
618         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
619         return -1;
620     }
621
622     if (s->avctx->scenechange_threshold < 1000000000 &&
623         (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
624         av_log(avctx, AV_LOG_ERROR,
625                "closed gop with scene change detection are not supported yet, "
626                "set threshold to 1000000000\n");
627         return -1;
628     }
629
630     if (s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
631         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
632             av_log(avctx, AV_LOG_ERROR,
633                   "low delay forcing is only available for mpeg2\n");
634             return -1;
635         }
636         if (s->max_b_frames != 0) {
637             av_log(avctx, AV_LOG_ERROR,
638                    "b frames cannot be used with low delay\n");
639             return -1;
640         }
641     }
642
643     if (s->q_scale_type == 1) {
644         if (avctx->qmax > 28) {
645             av_log(avctx, AV_LOG_ERROR,
646                    "non linear quant only supports qmax <= 28 currently\n");
647             return -1;
648         }
649     }
650
651     if (s->avctx->thread_count > 1         &&
652         s->codec_id != AV_CODEC_ID_MPEG4      &&
653         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
654         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
655         s->codec_id != AV_CODEC_ID_MJPEG      &&
656         (s->codec_id != AV_CODEC_ID_H263P)) {
657         av_log(avctx, AV_LOG_ERROR,
658                "multi threaded encoding not supported by codec\n");
659         return -1;
660     }
661
662     if (s->avctx->thread_count < 1) {
663         av_log(avctx, AV_LOG_ERROR,
664                "automatic thread number detection not supported by codec, "
665                "patch welcome\n");
666         return -1;
667     }
668
669     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
670         s->rtp_mode = 1;
671
672     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
673         s->h263_slice_structured = 1;
674
675     if (!avctx->time_base.den || !avctx->time_base.num) {
676         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
677         return -1;
678     }
679
680     if (avctx->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
681         av_log(avctx, AV_LOG_INFO,
682                "notice: b_frame_strategy only affects the first pass\n");
683         avctx->b_frame_strategy = 0;
684     }
685
686     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
687     if (i > 1) {
688         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
689         avctx->time_base.den /= i;
690         avctx->time_base.num /= i;
691         //return -1;
692     }
693
694     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
695         // (a + x * 3 / 8) / x
696         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
697         s->inter_quant_bias = 0;
698     } else {
699         s->intra_quant_bias = 0;
700         // (a - x / 4) / x
701         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
702     }
703
704     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
705         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
706         return AVERROR(EINVAL);
707     }
708
709 #if FF_API_QUANT_BIAS
710 FF_DISABLE_DEPRECATION_WARNINGS
711     if (s->intra_quant_bias == FF_DEFAULT_QUANT_BIAS &&
712         avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
713         s->intra_quant_bias = avctx->intra_quant_bias;
714     if (s->inter_quant_bias == FF_DEFAULT_QUANT_BIAS &&
715         avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
716         s->inter_quant_bias = avctx->inter_quant_bias;
717 FF_ENABLE_DEPRECATION_WARNINGS
718 #endif
719
720     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
721
722     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
723         s->avctx->time_base.den > (1 << 16) - 1) {
724         av_log(avctx, AV_LOG_ERROR,
725                "timebase %d/%d not supported by MPEG 4 standard, "
726                "the maximum admitted value for the timebase denominator "
727                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
728                (1 << 16) - 1);
729         return -1;
730     }
731     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
732
733     switch (avctx->codec->id) {
734     case AV_CODEC_ID_MPEG1VIDEO:
735         s->out_format = FMT_MPEG1;
736         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
737         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
738         break;
739     case AV_CODEC_ID_MPEG2VIDEO:
740         s->out_format = FMT_MPEG1;
741         s->low_delay  = !!(s->avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
742         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
743         s->rtp_mode   = 1;
744         break;
745     case AV_CODEC_ID_MJPEG:
746     case AV_CODEC_ID_AMV:
747         s->out_format = FMT_MJPEG;
748         s->intra_only = 1; /* force intra only for jpeg */
749         if (!CONFIG_MJPEG_ENCODER ||
750             ff_mjpeg_encode_init(s) < 0)
751             return -1;
752         avctx->delay = 0;
753         s->low_delay = 1;
754         break;
755     case AV_CODEC_ID_H261:
756         if (!CONFIG_H261_ENCODER)
757             return -1;
758         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
759             av_log(avctx, AV_LOG_ERROR,
760                    "The specified picture size of %dx%d is not valid for the "
761                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
762                     s->width, s->height);
763             return -1;
764         }
765         s->out_format = FMT_H261;
766         avctx->delay  = 0;
767         s->low_delay  = 1;
768         s->rtp_mode   = 0; /* Sliced encoding not supported */
769         break;
770     case AV_CODEC_ID_H263:
771         if (!CONFIG_H263_ENCODER)
772             return -1;
773         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
774                              s->width, s->height) == 8) {
775             av_log(avctx, AV_LOG_ERROR,
776                    "The specified picture size of %dx%d is not valid for "
777                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
778                    "352x288, 704x576, and 1408x1152. "
779                    "Try H.263+.\n", s->width, s->height);
780             return -1;
781         }
782         s->out_format = FMT_H263;
783         avctx->delay  = 0;
784         s->low_delay  = 1;
785         break;
786     case AV_CODEC_ID_H263P:
787         s->out_format = FMT_H263;
788         s->h263_plus  = 1;
789         /* Fx */
790         s->h263_aic        = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
791         s->modified_quant  = s->h263_aic;
792         s->loop_filter     = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
793         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
794
795         /* /Fx */
796         /* These are just to be sure */
797         avctx->delay = 0;
798         s->low_delay = 1;
799         break;
800     case AV_CODEC_ID_FLV1:
801         s->out_format      = FMT_H263;
802         s->h263_flv        = 2; /* format = 1; 11-bit codes */
803         s->unrestricted_mv = 1;
804         s->rtp_mode  = 0; /* don't allow GOB */
805         avctx->delay = 0;
806         s->low_delay = 1;
807         break;
808     case AV_CODEC_ID_RV10:
809         s->out_format = FMT_H263;
810         avctx->delay  = 0;
811         s->low_delay  = 1;
812         break;
813     case AV_CODEC_ID_RV20:
814         s->out_format      = FMT_H263;
815         avctx->delay       = 0;
816         s->low_delay       = 1;
817         s->modified_quant  = 1;
818         s->h263_aic        = 1;
819         s->h263_plus       = 1;
820         s->loop_filter     = 1;
821         s->unrestricted_mv = 0;
822         break;
823     case AV_CODEC_ID_MPEG4:
824         s->out_format      = FMT_H263;
825         s->h263_pred       = 1;
826         s->unrestricted_mv = 1;
827         s->low_delay       = s->max_b_frames ? 0 : 1;
828         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
829         break;
830     case AV_CODEC_ID_MSMPEG4V2:
831         s->out_format      = FMT_H263;
832         s->h263_pred       = 1;
833         s->unrestricted_mv = 1;
834         s->msmpeg4_version = 2;
835         avctx->delay       = 0;
836         s->low_delay       = 1;
837         break;
838     case AV_CODEC_ID_MSMPEG4V3:
839         s->out_format        = FMT_H263;
840         s->h263_pred         = 1;
841         s->unrestricted_mv   = 1;
842         s->msmpeg4_version   = 3;
843         s->flipflop_rounding = 1;
844         avctx->delay         = 0;
845         s->low_delay         = 1;
846         break;
847     case AV_CODEC_ID_WMV1:
848         s->out_format        = FMT_H263;
849         s->h263_pred         = 1;
850         s->unrestricted_mv   = 1;
851         s->msmpeg4_version   = 4;
852         s->flipflop_rounding = 1;
853         avctx->delay         = 0;
854         s->low_delay         = 1;
855         break;
856     case AV_CODEC_ID_WMV2:
857         s->out_format        = FMT_H263;
858         s->h263_pred         = 1;
859         s->unrestricted_mv   = 1;
860         s->msmpeg4_version   = 5;
861         s->flipflop_rounding = 1;
862         avctx->delay         = 0;
863         s->low_delay         = 1;
864         break;
865     default:
866         return -1;
867     }
868
869     avctx->has_b_frames = !s->low_delay;
870
871     s->encoding = 1;
872
873     s->progressive_frame    =
874     s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
875                                                 AV_CODEC_FLAG_INTERLACED_ME) ||
876                                 s->alternate_scan);
877
878     /* init */
879     ff_mpv_idct_init(s);
880     if (ff_mpv_common_init(s) < 0)
881         return -1;
882
883     ff_fdctdsp_init(&s->fdsp, avctx);
884     ff_me_cmp_init(&s->mecc, avctx);
885     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
886     ff_pixblockdsp_init(&s->pdsp, avctx);
887     ff_qpeldsp_init(&s->qdsp);
888
889     if (s->msmpeg4_version) {
890         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
891                           2 * 2 * (MAX_LEVEL + 1) *
892                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
893     }
894     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
895
896     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
897     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
898     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
899     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
900     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
901     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
902     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
903                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
904     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
905                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
906
907     if (s->avctx->noise_reduction) {
908         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
909                           2 * 64 * sizeof(uint16_t), fail);
910     }
911
912     ff_dct_encode_init(s);
913
914     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
915         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
916
917     s->quant_precision = 5;
918
919     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
920     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
921
922     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
923         ff_h261_encode_init(s);
924     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
925         ff_h263_encode_init(s);
926     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
927         if ((ret = ff_msmpeg4_encode_init(s)) < 0)
928             return ret;
929     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
930         && s->out_format == FMT_MPEG1)
931         ff_mpeg1_encode_init(s);
932
933     /* init q matrix */
934     for (i = 0; i < 64; i++) {
935         int j = s->idsp.idct_permutation[i];
936         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
937             s->mpeg_quant) {
938             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
939             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
940         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
941             s->intra_matrix[j] =
942             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
943         } else {
944             /* mpeg1/2 */
945             s->chroma_intra_matrix[j] =
946             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
947             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
948         }
949         if (s->avctx->intra_matrix)
950             s->intra_matrix[j] = s->avctx->intra_matrix[i];
951         if (s->avctx->inter_matrix)
952             s->inter_matrix[j] = s->avctx->inter_matrix[i];
953     }
954
955     /* precompute matrix */
956     /* for mjpeg, we do include qscale in the matrix */
957     if (s->out_format != FMT_MJPEG) {
958         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
959                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
960                           31, 1);
961         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
962                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
963                           31, 0);
964     }
965
966     if (ff_rate_control_init(s) < 0)
967         return -1;
968
969 #if FF_API_ERROR_RATE
970     FF_DISABLE_DEPRECATION_WARNINGS
971     if (avctx->error_rate)
972         s->error_rate = avctx->error_rate;
973     FF_ENABLE_DEPRECATION_WARNINGS;
974 #endif
975
976 #if FF_API_NORMALIZE_AQP
977     FF_DISABLE_DEPRECATION_WARNINGS
978     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
979         s->mpv_flags |= FF_MPV_FLAG_NAQ;
980     FF_ENABLE_DEPRECATION_WARNINGS;
981 #endif
982
983 #if FF_API_MV0
984     FF_DISABLE_DEPRECATION_WARNINGS
985     if (avctx->flags & CODEC_FLAG_MV0)
986         s->mpv_flags |= FF_MPV_FLAG_MV0;
987     FF_ENABLE_DEPRECATION_WARNINGS
988 #endif
989
990 #if FF_API_MPV_OPT
991     FF_DISABLE_DEPRECATION_WARNINGS
992     if (avctx->rc_qsquish != 0.0)
993         s->rc_qsquish = avctx->rc_qsquish;
994     if (avctx->rc_qmod_amp != 0.0)
995         s->rc_qmod_amp = avctx->rc_qmod_amp;
996     if (avctx->rc_qmod_freq)
997         s->rc_qmod_freq = avctx->rc_qmod_freq;
998     if (avctx->rc_buffer_aggressivity != 1.0)
999         s->rc_buffer_aggressivity = avctx->rc_buffer_aggressivity;
1000     if (avctx->rc_initial_cplx != 0.0)
1001         s->rc_initial_cplx = avctx->rc_initial_cplx;
1002     if (avctx->lmin)
1003         s->lmin = avctx->lmin;
1004     if (avctx->lmax)
1005         s->lmax = avctx->lmax;
1006
1007     if (avctx->rc_eq) {
1008         av_freep(&s->rc_eq);
1009         s->rc_eq = av_strdup(avctx->rc_eq);
1010         if (!s->rc_eq)
1011             return AVERROR(ENOMEM);
1012     }
1013     FF_ENABLE_DEPRECATION_WARNINGS
1014 #endif
1015
1016     if (avctx->b_frame_strategy == 2) {
1017         for (i = 0; i < s->max_b_frames + 2; i++) {
1018             s->tmp_frames[i] = av_frame_alloc();
1019             if (!s->tmp_frames[i])
1020                 return AVERROR(ENOMEM);
1021
1022             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
1023             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
1024             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
1025
1026             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
1027             if (ret < 0)
1028                 return ret;
1029         }
1030     }
1031
1032     return 0;
1033 fail:
1034     ff_mpv_encode_end(avctx);
1035     return AVERROR_UNKNOWN;
1036 }
1037
1038 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1039 {
1040     MpegEncContext *s = avctx->priv_data;
1041     int i;
1042
1043     ff_rate_control_uninit(s);
1044
1045     ff_mpv_common_end(s);
1046     if (CONFIG_MJPEG_ENCODER &&
1047         s->out_format == FMT_MJPEG)
1048         ff_mjpeg_encode_close(s);
1049
1050     av_freep(&avctx->extradata);
1051
1052     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1053         av_frame_free(&s->tmp_frames[i]);
1054
1055     ff_free_picture_tables(&s->new_picture);
1056     ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1057
1058     av_freep(&s->avctx->stats_out);
1059     av_freep(&s->ac_stats);
1060
1061     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
1062     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1063     s->q_chroma_intra_matrix=   NULL;
1064     s->q_chroma_intra_matrix16= NULL;
1065     av_freep(&s->q_intra_matrix);
1066     av_freep(&s->q_inter_matrix);
1067     av_freep(&s->q_intra_matrix16);
1068     av_freep(&s->q_inter_matrix16);
1069     av_freep(&s->input_picture);
1070     av_freep(&s->reordered_input_picture);
1071     av_freep(&s->dct_offset);
1072
1073     return 0;
1074 }
1075
/**
 * Sum of absolute errors of a 16x16 pixel block against a constant value.
 *
 * @param src    top-left pixel of the block (only read, never written)
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between the starts of consecutive rows
 * @return sum over the 16x16 block of |pixel - ref|
 */
static int get_sae(const uint8_t *src, int ref, int stride)
{
    int x, y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x++) {
            /* Load the pixel once, then take the absolute difference. */
            const int diff = src[x + y * stride] - ref;

            acc += diff >= 0 ? diff : -diff;
        }
    }

    return acc;
}
1089
1090 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1091                            uint8_t *ref, int stride)
1092 {
1093     int x, y, w, h;
1094     int acc = 0;
1095
1096     w = s->width  & ~15;
1097     h = s->height & ~15;
1098
1099     for (y = 0; y < h; y += 16) {
1100         for (x = 0; x < w; x += 16) {
1101             int offset = x + y * stride;
1102             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1103                                       stride, 16);
1104             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1105             int sae  = get_sae(src + offset, mean, stride);
1106
1107             acc += sae + 500 < sad;
1108         }
1109     }
1110     return acc;
1111 }
1112
1113 static int alloc_picture(MpegEncContext *s, Picture *pic, int shared)
1114 {
1115     return ff_alloc_picture(s->avctx, pic, &s->me, &s->sc, shared, 1,
1116                             s->chroma_x_shift, s->chroma_y_shift, s->out_format,
1117                             s->mb_stride, s->mb_width, s->mb_height, s->b8_stride,
1118                             &s->linesize, &s->uvlinesize);
1119 }
1120
1121 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1122 {
1123     Picture *pic = NULL;
1124     int64_t pts;
1125     int i, display_picture_number = 0, ret;
1126     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1127                                                  (s->low_delay ? 0 : 1);
1128     int direct = 1;
1129
1130     if (pic_arg) {
1131         pts = pic_arg->pts;
1132         display_picture_number = s->input_picture_number++;
1133
1134         if (pts != AV_NOPTS_VALUE) {
1135             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1136                 int64_t last = s->user_specified_pts;
1137
1138                 if (pts <= last) {
1139                     av_log(s->avctx, AV_LOG_ERROR,
1140                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1141                            pts, last);
1142                     return AVERROR(EINVAL);
1143                 }
1144
1145                 if (!s->low_delay && display_picture_number == 1)
1146                     s->dts_delta = pts - last;
1147             }
1148             s->user_specified_pts = pts;
1149         } else {
1150             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1151                 s->user_specified_pts =
1152                 pts = s->user_specified_pts + 1;
1153                 av_log(s->avctx, AV_LOG_INFO,
1154                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1155                        pts);
1156             } else {
1157                 pts = display_picture_number;
1158             }
1159         }
1160     }
1161
1162     if (pic_arg) {
1163         if (!pic_arg->buf[0] ||
1164             pic_arg->linesize[0] != s->linesize ||
1165             pic_arg->linesize[1] != s->uvlinesize ||
1166             pic_arg->linesize[2] != s->uvlinesize)
1167             direct = 0;
1168         if ((s->width & 15) || (s->height & 15))
1169             direct = 0;
1170         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1171             direct = 0;
1172         if (s->linesize & (STRIDE_ALIGN-1))
1173             direct = 0;
1174
1175         ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1176                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1177
1178         i = ff_find_unused_picture(s->avctx, s->picture, direct);
1179         if (i < 0)
1180             return i;
1181
1182         pic = &s->picture[i];
1183         pic->reference = 3;
1184
1185         if (direct) {
1186             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1187                 return ret;
1188         }
1189         ret = alloc_picture(s, pic, direct);
1190         if (ret < 0)
1191             return ret;
1192
1193         if (!direct) {
1194             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1195                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1196                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1197                 // empty
1198             } else {
1199                 int h_chroma_shift, v_chroma_shift;
1200                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1201                                                  &h_chroma_shift,
1202                                                  &v_chroma_shift);
1203
1204                 for (i = 0; i < 3; i++) {
1205                     int src_stride = pic_arg->linesize[i];
1206                     int dst_stride = i ? s->uvlinesize : s->linesize;
1207                     int h_shift = i ? h_chroma_shift : 0;
1208                     int v_shift = i ? v_chroma_shift : 0;
1209                     int w = s->width  >> h_shift;
1210                     int h = s->height >> v_shift;
1211                     uint8_t *src = pic_arg->data[i];
1212                     uint8_t *dst = pic->f->data[i];
1213                     int vpad = 16;
1214
1215                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1216                         && !s->progressive_sequence
1217                         && FFALIGN(s->height, 32) - s->height > 16)
1218                         vpad = 32;
1219
1220                     if (!s->avctx->rc_buffer_size)
1221                         dst += INPLACE_OFFSET;
1222
1223                     if (src_stride == dst_stride)
1224                         memcpy(dst, src, src_stride * h);
1225                     else {
1226                         int h2 = h;
1227                         uint8_t *dst2 = dst;
1228                         while (h2--) {
1229                             memcpy(dst2, src, w);
1230                             dst2 += dst_stride;
1231                             src += src_stride;
1232                         }
1233                     }
1234                     if ((s->width & 15) || (s->height & (vpad-1))) {
1235                         s->mpvencdsp.draw_edges(dst, dst_stride,
1236                                                 w, h,
1237                                                 16 >> h_shift,
1238                                                 vpad >> v_shift,
1239                                                 EDGE_BOTTOM);
1240                     }
1241                 }
1242             }
1243         }
1244         ret = av_frame_copy_props(pic->f, pic_arg);
1245         if (ret < 0)
1246             return ret;
1247
1248         pic->f->display_picture_number = display_picture_number;
1249         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1250     }
1251
1252     /* shift buffer entries */
1253     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1254         s->input_picture[i - 1] = s->input_picture[i];
1255
1256     s->input_picture[encoding_delay] = (Picture*) pic;
1257
1258     return 0;
1259 }
1260
1261 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1262 {
1263     int x, y, plane;
1264     int score = 0;
1265     int64_t score64 = 0;
1266
1267     for (plane = 0; plane < 3; plane++) {
1268         const int stride = p->f->linesize[plane];
1269         const int bw = plane ? 1 : 2;
1270         for (y = 0; y < s->mb_height * bw; y++) {
1271             for (x = 0; x < s->mb_width * bw; x++) {
1272                 int off = p->shared ? 0 : 16;
1273                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1274                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1275                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1276
1277                 switch (FFABS(s->avctx->frame_skip_exp)) {
1278                 case 0: score    =  FFMAX(score, v);          break;
1279                 case 1: score   += FFABS(v);                  break;
1280                 case 2: score64 += v * (int64_t)v;                       break;
1281                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1282                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1283                 }
1284             }
1285         }
1286     }
1287     emms_c();
1288
1289     if (score)
1290         score64 = score;
1291     if (s->avctx->frame_skip_exp < 0)
1292         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1293                       -1.0/s->avctx->frame_skip_exp);
1294
1295     if (score64 < s->avctx->frame_skip_threshold)
1296         return 1;
1297     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1298         return 1;
1299     return 0;
1300 }
1301
1302 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1303 {
1304     AVPacket pkt = { 0 };
1305     int ret, got_output;
1306
1307     av_init_packet(&pkt);
1308     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1309     if (ret < 0)
1310         return ret;
1311
1312     ret = pkt.size;
1313     av_free_packet(&pkt);
1314     return ret;
1315 }
1316
1317 static int estimate_best_b_count(MpegEncContext *s)
1318 {
1319     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1320     AVCodecContext *c = avcodec_alloc_context3(NULL);
1321     const int scale = s->avctx->brd_scale;
1322     int i, j, out_size, p_lambda, b_lambda, lambda2;
1323     int64_t best_rd  = INT64_MAX;
1324     int best_b_count = -1;
1325
1326     if (!c)
1327         return AVERROR(ENOMEM);
1328     av_assert0(scale >= 0 && scale <= 3);
1329
1330     //emms_c();
1331     //s->next_picture_ptr->quality;
1332     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1333     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1334     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1335     if (!b_lambda) // FIXME we should do this somewhere else
1336         b_lambda = p_lambda;
1337     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1338                FF_LAMBDA_SHIFT;
1339
1340     c->width        = s->width  >> scale;
1341     c->height       = s->height >> scale;
1342     c->flags        = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1343     c->flags       |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1344     c->mb_decision  = s->avctx->mb_decision;
1345     c->me_cmp       = s->avctx->me_cmp;
1346     c->mb_cmp       = s->avctx->mb_cmp;
1347     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1348     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1349     c->time_base    = s->avctx->time_base;
1350     c->max_b_frames = s->max_b_frames;
1351
1352     if (avcodec_open2(c, codec, NULL) < 0)
1353         return -1;
1354
1355     for (i = 0; i < s->max_b_frames + 2; i++) {
1356         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1357                                                 s->next_picture_ptr;
1358         uint8_t *data[4];
1359
1360         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1361             pre_input = *pre_input_ptr;
1362             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1363
1364             if (!pre_input.shared && i) {
1365                 data[0] += INPLACE_OFFSET;
1366                 data[1] += INPLACE_OFFSET;
1367                 data[2] += INPLACE_OFFSET;
1368             }
1369
1370             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1371                                        s->tmp_frames[i]->linesize[0],
1372                                        data[0],
1373                                        pre_input.f->linesize[0],
1374                                        c->width, c->height);
1375             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1376                                        s->tmp_frames[i]->linesize[1],
1377                                        data[1],
1378                                        pre_input.f->linesize[1],
1379                                        c->width >> 1, c->height >> 1);
1380             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1381                                        s->tmp_frames[i]->linesize[2],
1382                                        data[2],
1383                                        pre_input.f->linesize[2],
1384                                        c->width >> 1, c->height >> 1);
1385         }
1386     }
1387
1388     for (j = 0; j < s->max_b_frames + 1; j++) {
1389         int64_t rd = 0;
1390
1391         if (!s->input_picture[j])
1392             break;
1393
1394         c->error[0] = c->error[1] = c->error[2] = 0;
1395
1396         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1397         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1398
1399         out_size = encode_frame(c, s->tmp_frames[0]);
1400
1401         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1402
1403         for (i = 0; i < s->max_b_frames + 1; i++) {
1404             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1405
1406             s->tmp_frames[i + 1]->pict_type = is_p ?
1407                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1408             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1409
1410             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1411
1412             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1413         }
1414
1415         /* get the delayed frames */
1416         while (out_size) {
1417             out_size = encode_frame(c, NULL);
1418             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1419         }
1420
1421         rd += c->error[0] + c->error[1] + c->error[2];
1422
1423         if (rd < best_rd) {
1424             best_rd = rd;
1425             best_b_count = j;
1426         }
1427     }
1428
1429     avcodec_close(c);
1430     av_freep(&c);
1431
1432     return best_b_count;
1433 }
1434
1435 static int select_input_picture(MpegEncContext *s)
1436 {
1437     int i, ret;
1438
1439     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1440         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1441     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1442
1443     /* set next picture type & ordering */
1444     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1445         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1446             if (s->picture_in_gop_number < s->gop_size &&
1447                 s->next_picture_ptr &&
1448                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1449                 // FIXME check that te gop check above is +-1 correct
1450                 av_frame_unref(s->input_picture[0]->f);
1451
1452                 ff_vbv_update(s, 0);
1453
1454                 goto no_output_pic;
1455             }
1456         }
1457
1458         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1459             !s->next_picture_ptr || s->intra_only) {
1460             s->reordered_input_picture[0] = s->input_picture[0];
1461             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1462             s->reordered_input_picture[0]->f->coded_picture_number =
1463                 s->coded_picture_number++;
1464         } else {
1465             int b_frames;
1466
1467             if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1468                 for (i = 0; i < s->max_b_frames + 1; i++) {
1469                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1470
1471                     if (pict_num >= s->rc_context.num_entries)
1472                         break;
1473                     if (!s->input_picture[i]) {
1474                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1475                         break;
1476                     }
1477
1478                     s->input_picture[i]->f->pict_type =
1479                         s->rc_context.entry[pict_num].new_pict_type;
1480                 }
1481             }
1482
1483             if (s->avctx->b_frame_strategy == 0) {
1484                 b_frames = s->max_b_frames;
1485                 while (b_frames && !s->input_picture[b_frames])
1486                     b_frames--;
1487             } else if (s->avctx->b_frame_strategy == 1) {
1488                 for (i = 1; i < s->max_b_frames + 1; i++) {
1489                     if (s->input_picture[i] &&
1490                         s->input_picture[i]->b_frame_score == 0) {
1491                         s->input_picture[i]->b_frame_score =
1492                             get_intra_count(s,
1493                                             s->input_picture[i    ]->f->data[0],
1494                                             s->input_picture[i - 1]->f->data[0],
1495                                             s->linesize) + 1;
1496                     }
1497                 }
1498                 for (i = 0; i < s->max_b_frames + 1; i++) {
1499                     if (!s->input_picture[i] ||
1500                         s->input_picture[i]->b_frame_score - 1 >
1501                             s->mb_num / s->avctx->b_sensitivity)
1502                         break;
1503                 }
1504
1505                 b_frames = FFMAX(0, i - 1);
1506
1507                 /* reset scores */
1508                 for (i = 0; i < b_frames + 1; i++) {
1509                     s->input_picture[i]->b_frame_score = 0;
1510                 }
1511             } else if (s->avctx->b_frame_strategy == 2) {
1512                 b_frames = estimate_best_b_count(s);
1513             } else {
1514                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1515                 b_frames = 0;
1516             }
1517
1518             emms_c();
1519
1520             for (i = b_frames - 1; i >= 0; i--) {
1521                 int type = s->input_picture[i]->f->pict_type;
1522                 if (type && type != AV_PICTURE_TYPE_B)
1523                     b_frames = i;
1524             }
1525             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1526                 b_frames == s->max_b_frames) {
1527                 av_log(s->avctx, AV_LOG_ERROR,
1528                        "warning, too many b frames in a row\n");
1529             }
1530
1531             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1532                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1533                     s->gop_size > s->picture_in_gop_number) {
1534                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1535                 } else {
1536                     if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1537                         b_frames = 0;
1538                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1539                 }
1540             }
1541
1542             if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1543                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1544                 b_frames--;
1545
1546             s->reordered_input_picture[0] = s->input_picture[b_frames];
1547             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1548                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1549             s->reordered_input_picture[0]->f->coded_picture_number =
1550                 s->coded_picture_number++;
1551             for (i = 0; i < b_frames; i++) {
1552                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1553                 s->reordered_input_picture[i + 1]->f->pict_type =
1554                     AV_PICTURE_TYPE_B;
1555                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1556                     s->coded_picture_number++;
1557             }
1558         }
1559     }
1560 no_output_pic:
1561     if (s->reordered_input_picture[0]) {
1562         s->reordered_input_picture[0]->reference =
1563            s->reordered_input_picture[0]->f->pict_type !=
1564                AV_PICTURE_TYPE_B ? 3 : 0;
1565
1566         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1567         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->new_picture, s->reordered_input_picture[0])))
1568             return ret;
1569
1570         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1571             // input is a shared pix, so we can't modifiy it -> alloc a new
1572             // one & ensure that the shared one is reuseable
1573
1574             Picture *pic;
1575             int i = ff_find_unused_picture(s->avctx, s->picture, 0);
1576             if (i < 0)
1577                 return i;
1578             pic = &s->picture[i];
1579
1580             pic->reference = s->reordered_input_picture[0]->reference;
1581             if (alloc_picture(s, pic, 0) < 0) {
1582                 return -1;
1583             }
1584
1585             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1586             if (ret < 0)
1587                 return ret;
1588
1589             /* mark us unused / free shared pic */
1590             av_frame_unref(s->reordered_input_picture[0]->f);
1591             s->reordered_input_picture[0]->shared = 0;
1592
1593             s->current_picture_ptr = pic;
1594         } else {
1595             // input is not a shared pix -> reuse buffer for current_pix
1596             s->current_picture_ptr = s->reordered_input_picture[0];
1597             for (i = 0; i < 4; i++) {
1598                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1599             }
1600         }
1601         ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1602         if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1603                                        s->current_picture_ptr)) < 0)
1604             return ret;
1605
1606         s->picture_number = s->new_picture.f->display_picture_number;
1607     } else {
1608         ff_mpeg_unref_picture(s->avctx, &s->new_picture);
1609     }
1610     return 0;
1611 }
1612
1613 static void frame_end(MpegEncContext *s)
1614 {
1615     if (s->unrestricted_mv &&
1616         s->current_picture.reference &&
1617         !s->intra_only) {
1618         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1619         int hshift = desc->log2_chroma_w;
1620         int vshift = desc->log2_chroma_h;
1621         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1622                                 s->current_picture.f->linesize[0],
1623                                 s->h_edge_pos, s->v_edge_pos,
1624                                 EDGE_WIDTH, EDGE_WIDTH,
1625                                 EDGE_TOP | EDGE_BOTTOM);
1626         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1627                                 s->current_picture.f->linesize[1],
1628                                 s->h_edge_pos >> hshift,
1629                                 s->v_edge_pos >> vshift,
1630                                 EDGE_WIDTH >> hshift,
1631                                 EDGE_WIDTH >> vshift,
1632                                 EDGE_TOP | EDGE_BOTTOM);
1633         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1634                                 s->current_picture.f->linesize[2],
1635                                 s->h_edge_pos >> hshift,
1636                                 s->v_edge_pos >> vshift,
1637                                 EDGE_WIDTH >> hshift,
1638                                 EDGE_WIDTH >> vshift,
1639                                 EDGE_TOP | EDGE_BOTTOM);
1640     }
1641
1642     emms_c();
1643
1644     s->last_pict_type                 = s->pict_type;
1645     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1646     if (s->pict_type!= AV_PICTURE_TYPE_B)
1647         s->last_non_b_pict_type = s->pict_type;
1648
1649 #if FF_API_CODED_FRAME
1650 FF_DISABLE_DEPRECATION_WARNINGS
1651     av_frame_copy_props(s->avctx->coded_frame, s->current_picture.f);
1652 FF_ENABLE_DEPRECATION_WARNINGS
1653 #endif
1654 }
1655
1656 static void update_noise_reduction(MpegEncContext *s)
1657 {
1658     int intra, i;
1659
1660     for (intra = 0; intra < 2; intra++) {
1661         if (s->dct_count[intra] > (1 << 16)) {
1662             for (i = 0; i < 64; i++) {
1663                 s->dct_error_sum[intra][i] >>= 1;
1664             }
1665             s->dct_count[intra] >>= 1;
1666         }
1667
1668         for (i = 0; i < 64; i++) {
1669             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1670                                        s->dct_count[intra] +
1671                                        s->dct_error_sum[intra][i] / 2) /
1672                                       (s->dct_error_sum[intra][i] + 1);
1673         }
1674     }
1675 }
1676
1677 static int frame_start(MpegEncContext *s)
1678 {
1679     int ret;
1680
1681     /* mark & release old frames */
1682     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1683         s->last_picture_ptr != s->next_picture_ptr &&
1684         s->last_picture_ptr->f->buf[0]) {
1685         ff_mpeg_unref_picture(s->avctx, s->last_picture_ptr);
1686     }
1687
1688     s->current_picture_ptr->f->pict_type = s->pict_type;
1689     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1690
1691     ff_mpeg_unref_picture(s->avctx, &s->current_picture);
1692     if ((ret = ff_mpeg_ref_picture(s->avctx, &s->current_picture,
1693                                    s->current_picture_ptr)) < 0)
1694         return ret;
1695
1696     if (s->pict_type != AV_PICTURE_TYPE_B) {
1697         s->last_picture_ptr = s->next_picture_ptr;
1698         if (!s->droppable)
1699             s->next_picture_ptr = s->current_picture_ptr;
1700     }
1701
1702     if (s->last_picture_ptr) {
1703         ff_mpeg_unref_picture(s->avctx, &s->last_picture);
1704         if (s->last_picture_ptr->f->buf[0] &&
1705             (ret = ff_mpeg_ref_picture(s->avctx, &s->last_picture,
1706                                        s->last_picture_ptr)) < 0)
1707             return ret;
1708     }
1709     if (s->next_picture_ptr) {
1710         ff_mpeg_unref_picture(s->avctx, &s->next_picture);
1711         if (s->next_picture_ptr->f->buf[0] &&
1712             (ret = ff_mpeg_ref_picture(s->avctx, &s->next_picture,
1713                                        s->next_picture_ptr)) < 0)
1714             return ret;
1715     }
1716
1717     if (s->picture_structure!= PICT_FRAME) {
1718         int i;
1719         for (i = 0; i < 4; i++) {
1720             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1721                 s->current_picture.f->data[i] +=
1722                     s->current_picture.f->linesize[i];
1723             }
1724             s->current_picture.f->linesize[i] *= 2;
1725             s->last_picture.f->linesize[i]    *= 2;
1726             s->next_picture.f->linesize[i]    *= 2;
1727         }
1728     }
1729
1730     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1731         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1732         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1733     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1734         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1735         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1736     } else {
1737         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1738         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1739     }
1740
1741     if (s->dct_error_sum) {
1742         av_assert2(s->avctx->noise_reduction && s->encoding);
1743         update_noise_reduction(s);
1744     }
1745
1746     return 0;
1747 }
1748
1749 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1750                           const AVFrame *pic_arg, int *got_packet)
1751 {
1752     MpegEncContext *s = avctx->priv_data;
1753     int i, stuffing_count, ret;
1754     int context_count = s->slice_context_count;
1755
1756     s->vbv_ignore_qmax = 0;
1757
1758     s->picture_in_gop_number++;
1759
1760     if (load_input_picture(s, pic_arg) < 0)
1761         return -1;
1762
1763     if (select_input_picture(s) < 0) {
1764         return -1;
1765     }
1766
1767     /* output? */
1768     if (s->new_picture.f->data[0]) {
1769         int growing_buffer = context_count == 1 && !pkt->data && !s->data_partitioning;
1770         int pkt_size = growing_buffer ? FFMAX(s->mb_width*s->mb_height*64+10000, avctx->internal->byte_buffer_size) - AV_INPUT_BUFFER_PADDING_SIZE
1771                                               :
1772                                               s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000;
1773         if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size, 0)) < 0)
1774             return ret;
1775         if (s->mb_info) {
1776             s->mb_info_ptr = av_packet_new_side_data(pkt,
1777                                  AV_PKT_DATA_H263_MB_INFO,
1778                                  s->mb_width*s->mb_height*12);
1779             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1780         }
1781
1782         for (i = 0; i < context_count; i++) {
1783             int start_y = s->thread_context[i]->start_mb_y;
1784             int   end_y = s->thread_context[i]->  end_mb_y;
1785             int h       = s->mb_height;
1786             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1787             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1788
1789             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1790         }
1791
1792         s->pict_type = s->new_picture.f->pict_type;
1793         //emms_c();
1794         ret = frame_start(s);
1795         if (ret < 0)
1796             return ret;
1797 vbv_retry:
1798         ret = encode_picture(s, s->picture_number);
1799         if (growing_buffer) {
1800             av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1801             pkt->data = s->pb.buf;
1802             pkt->size = avctx->internal->byte_buffer_size;
1803         }
1804         if (ret < 0)
1805             return -1;
1806
1807         avctx->header_bits = s->header_bits;
1808         avctx->mv_bits     = s->mv_bits;
1809         avctx->misc_bits   = s->misc_bits;
1810         avctx->i_tex_bits  = s->i_tex_bits;
1811         avctx->p_tex_bits  = s->p_tex_bits;
1812         avctx->i_count     = s->i_count;
1813         // FIXME f/b_count in avctx
1814         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1815         avctx->skip_count  = s->skip_count;
1816
1817         frame_end(s);
1818
1819         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1820             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1821
1822         if (avctx->rc_buffer_size) {
1823             RateControlContext *rcc = &s->rc_context;
1824             int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1825
1826             if (put_bits_count(&s->pb) > max_size &&
1827                 s->lambda < s->lmax) {
1828                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1829                                        (s->qscale + 1) / s->qscale);
1830                 if (s->adaptive_quant) {
1831                     int i;
1832                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1833                         s->lambda_table[i] =
1834                             FFMAX(s->lambda_table[i] + 1,
1835                                   s->lambda_table[i] * (s->qscale + 1) /
1836                                   s->qscale);
1837                 }
1838                 s->mb_skipped = 0;        // done in frame_start()
1839                 // done in encode_picture() so we must undo it
1840                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1841                     if (s->flipflop_rounding          ||
1842                         s->codec_id == AV_CODEC_ID_H263P ||
1843                         s->codec_id == AV_CODEC_ID_MPEG4)
1844                         s->no_rounding ^= 1;
1845                 }
1846                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1847                     s->time_base       = s->last_time_base;
1848                     s->last_non_b_time = s->time - s->pp_time;
1849                 }
1850                 for (i = 0; i < context_count; i++) {
1851                     PutBitContext *pb = &s->thread_context[i]->pb;
1852                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1853                 }
1854                 s->vbv_ignore_qmax = 1;
1855                 av_log(s->avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1856                 goto vbv_retry;
1857             }
1858
1859             av_assert0(s->avctx->rc_max_rate);
1860         }
1861
1862         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1863             ff_write_pass1_stats(s);
1864
1865         for (i = 0; i < 4; i++) {
1866             s->current_picture_ptr->f->error[i] =
1867             s->current_picture.f->error[i] =
1868                 s->current_picture.error[i];
1869             avctx->error[i] += s->current_picture_ptr->f->error[i];
1870         }
1871         ff_side_data_set_encoder_stats(pkt, s->current_picture.f->quality,
1872                                        s->current_picture_ptr->f->error,
1873                                        (s->avctx->flags&AV_CODEC_FLAG_PSNR) ? 4 : 0,
1874                                        s->pict_type);
1875
1876         if (s->avctx->flags & AV_CODEC_FLAG_PASS1)
1877             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1878                    avctx->i_tex_bits + avctx->p_tex_bits ==
1879                        put_bits_count(&s->pb));
1880         flush_put_bits(&s->pb);
1881         s->frame_bits  = put_bits_count(&s->pb);
1882
1883         stuffing_count = ff_vbv_update(s, s->frame_bits);
1884         s->stuffing_bits = 8*stuffing_count;
1885         if (stuffing_count) {
1886             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1887                     stuffing_count + 50) {
1888                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1889                 return -1;
1890             }
1891
1892             switch (s->codec_id) {
1893             case AV_CODEC_ID_MPEG1VIDEO:
1894             case AV_CODEC_ID_MPEG2VIDEO:
1895                 while (stuffing_count--) {
1896                     put_bits(&s->pb, 8, 0);
1897                 }
1898             break;
1899             case AV_CODEC_ID_MPEG4:
1900                 put_bits(&s->pb, 16, 0);
1901                 put_bits(&s->pb, 16, 0x1C3);
1902                 stuffing_count -= 4;
1903                 while (stuffing_count--) {
1904                     put_bits(&s->pb, 8, 0xFF);
1905                 }
1906             break;
1907             default:
1908                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1909             }
1910             flush_put_bits(&s->pb);
1911             s->frame_bits  = put_bits_count(&s->pb);
1912         }
1913
1914         /* update mpeg1/2 vbv_delay for CBR */
1915         if (s->avctx->rc_max_rate                          &&
1916             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1917             s->out_format == FMT_MPEG1                     &&
1918             90000LL * (avctx->rc_buffer_size - 1) <=
1919                 s->avctx->rc_max_rate * 0xFFFFLL) {
1920             int vbv_delay, min_delay;
1921             double inbits  = s->avctx->rc_max_rate *
1922                              av_q2d(s->avctx->time_base);
1923             int    minbits = s->frame_bits - 8 *
1924                              (s->vbv_delay_ptr - s->pb.buf - 1);
1925             double bits    = s->rc_context.buffer_index + minbits - inbits;
1926
1927             if (bits < 0)
1928                 av_log(s->avctx, AV_LOG_ERROR,
1929                        "Internal error, negative bits\n");
1930
1931             assert(s->repeat_first_field == 0);
1932
1933             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1934             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1935                         s->avctx->rc_max_rate;
1936
1937             vbv_delay = FFMAX(vbv_delay, min_delay);
1938
1939             av_assert0(vbv_delay < 0xFFFF);
1940
1941             s->vbv_delay_ptr[0] &= 0xF8;
1942             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1943             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1944             s->vbv_delay_ptr[2] &= 0x07;
1945             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1946             avctx->vbv_delay     = vbv_delay * 300;
1947         }
1948         s->total_bits     += s->frame_bits;
1949         avctx->frame_bits  = s->frame_bits;
1950
1951         pkt->pts = s->current_picture.f->pts;
1952         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1953             if (!s->current_picture.f->coded_picture_number)
1954                 pkt->dts = pkt->pts - s->dts_delta;
1955             else
1956                 pkt->dts = s->reordered_pts;
1957             s->reordered_pts = pkt->pts;
1958         } else
1959             pkt->dts = pkt->pts;
1960         if (s->current_picture.f->key_frame)
1961             pkt->flags |= AV_PKT_FLAG_KEY;
1962         if (s->mb_info)
1963             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1964     } else {
1965         s->frame_bits = 0;
1966     }
1967
1968     /* release non-reference frames */
1969     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1970         if (!s->picture[i].reference)
1971             ff_mpeg_unref_picture(s->avctx, &s->picture[i]);
1972     }
1973
1974     av_assert1((s->frame_bits & 7) == 0);
1975
1976     pkt->size = s->frame_bits / 8;
1977     *got_packet = !!pkt->size;
1978     return 0;
1979 }
1980
1981 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1982                                                 int n, int threshold)
1983 {
1984     static const char tab[64] = {
1985         3, 2, 2, 1, 1, 1, 1, 1,
1986         1, 1, 1, 1, 1, 1, 1, 1,
1987         1, 1, 1, 1, 1, 1, 1, 1,
1988         0, 0, 0, 0, 0, 0, 0, 0,
1989         0, 0, 0, 0, 0, 0, 0, 0,
1990         0, 0, 0, 0, 0, 0, 0, 0,
1991         0, 0, 0, 0, 0, 0, 0, 0,
1992         0, 0, 0, 0, 0, 0, 0, 0
1993     };
1994     int score = 0;
1995     int run = 0;
1996     int i;
1997     int16_t *block = s->block[n];
1998     const int last_index = s->block_last_index[n];
1999     int skip_dc;
2000
2001     if (threshold < 0) {
2002         skip_dc = 0;
2003         threshold = -threshold;
2004     } else
2005         skip_dc = 1;
2006
2007     /* Are all we could set to zero already zero? */
2008     if (last_index <= skip_dc - 1)
2009         return;
2010
2011     for (i = 0; i <= last_index; i++) {
2012         const int j = s->intra_scantable.permutated[i];
2013         const int level = FFABS(block[j]);
2014         if (level == 1) {
2015             if (skip_dc && i == 0)
2016                 continue;
2017             score += tab[run];
2018             run = 0;
2019         } else if (level > 1) {
2020             return;
2021         } else {
2022             run++;
2023         }
2024     }
2025     if (score >= threshold)
2026         return;
2027     for (i = skip_dc; i <= last_index; i++) {
2028         const int j = s->intra_scantable.permutated[i];
2029         block[j] = 0;
2030     }
2031     if (block[0])
2032         s->block_last_index[n] = 0;
2033     else
2034         s->block_last_index[n] = -1;
2035 }
2036
2037 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2038                                int last_index)
2039 {
2040     int i;
2041     const int maxlevel = s->max_qcoeff;
2042     const int minlevel = s->min_qcoeff;
2043     int overflow = 0;
2044
2045     if (s->mb_intra) {
2046         i = 1; // skip clipping of intra dc
2047     } else
2048         i = 0;
2049
2050     for (; i <= last_index; i++) {
2051         const int j = s->intra_scantable.permutated[i];
2052         int level = block[j];
2053
2054         if (level > maxlevel) {
2055             level = maxlevel;
2056             overflow++;
2057         } else if (level < minlevel) {
2058             level = minlevel;
2059             overflow++;
2060         }
2061
2062         block[j] = level;
2063     }
2064
2065     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2066         av_log(s->avctx, AV_LOG_INFO,
2067                "warning, clipping %d dct coefficients to %d..%d\n",
2068                overflow, minlevel, maxlevel);
2069 }
2070
/**
 * Compute a weight for each pixel of an 8x8 block from its neighbourhood.
 *
 * For every position the 3x3 window around it, cropped to the block, is
 * examined and the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count
 * where count is the number of pixels in the window.
 *
 * @param weight output, 64 values stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            /* window area == number of samples accumulated below */
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int wy, wx;

            for (wy = top; wy < bottom; wy++) {
                for (wx = left; wx < right; wx++) {
                    const int v = ptr[wx + wy * stride];

                    sum += v;
                    sqr += v * v;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2094
2095 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2096                                                 int motion_x, int motion_y,
2097                                                 int mb_block_height,
2098                                                 int mb_block_width,
2099                                                 int mb_block_count)
2100 {
2101     int16_t weight[12][64];
2102     int16_t orig[12][64];
2103     const int mb_x = s->mb_x;
2104     const int mb_y = s->mb_y;
2105     int i;
2106     int skip_dct[12];
2107     int dct_offset = s->linesize * 8; // default for progressive frames
2108     int uv_dct_offset = s->uvlinesize * 8;
2109     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2110     ptrdiff_t wrap_y, wrap_c;
2111
2112     for (i = 0; i < mb_block_count; i++)
2113         skip_dct[i] = s->skipdct;
2114
2115     if (s->adaptive_quant) {
2116         const int last_qp = s->qscale;
2117         const int mb_xy = mb_x + mb_y * s->mb_stride;
2118
2119         s->lambda = s->lambda_table[mb_xy];
2120         update_qscale(s);
2121
2122         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2123             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2124             s->dquant = s->qscale - last_qp;
2125
2126             if (s->out_format == FMT_H263) {
2127                 s->dquant = av_clip(s->dquant, -2, 2);
2128
2129                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2130                     if (!s->mb_intra) {
2131                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2132                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2133                                 s->dquant = 0;
2134                         }
2135                         if (s->mv_type == MV_TYPE_8X8)
2136                             s->dquant = 0;
2137                     }
2138                 }
2139             }
2140         }
2141         ff_set_qscale(s, last_qp + s->dquant);
2142     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2143         ff_set_qscale(s, s->qscale + s->dquant);
2144
2145     wrap_y = s->linesize;
2146     wrap_c = s->uvlinesize;
2147     ptr_y  = s->new_picture.f->data[0] +
2148              (mb_y * 16 * wrap_y)              + mb_x * 16;
2149     ptr_cb = s->new_picture.f->data[1] +
2150              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2151     ptr_cr = s->new_picture.f->data[2] +
2152              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2153
2154     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2155         uint8_t *ebuf = s->sc.edge_emu_buffer + 36 * wrap_y;
2156         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2157         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2158         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2159                                  wrap_y, wrap_y,
2160                                  16, 16, mb_x * 16, mb_y * 16,
2161                                  s->width, s->height);
2162         ptr_y = ebuf;
2163         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2164                                  wrap_c, wrap_c,
2165                                  mb_block_width, mb_block_height,
2166                                  mb_x * mb_block_width, mb_y * mb_block_height,
2167                                  cw, ch);
2168         ptr_cb = ebuf + 16 * wrap_y;
2169         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2170                                  wrap_c, wrap_c,
2171                                  mb_block_width, mb_block_height,
2172                                  mb_x * mb_block_width, mb_y * mb_block_height,
2173                                  cw, ch);
2174         ptr_cr = ebuf + 16 * wrap_y + 16;
2175     }
2176
2177     if (s->mb_intra) {
2178         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2179             int progressive_score, interlaced_score;
2180
2181             s->interlaced_dct = 0;
2182             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2183                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2184                                                      NULL, wrap_y, 8) - 400;
2185
2186             if (progressive_score > 0) {
2187                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2188                                                         NULL, wrap_y * 2, 8) +
2189                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2190                                                         NULL, wrap_y * 2, 8);
2191                 if (progressive_score > interlaced_score) {
2192                     s->interlaced_dct = 1;
2193
2194                     dct_offset = wrap_y;
2195                     uv_dct_offset = wrap_c;
2196                     wrap_y <<= 1;
2197                     if (s->chroma_format == CHROMA_422 ||
2198                         s->chroma_format == CHROMA_444)
2199                         wrap_c <<= 1;
2200                 }
2201             }
2202         }
2203
2204         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2205         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2206         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2207         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2208
2209         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2210             skip_dct[4] = 1;
2211             skip_dct[5] = 1;
2212         } else {
2213             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2214             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2215             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2216                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2217                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2218             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2219                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2220                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2221                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2222                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2223                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2224                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2225             }
2226         }
2227     } else {
2228         op_pixels_func (*op_pix)[4];
2229         qpel_mc_func (*op_qpix)[16];
2230         uint8_t *dest_y, *dest_cb, *dest_cr;
2231
2232         dest_y  = s->dest[0];
2233         dest_cb = s->dest[1];
2234         dest_cr = s->dest[2];
2235
2236         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2237             op_pix  = s->hdsp.put_pixels_tab;
2238             op_qpix = s->qdsp.put_qpel_pixels_tab;
2239         } else {
2240             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2241             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2242         }
2243
2244         if (s->mv_dir & MV_DIR_FORWARD) {
2245             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2246                           s->last_picture.f->data,
2247                           op_pix, op_qpix);
2248             op_pix  = s->hdsp.avg_pixels_tab;
2249             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2250         }
2251         if (s->mv_dir & MV_DIR_BACKWARD) {
2252             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2253                           s->next_picture.f->data,
2254                           op_pix, op_qpix);
2255         }
2256
2257         if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
2258             int progressive_score, interlaced_score;
2259
2260             s->interlaced_dct = 0;
2261             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2262                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2263                                                      ptr_y + wrap_y * 8,
2264                                                      wrap_y, 8) - 400;
2265
2266             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2267                 progressive_score -= 400;
2268
2269             if (progressive_score > 0) {
2270                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2271                                                         wrap_y * 2, 8) +
2272                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2273                                                         ptr_y + wrap_y,
2274                                                         wrap_y * 2, 8);
2275
2276                 if (progressive_score > interlaced_score) {
2277                     s->interlaced_dct = 1;
2278
2279                     dct_offset = wrap_y;
2280                     uv_dct_offset = wrap_c;
2281                     wrap_y <<= 1;
2282                     if (s->chroma_format == CHROMA_422)
2283                         wrap_c <<= 1;
2284                 }
2285             }
2286         }
2287
2288         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2289         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2290         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2291                             dest_y + dct_offset, wrap_y);
2292         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2293                             dest_y + dct_offset + 8, wrap_y);
2294
2295         if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2296             skip_dct[4] = 1;
2297             skip_dct[5] = 1;
2298         } else {
2299             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2300             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2301             if (!s->chroma_y_shift) { /* 422 */
2302                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2303                                     dest_cb + uv_dct_offset, wrap_c);
2304                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2305                                     dest_cr + uv_dct_offset, wrap_c);
2306             }
2307         }
2308         /* pre quantization */
2309         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2310                 2 * s->qscale * s->qscale) {
2311             // FIXME optimize
2312             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2313                 skip_dct[0] = 1;
2314             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2315                 skip_dct[1] = 1;
2316             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2317                                wrap_y, 8) < 20 * s->qscale)
2318                 skip_dct[2] = 1;
2319             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2320                                wrap_y, 8) < 20 * s->qscale)
2321                 skip_dct[3] = 1;
2322             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2323                 skip_dct[4] = 1;
2324             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2325                 skip_dct[5] = 1;
2326             if (!s->chroma_y_shift) { /* 422 */
2327                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2328                                    dest_cb + uv_dct_offset,
2329                                    wrap_c, 8) < 20 * s->qscale)
2330                     skip_dct[6] = 1;
2331                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2332                                    dest_cr + uv_dct_offset,
2333                                    wrap_c, 8) < 20 * s->qscale)
2334                     skip_dct[7] = 1;
2335             }
2336         }
2337     }
2338
2339     if (s->quantizer_noise_shaping) {
2340         if (!skip_dct[0])
2341             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2342         if (!skip_dct[1])
2343             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2344         if (!skip_dct[2])
2345             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2346         if (!skip_dct[3])
2347             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2348         if (!skip_dct[4])
2349             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2350         if (!skip_dct[5])
2351             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2352         if (!s->chroma_y_shift) { /* 422 */
2353             if (!skip_dct[6])
2354                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2355                                   wrap_c);
2356             if (!skip_dct[7])
2357                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2358                                   wrap_c);
2359         }
2360         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2361     }
2362
2363     /* DCT & quantize */
2364     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2365     {
2366         for (i = 0; i < mb_block_count; i++) {
2367             if (!skip_dct[i]) {
2368                 int overflow;
2369                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2370                 // FIXME we could decide to change to quantizer instead of
2371                 // clipping
2372                 // JS: I don't think that would be a good idea it could lower
2373                 //     quality instead of improve it. Just INTRADC clipping
2374                 //     deserves changes in quantizer
2375                 if (overflow)
2376                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2377             } else
2378                 s->block_last_index[i] = -1;
2379         }
2380         if (s->quantizer_noise_shaping) {
2381             for (i = 0; i < mb_block_count; i++) {
2382                 if (!skip_dct[i]) {
2383                     s->block_last_index[i] =
2384                         dct_quantize_refine(s, s->block[i], weight[i],
2385                                             orig[i], i, s->qscale);
2386                 }
2387             }
2388         }
2389
2390         if (s->luma_elim_threshold && !s->mb_intra)
2391             for (i = 0; i < 4; i++)
2392                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2393         if (s->chroma_elim_threshold && !s->mb_intra)
2394             for (i = 4; i < mb_block_count; i++)
2395                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2396
2397         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2398             for (i = 0; i < mb_block_count; i++) {
2399                 if (s->block_last_index[i] == -1)
2400                     s->coded_score[i] = INT_MAX / 256;
2401             }
2402         }
2403     }
2404
2405     if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2406         s->block_last_index[4] =
2407         s->block_last_index[5] = 0;
2408         s->block[4][0] =
2409         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2410         if (!s->chroma_y_shift) { /* 422 / 444 */
2411             for (i=6; i<12; i++) {
2412                 s->block_last_index[i] = 0;
2413                 s->block[i][0] = s->block[4][0];
2414             }
2415         }
2416     }
2417
2418     // non c quantize code returns incorrect block_last_index FIXME
2419     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2420         for (i = 0; i < mb_block_count; i++) {
2421             int j;
2422             if (s->block_last_index[i] > 0) {
2423                 for (j = 63; j > 0; j--) {
2424                     if (s->block[i][s->intra_scantable.permutated[j]])
2425                         break;
2426                 }
2427                 s->block_last_index[i] = j;
2428             }
2429         }
2430     }
2431
2432     /* huffman encode */
2433     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2434     case AV_CODEC_ID_MPEG1VIDEO:
2435     case AV_CODEC_ID_MPEG2VIDEO:
2436         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2437             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2438         break;
2439     case AV_CODEC_ID_MPEG4:
2440         if (CONFIG_MPEG4_ENCODER)
2441             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2442         break;
2443     case AV_CODEC_ID_MSMPEG4V2:
2444     case AV_CODEC_ID_MSMPEG4V3:
2445     case AV_CODEC_ID_WMV1:
2446         if (CONFIG_MSMPEG4_ENCODER)
2447             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2448         break;
2449     case AV_CODEC_ID_WMV2:
2450         if (CONFIG_WMV2_ENCODER)
2451             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2452         break;
2453     case AV_CODEC_ID_H261:
2454         if (CONFIG_H261_ENCODER)
2455             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2456         break;
2457     case AV_CODEC_ID_H263:
2458     case AV_CODEC_ID_H263P:
2459     case AV_CODEC_ID_FLV1:
2460     case AV_CODEC_ID_RV10:
2461     case AV_CODEC_ID_RV20:
2462         if (CONFIG_H263_ENCODER)
2463             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2464         break;
2465     case AV_CODEC_ID_MJPEG:
2466     case AV_CODEC_ID_AMV:
2467         if (CONFIG_MJPEG_ENCODER)
2468             ff_mjpeg_encode_mb(s, s->block);
2469         break;
2470     default:
2471         av_assert1(0);
2472     }
2473 }
2474
2475 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2476 {
2477     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2478     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2479     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2480 }
2481
2482 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2483     int i;
2484
2485     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2486
2487     /* mpeg1 */
2488     d->mb_skip_run= s->mb_skip_run;
2489     for(i=0; i<3; i++)
2490         d->last_dc[i] = s->last_dc[i];
2491
2492     /* statistics */
2493     d->mv_bits= s->mv_bits;
2494     d->i_tex_bits= s->i_tex_bits;
2495     d->p_tex_bits= s->p_tex_bits;
2496     d->i_count= s->i_count;
2497     d->f_count= s->f_count;
2498     d->b_count= s->b_count;
2499     d->skip_count= s->skip_count;
2500     d->misc_bits= s->misc_bits;
2501     d->last_bits= 0;
2502
2503     d->mb_skipped= 0;
2504     d->qscale= s->qscale;
2505     d->dquant= s->dquant;
2506
2507     d->esc3_level_length= s->esc3_level_length;
2508 }
2509
2510 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2511     int i;
2512
2513     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2514     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2515
2516     /* mpeg1 */
2517     d->mb_skip_run= s->mb_skip_run;
2518     for(i=0; i<3; i++)
2519         d->last_dc[i] = s->last_dc[i];
2520
2521     /* statistics */
2522     d->mv_bits= s->mv_bits;
2523     d->i_tex_bits= s->i_tex_bits;
2524     d->p_tex_bits= s->p_tex_bits;
2525     d->i_count= s->i_count;
2526     d->f_count= s->f_count;
2527     d->b_count= s->b_count;
2528     d->skip_count= s->skip_count;
2529     d->misc_bits= s->misc_bits;
2530
2531     d->mb_intra= s->mb_intra;
2532     d->mb_skipped= s->mb_skipped;
2533     d->mv_type= s->mv_type;
2534     d->mv_dir= s->mv_dir;
2535     d->pb= s->pb;
2536     if(s->data_partitioning){
2537         d->pb2= s->pb2;
2538         d->tex_pb= s->tex_pb;
2539     }
2540     d->block= s->block;
2541     for(i=0; i<8; i++)
2542         d->block_last_index[i]= s->block_last_index[i];
2543     d->interlaced_dct= s->interlaced_dct;
2544     d->qscale= s->qscale;
2545
2546     d->esc3_level_length= s->esc3_level_length;
2547 }
2548
2549 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2550                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2551                            int *dmin, int *next_block, int motion_x, int motion_y)
2552 {
2553     int score;
2554     uint8_t *dest_backup[3];
2555
2556     copy_context_before_encode(s, backup, type);
2557
2558     s->block= s->blocks[*next_block];
2559     s->pb= pb[*next_block];
2560     if(s->data_partitioning){
2561         s->pb2   = pb2   [*next_block];
2562         s->tex_pb= tex_pb[*next_block];
2563     }
2564
2565     if(*next_block){
2566         memcpy(dest_backup, s->dest, sizeof(s->dest));
2567         s->dest[0] = s->sc.rd_scratchpad;
2568         s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2569         s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2570         av_assert0(s->linesize >= 32); //FIXME
2571     }
2572
2573     encode_mb(s, motion_x, motion_y);
2574
2575     score= put_bits_count(&s->pb);
2576     if(s->data_partitioning){
2577         score+= put_bits_count(&s->pb2);
2578         score+= put_bits_count(&s->tex_pb);
2579     }
2580
2581     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2582         ff_mpv_decode_mb(s, s->block);
2583
2584         score *= s->lambda2;
2585         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2586     }
2587
2588     if(*next_block){
2589         memcpy(s->dest, dest_backup, sizeof(s->dest));
2590     }
2591
2592     if(score<*dmin){
2593         *dmin= score;
2594         *next_block^=1;
2595
2596         copy_context_after_encode(best, s, type);
2597     }
2598 }
2599
2600 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2601     uint32_t *sq = ff_square_tab + 256;
2602     int acc=0;
2603     int x,y;
2604
2605     if(w==16 && h==16)
2606         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2607     else if(w==8 && h==8)
2608         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2609
2610     for(y=0; y<h; y++){
2611         for(x=0; x<w; x++){
2612             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2613         }
2614     }
2615
2616     av_assert2(acc>=0);
2617
2618     return acc;
2619 }
2620
2621 static int sse_mb(MpegEncContext *s){
2622     int w= 16;
2623     int h= 16;
2624
2625     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2626     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2627
2628     if(w==16 && h==16)
2629       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2630         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2631                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2632                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2633       }else{
2634         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2635                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2636                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2637       }
2638     else
2639         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2640                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2641                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2642 }
2643
2644 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2645     MpegEncContext *s= *(void**)arg;
2646
2647
2648     s->me.pre_pass=1;
2649     s->me.dia_size= s->avctx->pre_dia_size;
2650     s->first_slice_line=1;
2651     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2652         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2653             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2654         }
2655         s->first_slice_line=0;
2656     }
2657
2658     s->me.pre_pass=0;
2659
2660     return 0;
2661 }
2662
2663 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2664     MpegEncContext *s= *(void**)arg;
2665
2666     ff_check_alignment();
2667
2668     s->me.dia_size= s->avctx->dia_size;
2669     s->first_slice_line=1;
2670     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2671         s->mb_x=0; //for block init below
2672         ff_init_block_index(s);
2673         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2674             s->block_index[0]+=2;
2675             s->block_index[1]+=2;
2676             s->block_index[2]+=2;
2677             s->block_index[3]+=2;
2678
2679             /* compute motion vector & mb_type and store in context */
2680             if(s->pict_type==AV_PICTURE_TYPE_B)
2681                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2682             else
2683                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2684         }
2685         s->first_slice_line=0;
2686     }
2687     return 0;
2688 }
2689
2690 static int mb_var_thread(AVCodecContext *c, void *arg){
2691     MpegEncContext *s= *(void**)arg;
2692     int mb_x, mb_y;
2693
2694     ff_check_alignment();
2695
2696     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2697         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2698             int xx = mb_x * 16;
2699             int yy = mb_y * 16;
2700             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2701             int varc;
2702             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2703
2704             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2705                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2706
2707             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2708             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2709             s->me.mb_var_sum_temp    += varc;
2710         }
2711     }
2712     return 0;
2713 }
2714
2715 static void write_slice_end(MpegEncContext *s){
2716     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2717         if(s->partitioned_frame){
2718             ff_mpeg4_merge_partitions(s);
2719         }
2720
2721         ff_mpeg4_stuffing(&s->pb);
2722     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2723         ff_mjpeg_encode_stuffing(s);
2724     }
2725
2726     avpriv_align_put_bits(&s->pb);
2727     flush_put_bits(&s->pb);
2728
2729     if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2730         s->misc_bits+= get_bits_diff(s);
2731 }
2732
2733 static void write_mb_info(MpegEncContext *s)
2734 {
2735     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2736     int offset = put_bits_count(&s->pb);
2737     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2738     int gobn = s->mb_y / s->gob_index;
2739     int pred_x, pred_y;
2740     if (CONFIG_H263_ENCODER)
2741         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2742     bytestream_put_le32(&ptr, offset);
2743     bytestream_put_byte(&ptr, s->qscale);
2744     bytestream_put_byte(&ptr, gobn);
2745     bytestream_put_le16(&ptr, mba);
2746     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2747     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2748     /* 4MV not implemented */
2749     bytestream_put_byte(&ptr, 0); /* hmv2 */
2750     bytestream_put_byte(&ptr, 0); /* vmv2 */
2751 }
2752
2753 static void update_mb_info(MpegEncContext *s, int startcode)
2754 {
2755     if (!s->mb_info)
2756         return;
2757     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2758         s->mb_info_size += 12;
2759         s->prev_mb_info = s->last_mb_info;
2760     }
2761     if (startcode) {
2762         s->prev_mb_info = put_bits_count(&s->pb)/8;
2763         /* This might have incremented mb_info_size above, and we return without
2764          * actually writing any info into that slot yet. But in that case,
2765          * this will be called again at the start of the after writing the
2766          * start code, actually writing the mb info. */
2767         return;
2768     }
2769
2770     s->last_mb_info = put_bits_count(&s->pb)/8;
2771     if (!s->mb_info_size)
2772         s->mb_info_size += 12;
2773     write_mb_info(s);
2774 }
2775
2776 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2777 {
2778     if (   s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold
2779         && s->slice_context_count == 1
2780         && s->pb.buf == s->avctx->internal->byte_buffer) {
2781         int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2782         int vbv_pos     = s->vbv_delay_ptr - s->pb.buf;
2783
2784         uint8_t *new_buffer = NULL;
2785         int new_buffer_size = 0;
2786
2787         av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2788                               s->avctx->internal->byte_buffer_size + size_increase);
2789         if (!new_buffer)
2790             return AVERROR(ENOMEM);
2791
2792         memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2793         av_free(s->avctx->internal->byte_buffer);
2794         s->avctx->internal->byte_buffer      = new_buffer;
2795         s->avctx->internal->byte_buffer_size = new_buffer_size;
2796         rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2797         s->ptr_lastgob   = s->pb.buf + lastgob_pos;
2798         s->vbv_delay_ptr = s->pb.buf + vbv_pos;
2799     }
2800     if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < threshold)
2801         return AVERROR(EINVAL);
2802     return 0;
2803 }
2804
2805 static int encode_thread(AVCodecContext *c, void *arg){
2806     MpegEncContext *s= *(void**)arg;
2807     int mb_x, mb_y, pdif = 0;
2808     int chr_h= 16>>s->chroma_y_shift;
2809     int i, j;
2810     MpegEncContext best_s = { 0 }, backup_s;
2811     uint8_t bit_buf[2][MAX_MB_BYTES];
2812     uint8_t bit_buf2[2][MAX_MB_BYTES];
2813     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2814     PutBitContext pb[2], pb2[2], tex_pb[2];
2815
2816     ff_check_alignment();
2817
2818     for(i=0; i<2; i++){
2819         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2820         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2821         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2822     }
2823
2824     s->last_bits= put_bits_count(&s->pb);
2825     s->mv_bits=0;
2826     s->misc_bits=0;
2827     s->i_tex_bits=0;
2828     s->p_tex_bits=0;
2829     s->i_count=0;
2830     s->f_count=0;
2831     s->b_count=0;
2832     s->skip_count=0;
2833
2834     for(i=0; i<3; i++){
2835         /* init last dc values */
2836         /* note: quant matrix value (8) is implied here */
2837         s->last_dc[i] = 128 << s->intra_dc_precision;
2838
2839         s->current_picture.error[i] = 0;
2840     }
2841     if(s->codec_id==AV_CODEC_ID_AMV){
2842         s->last_dc[0] = 128*8/13;
2843         s->last_dc[1] = 128*8/14;
2844         s->last_dc[2] = 128*8/14;
2845     }
2846     s->mb_skip_run = 0;
2847     memset(s->last_mv, 0, sizeof(s->last_mv));
2848
2849     s->last_mv_dir = 0;
2850
2851     switch(s->codec_id){
2852     case AV_CODEC_ID_H263:
2853     case AV_CODEC_ID_H263P:
2854     case AV_CODEC_ID_FLV1:
2855         if (CONFIG_H263_ENCODER)
2856             s->gob_index = H263_GOB_HEIGHT(s->height);
2857         break;
2858     case AV_CODEC_ID_MPEG4:
2859         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2860             ff_mpeg4_init_partitions(s);
2861         break;
2862     }
2863
2864     s->resync_mb_x=0;
2865     s->resync_mb_y=0;
2866     s->first_slice_line = 1;
2867     s->ptr_lastgob = s->pb.buf;
2868     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2869         s->mb_x=0;
2870         s->mb_y= mb_y;
2871
2872         ff_set_qscale(s, s->qscale);
2873         ff_init_block_index(s);
2874
2875         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2876             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2877             int mb_type= s->mb_type[xy];
2878 //            int d;
2879             int dmin= INT_MAX;
2880             int dir;
2881             int size_increase =  s->avctx->internal->byte_buffer_size/4
2882                                + s->mb_width*MAX_MB_BYTES;
2883
2884             ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2885             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2886                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2887                 return -1;
2888             }
2889             if(s->data_partitioning){
2890                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2891                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2892                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2893                     return -1;
2894                 }
2895             }
2896
2897             s->mb_x = mb_x;
2898             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2899             ff_update_block_index(s);
2900
2901             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2902                 ff_h261_reorder_mb_index(s);
2903                 xy= s->mb_y*s->mb_stride + s->mb_x;
2904                 mb_type= s->mb_type[xy];
2905             }
2906
2907             /* write gob / video packet header  */
2908             if(s->rtp_mode){
2909                 int current_packet_size, is_gob_start;
2910
2911                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2912
2913                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2914
2915                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2916
2917                 switch(s->codec_id){
2918                 case AV_CODEC_ID_H263:
2919                 case AV_CODEC_ID_H263P:
2920                     if(!s->h263_slice_structured)
2921                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2922                     break;
2923                 case AV_CODEC_ID_MPEG2VIDEO:
2924                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2925                 case AV_CODEC_ID_MPEG1VIDEO:
2926                     if(s->mb_skip_run) is_gob_start=0;
2927                     break;
2928                 case AV_CODEC_ID_MJPEG:
2929                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2930                     break;
2931                 }
2932
2933                 if(is_gob_start){
2934                     if(s->start_mb_y != mb_y || mb_x!=0){
2935                         write_slice_end(s);
2936
2937                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2938                             ff_mpeg4_init_partitions(s);
2939                         }
2940                     }
2941
2942                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2943                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2944
2945                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2946                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2947                         int d = 100 / s->error_rate;
2948                         if(r % d == 0){
2949                             current_packet_size=0;
2950                             s->pb.buf_ptr= s->ptr_lastgob;
2951                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2952                         }
2953                     }
2954
2955                     if (s->avctx->rtp_callback){
2956                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2957                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2958                     }
2959                     update_mb_info(s, 1);
2960
2961                     switch(s->codec_id){
2962                     case AV_CODEC_ID_MPEG4:
2963                         if (CONFIG_MPEG4_ENCODER) {
2964                             ff_mpeg4_encode_video_packet_header(s);
2965                             ff_mpeg4_clean_buffers(s);
2966                         }
2967                     break;
2968                     case AV_CODEC_ID_MPEG1VIDEO:
2969                     case AV_CODEC_ID_MPEG2VIDEO:
2970                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2971                             ff_mpeg1_encode_slice_header(s);
2972                             ff_mpeg1_clean_buffers(s);
2973                         }
2974                     break;
2975                     case AV_CODEC_ID_H263:
2976                     case AV_CODEC_ID_H263P:
2977                         if (CONFIG_H263_ENCODER)
2978                             ff_h263_encode_gob_header(s, mb_y);
2979                     break;
2980                     }
2981
2982                     if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
2983                         int bits= put_bits_count(&s->pb);
2984                         s->misc_bits+= bits - s->last_bits;
2985                         s->last_bits= bits;
2986                     }
2987
2988                     s->ptr_lastgob += current_packet_size;
2989                     s->first_slice_line=1;
2990                     s->resync_mb_x=mb_x;
2991                     s->resync_mb_y=mb_y;
2992                 }
2993             }
2994
2995             if(  (s->resync_mb_x   == s->mb_x)
2996                && s->resync_mb_y+1 == s->mb_y){
2997                 s->first_slice_line=0;
2998             }
2999
3000             s->mb_skipped=0;
3001             s->dquant=0; //only for QP_RD
3002
3003             update_mb_info(s, 0);
3004
3005             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
3006                 int next_block=0;
3007                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3008
3009                 copy_context_before_encode(&backup_s, s, -1);
3010                 backup_s.pb= s->pb;
3011                 best_s.data_partitioning= s->data_partitioning;
3012                 best_s.partitioned_frame= s->partitioned_frame;
3013                 if(s->data_partitioning){
3014                     backup_s.pb2= s->pb2;
3015                     backup_s.tex_pb= s->tex_pb;
3016                 }
3017
3018                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3019                     s->mv_dir = MV_DIR_FORWARD;
3020                     s->mv_type = MV_TYPE_16X16;
3021                     s->mb_intra= 0;
3022                     s->mv[0][0][0] = s->p_mv_table[xy][0];
3023                     s->mv[0][0][1] = s->p_mv_table[xy][1];
3024                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
3025                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3026                 }
3027                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3028                     s->mv_dir = MV_DIR_FORWARD;
3029                     s->mv_type = MV_TYPE_FIELD;
3030                     s->mb_intra= 0;
3031                     for(i=0; i<2; i++){
3032                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3033                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3034                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3035                     }
3036                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
3037                                  &dmin, &next_block, 0, 0);
3038                 }
3039                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3040                     s->mv_dir = MV_DIR_FORWARD;
3041                     s->mv_type = MV_TYPE_16X16;
3042                     s->mb_intra= 0;
3043                     s->mv[0][0][0] = 0;
3044                     s->mv[0][0][1] = 0;
3045                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
3046                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3047                 }
3048                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3049                     s->mv_dir = MV_DIR_FORWARD;
3050                     s->mv_type = MV_TYPE_8X8;
3051                     s->mb_intra= 0;
3052                     for(i=0; i<4; i++){
3053                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3054                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3055                     }
3056                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
3057                                  &dmin, &next_block, 0, 0);
3058                 }
3059                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3060                     s->mv_dir = MV_DIR_FORWARD;
3061                     s->mv_type = MV_TYPE_16X16;
3062                     s->mb_intra= 0;
3063                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3064                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3065                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
3066                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3067                 }
3068                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3069                     s->mv_dir = MV_DIR_BACKWARD;
3070                     s->mv_type = MV_TYPE_16X16;
3071                     s->mb_intra= 0;
3072                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3073                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3074                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
3075                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3076                 }
3077                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3078                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3079                     s->mv_type = MV_TYPE_16X16;
3080                     s->mb_intra= 0;
3081                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3082                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3083                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3084                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3085                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
3086                                  &dmin, &next_block, 0, 0);
3087                 }
3088                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3089                     s->mv_dir = MV_DIR_FORWARD;
3090                     s->mv_type = MV_TYPE_FIELD;
3091                     s->mb_intra= 0;
3092                     for(i=0; i<2; i++){
3093                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3094                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3095                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3096                     }
3097                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
3098                                  &dmin, &next_block, 0, 0);
3099                 }
3100                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3101                     s->mv_dir = MV_DIR_BACKWARD;
3102                     s->mv_type = MV_TYPE_FIELD;
3103                     s->mb_intra= 0;
3104                     for(i=0; i<2; i++){
3105                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3106                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3107                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3108                     }
3109                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
3110                                  &dmin, &next_block, 0, 0);
3111                 }
3112                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3113                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3114                     s->mv_type = MV_TYPE_FIELD;
3115                     s->mb_intra= 0;
3116                     for(dir=0; dir<2; dir++){
3117                         for(i=0; i<2; i++){
3118                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3119                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3120                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3121                         }
3122                     }
3123                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3124                                  &dmin, &next_block, 0, 0);
3125                 }
3126                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3127                     s->mv_dir = 0;
3128                     s->mv_type = MV_TYPE_16X16;
3129                     s->mb_intra= 1;
3130                     s->mv[0][0][0] = 0;
3131                     s->mv[0][0][1] = 0;
3132                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3133                                  &dmin, &next_block, 0, 0);
3134                     if(s->h263_pred || s->h263_aic){
3135                         if(best_s.mb_intra)
3136                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3137                         else
3138                             ff_clean_intra_table_entries(s); //old mode?
3139                     }
3140                 }
3141
3142                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3143                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3144                         const int last_qp= backup_s.qscale;
3145                         int qpi, qp, dc[6];
3146                         int16_t ac[6][16];
3147                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3148                         static const int dquant_tab[4]={-1,1,-2,2};
3149                         int storecoefs = s->mb_intra && s->dc_val[0];
3150
3151                         av_assert2(backup_s.dquant == 0);
3152
3153                         //FIXME intra
3154                         s->mv_dir= best_s.mv_dir;
3155                         s->mv_type = MV_TYPE_16X16;
3156                         s->mb_intra= best_s.mb_intra;
3157                         s->mv[0][0][0] = best_s.mv[0][0][0];
3158                         s->mv[0][0][1] = best_s.mv[0][0][1];
3159                         s->mv[1][0][0] = best_s.mv[1][0][0];
3160                         s->mv[1][0][1] = best_s.mv[1][0][1];
3161
3162                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3163                         for(; qpi<4; qpi++){
3164                             int dquant= dquant_tab[qpi];
3165                             qp= last_qp + dquant;
3166                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3167                                 continue;
3168                             backup_s.dquant= dquant;
3169                             if(storecoefs){
3170                                 for(i=0; i<6; i++){
3171                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3172                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3173                                 }
3174                             }
3175
3176                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3177                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3178                             if(best_s.qscale != qp){
3179                                 if(storecoefs){
3180                                     for(i=0; i<6; i++){
3181                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3182                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3183                                     }
3184                                 }
3185                             }
3186                         }
3187                     }
3188                 }
3189                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3190                     int mx= s->b_direct_mv_table[xy][0];
3191                     int my= s->b_direct_mv_table[xy][1];
3192
3193                     backup_s.dquant = 0;
3194                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3195                     s->mb_intra= 0;
3196                     ff_mpeg4_set_direct_mv(s, mx, my);
3197                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3198                                  &dmin, &next_block, mx, my);
3199                 }
3200                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3201                     backup_s.dquant = 0;
3202                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3203                     s->mb_intra= 0;
3204                     ff_mpeg4_set_direct_mv(s, 0, 0);
3205                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3206                                  &dmin, &next_block, 0, 0);
3207                 }
3208                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3209                     int coded=0;
3210                     for(i=0; i<6; i++)
3211                         coded |= s->block_last_index[i];
3212                     if(coded){
3213                         int mx,my;
3214                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3215                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3216                             mx=my=0; //FIXME find the one we actually used
3217                             ff_mpeg4_set_direct_mv(s, mx, my);
3218                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3219                             mx= s->mv[1][0][0];
3220                             my= s->mv[1][0][1];
3221                         }else{
3222                             mx= s->mv[0][0][0];
3223                             my= s->mv[0][0][1];
3224                         }
3225
3226                         s->mv_dir= best_s.mv_dir;
3227                         s->mv_type = best_s.mv_type;
3228                         s->mb_intra= 0;
3229 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3230                         s->mv[0][0][1] = best_s.mv[0][0][1];
3231                         s->mv[1][0][0] = best_s.mv[1][0][0];
3232                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3233                         backup_s.dquant= 0;
3234                         s->skipdct=1;
3235                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3236                                         &dmin, &next_block, mx, my);
3237                         s->skipdct=0;
3238                     }
3239                 }
3240
3241                 s->current_picture.qscale_table[xy] = best_s.qscale;
3242
3243                 copy_context_after_encode(s, &best_s, -1);
3244
3245                 pb_bits_count= put_bits_count(&s->pb);
3246                 flush_put_bits(&s->pb);
3247                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3248                 s->pb= backup_s.pb;
3249
3250                 if(s->data_partitioning){
3251                     pb2_bits_count= put_bits_count(&s->pb2);
3252                     flush_put_bits(&s->pb2);
3253                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3254                     s->pb2= backup_s.pb2;
3255
3256                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3257                     flush_put_bits(&s->tex_pb);
3258                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3259                     s->tex_pb= backup_s.tex_pb;
3260                 }
3261                 s->last_bits= put_bits_count(&s->pb);
3262
3263                 if (CONFIG_H263_ENCODER &&
3264                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3265                     ff_h263_update_motion_val(s);
3266
3267                 if(next_block==0){ //FIXME 16 vs linesize16
3268                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad                     , s->linesize  ,16);
3269                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3270                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3271                 }
3272
3273                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3274                     ff_mpv_decode_mb(s, s->block);
3275             } else {
3276                 int motion_x = 0, motion_y = 0;
3277                 s->mv_type=MV_TYPE_16X16;
3278                 // only one MB-Type possible
3279
3280                 switch(mb_type){
3281                 case CANDIDATE_MB_TYPE_INTRA:
3282                     s->mv_dir = 0;
3283                     s->mb_intra= 1;
3284                     motion_x= s->mv[0][0][0] = 0;
3285                     motion_y= s->mv[0][0][1] = 0;
3286                     break;
3287                 case CANDIDATE_MB_TYPE_INTER:
3288                     s->mv_dir = MV_DIR_FORWARD;
3289                     s->mb_intra= 0;
3290                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3291                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3292                     break;
3293                 case CANDIDATE_MB_TYPE_INTER_I:
3294                     s->mv_dir = MV_DIR_FORWARD;
3295                     s->mv_type = MV_TYPE_FIELD;
3296                     s->mb_intra= 0;
3297                     for(i=0; i<2; i++){
3298                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3299                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3300                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3301                     }
3302                     break;
3303                 case CANDIDATE_MB_TYPE_INTER4V:
3304                     s->mv_dir = MV_DIR_FORWARD;
3305                     s->mv_type = MV_TYPE_8X8;
3306                     s->mb_intra= 0;
3307                     for(i=0; i<4; i++){
3308                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3309                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3310                     }
3311                     break;
3312                 case CANDIDATE_MB_TYPE_DIRECT:
3313                     if (CONFIG_MPEG4_ENCODER) {
3314                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3315                         s->mb_intra= 0;
3316                         motion_x=s->b_direct_mv_table[xy][0];
3317                         motion_y=s->b_direct_mv_table[xy][1];
3318                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3319                     }
3320                     break;
3321                 case CANDIDATE_MB_TYPE_DIRECT0:
3322                     if (CONFIG_MPEG4_ENCODER) {
3323                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3324                         s->mb_intra= 0;
3325                         ff_mpeg4_set_direct_mv(s, 0, 0);
3326                     }
3327                     break;
3328                 case CANDIDATE_MB_TYPE_BIDIR:
3329                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3330                     s->mb_intra= 0;
3331                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3332                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3333                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3334                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3335                     break;
3336                 case CANDIDATE_MB_TYPE_BACKWARD:
3337                     s->mv_dir = MV_DIR_BACKWARD;
3338                     s->mb_intra= 0;
3339                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3340                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3341                     break;
3342                 case CANDIDATE_MB_TYPE_FORWARD:
3343                     s->mv_dir = MV_DIR_FORWARD;
3344                     s->mb_intra= 0;
3345                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3346                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3347                     break;
3348                 case CANDIDATE_MB_TYPE_FORWARD_I:
3349                     s->mv_dir = MV_DIR_FORWARD;
3350                     s->mv_type = MV_TYPE_FIELD;
3351                     s->mb_intra= 0;
3352                     for(i=0; i<2; i++){
3353                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3354                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3355                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3356                     }
3357                     break;
3358                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3359                     s->mv_dir = MV_DIR_BACKWARD;
3360                     s->mv_type = MV_TYPE_FIELD;
3361                     s->mb_intra= 0;
3362                     for(i=0; i<2; i++){
3363                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3364                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3365                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3366                     }
3367                     break;
3368                 case CANDIDATE_MB_TYPE_BIDIR_I:
3369                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3370                     s->mv_type = MV_TYPE_FIELD;
3371                     s->mb_intra= 0;
3372                     for(dir=0; dir<2; dir++){
3373                         for(i=0; i<2; i++){
3374                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3375                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3376                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3377                         }
3378                     }
3379                     break;
3380                 default:
3381                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3382                 }
3383
3384                 encode_mb(s, motion_x, motion_y);
3385
3386                 // RAL: Update last macroblock type
3387                 s->last_mv_dir = s->mv_dir;
3388
3389                 if (CONFIG_H263_ENCODER &&
3390                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3391                     ff_h263_update_motion_val(s);
3392
3393                 ff_mpv_decode_mb(s, s->block);
3394             }
3395
3396             /* clean the MV table in IPS frames for direct mode in B frames */
3397             if(s->mb_intra /* && I,P,S_TYPE */){
3398                 s->p_mv_table[xy][0]=0;
3399                 s->p_mv_table[xy][1]=0;
3400             }
3401
3402             if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3403                 int w= 16;
3404                 int h= 16;
3405
3406                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3407                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3408
3409                 s->current_picture.error[0] += sse(
3410                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3411                     s->dest[0], w, h, s->linesize);
3412                 s->current_picture.error[1] += sse(
3413                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3414                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3415                 s->current_picture.error[2] += sse(
3416                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3417                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3418             }
3419             if(s->loop_filter){
3420                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3421                     ff_h263_loop_filter(s);
3422             }
3423             ff_dlog(s->avctx, "MB %d %d bits\n",
3424                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3425         }
3426     }
3427
3428     //not beautiful here but we must write it before flushing so it has to be here
3429     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3430         ff_msmpeg4_encode_ext_header(s);
3431
3432     write_slice_end(s);
3433
3434     /* Send the last GOB if RTP */
3435     if (s->avctx->rtp_callback) {
3436         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3437         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3438         /* Call the RTP callback to send the last GOB */
3439         emms_c();
3440         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3441     }
3442
3443     return 0;
3444 }
3445
3446 #define MERGE(field) dst->field += src->field; src->field=0
3447 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3448     MERGE(me.scene_change_score);
3449     MERGE(me.mc_mb_var_sum_temp);
3450     MERGE(me.mb_var_sum_temp);
3451 }
3452
3453 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3454     int i;
3455
3456     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3457     MERGE(dct_count[1]);
3458     MERGE(mv_bits);
3459     MERGE(i_tex_bits);
3460     MERGE(p_tex_bits);
3461     MERGE(i_count);
3462     MERGE(f_count);
3463     MERGE(b_count);
3464     MERGE(skip_count);
3465     MERGE(misc_bits);
3466     MERGE(er.error_count);
3467     MERGE(padding_bug_score);
3468     MERGE(current_picture.error[0]);
3469     MERGE(current_picture.error[1]);
3470     MERGE(current_picture.error[2]);
3471
3472     if(dst->avctx->noise_reduction){
3473         for(i=0; i<64; i++){
3474             MERGE(dct_error_sum[0][i]);
3475             MERGE(dct_error_sum[1][i]);
3476         }
3477     }
3478
3479     assert(put_bits_count(&src->pb) % 8 ==0);
3480     assert(put_bits_count(&dst->pb) % 8 ==0);
3481     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3482     flush_put_bits(&dst->pb);
3483 }
3484
3485 static int estimate_qp(MpegEncContext *s, int dry_run){
3486     if (s->next_lambda){
3487         s->current_picture_ptr->f->quality =
3488         s->current_picture.f->quality = s->next_lambda;
3489         if(!dry_run) s->next_lambda= 0;
3490     } else if (!s->fixed_qscale) {
3491         s->current_picture_ptr->f->quality =
3492         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3493         if (s->current_picture.f->quality < 0)
3494             return -1;
3495     }
3496
3497     if(s->adaptive_quant){
3498         switch(s->codec_id){
3499         case AV_CODEC_ID_MPEG4:
3500             if (CONFIG_MPEG4_ENCODER)
3501                 ff_clean_mpeg4_qscales(s);
3502             break;
3503         case AV_CODEC_ID_H263:
3504         case AV_CODEC_ID_H263P:
3505         case AV_CODEC_ID_FLV1:
3506             if (CONFIG_H263_ENCODER)
3507                 ff_clean_h263_qscales(s);
3508             break;
3509         default:
3510             ff_init_qscale_tab(s);
3511         }
3512
3513         s->lambda= s->lambda_table[0];
3514         //FIXME broken
3515     }else
3516         s->lambda = s->current_picture.f->quality;
3517     update_qscale(s);
3518     return 0;
3519 }
3520
3521 /* must be called before writing the header */
3522 static void set_frame_distances(MpegEncContext * s){
3523     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3524     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3525
3526     if(s->pict_type==AV_PICTURE_TYPE_B){
3527         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3528         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3529     }else{
3530         s->pp_time= s->time - s->last_non_b_time;
3531         s->last_non_b_time= s->time;
3532         assert(s->picture_number==0 || s->pp_time > 0);
3533     }
3534 }
3535
3536 static int encode_picture(MpegEncContext *s, int picture_number)
3537 {
3538     int i, ret;
3539     int bits;
3540     int context_count = s->slice_context_count;
3541
3542     s->picture_number = picture_number;
3543
3544     /* Reset the average MB variance */
3545     s->me.mb_var_sum_temp    =
3546     s->me.mc_mb_var_sum_temp = 0;
3547
3548     /* we need to initialize some time vars before we can encode b-frames */
3549     // RAL: Condition added for MPEG1VIDEO
3550     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3551         set_frame_distances(s);
3552     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3553         ff_set_mpeg4_time(s);
3554
3555     s->me.scene_change_score=0;
3556
3557 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3558
3559     if(s->pict_type==AV_PICTURE_TYPE_I){
3560         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3561         else                        s->no_rounding=0;
3562     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3563         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3564             s->no_rounding ^= 1;
3565     }
3566
3567     if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3568         if (estimate_qp(s,1) < 0)
3569             return -1;
3570         ff_get_2pass_fcode(s);
3571     } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3572         if(s->pict_type==AV_PICTURE_TYPE_B)
3573             s->lambda= s->last_lambda_for[s->pict_type];
3574         else
3575             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3576         update_qscale(s);
3577     }
3578
3579     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3580         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3581         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3582         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3583         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3584     }
3585
3586     s->mb_intra=0; //for the rate distortion & bit compare functions
3587     for(i=1; i<context_count; i++){
3588         ret = ff_update_duplicate_context(s->thread_context[i], s);
3589         if (ret < 0)
3590             return ret;
3591     }
3592
3593     if(ff_init_me(s)<0)
3594         return -1;
3595
3596     /* Estimate motion for every MB */
3597     if(s->pict_type != AV_PICTURE_TYPE_I){
3598         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3599         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3600         if (s->pict_type != AV_PICTURE_TYPE_B) {
3601             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3602                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3603             }
3604         }
3605
3606         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3607     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3608         /* I-Frame */
3609         for(i=0; i<s->mb_stride*s->mb_height; i++)
3610             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3611
3612         if(!s->fixed_qscale){
3613             /* finding spatial complexity for I-frame rate control */
3614             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3615         }
3616     }
3617     for(i=1; i<context_count; i++){
3618         merge_context_after_me(s, s->thread_context[i]);
3619     }
3620     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3621     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3622     emms_c();
3623
3624     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3625         s->pict_type= AV_PICTURE_TYPE_I;
3626         for(i=0; i<s->mb_stride*s->mb_height; i++)
3627             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3628         if(s->msmpeg4_version >= 3)
3629             s->no_rounding=1;
3630         ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3631                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3632     }
3633
3634     if(!s->umvplus){
3635         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3636             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3637
3638             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3639                 int a,b;
3640                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3641                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3642                 s->f_code= FFMAX3(s->f_code, a, b);
3643             }
3644
3645             ff_fix_long_p_mvs(s);
3646             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3647             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3648                 int j;
3649                 for(i=0; i<2; i++){
3650                     for(j=0; j<2; j++)
3651                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3652                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3653                 }
3654             }
3655         }
3656
3657         if(s->pict_type==AV_PICTURE_TYPE_B){
3658             int a, b;
3659
3660             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3661             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3662             s->f_code = FFMAX(a, b);
3663
3664             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3665             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3666             s->b_code = FFMAX(a, b);
3667
3668             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3669             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3670             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3671             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3672             if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3673                 int dir, j;
3674                 for(dir=0; dir<2; dir++){
3675                     for(i=0; i<2; i++){
3676                         for(j=0; j<2; j++){
3677                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3678                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3679                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3680                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3681                         }
3682                     }
3683                 }
3684             }
3685         }
3686     }
3687
3688     if (estimate_qp(s, 0) < 0)
3689         return -1;
3690
3691     if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3692         s->pict_type == AV_PICTURE_TYPE_I &&
3693         !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3694         s->qscale= 3; //reduce clipping problems
3695
3696     if (s->out_format == FMT_MJPEG) {
3697         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3698         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3699
3700         if (s->avctx->intra_matrix) {
3701             chroma_matrix =
3702             luma_matrix = s->avctx->intra_matrix;
3703         }
3704         if (s->avctx->chroma_intra_matrix)
3705             chroma_matrix = s->avctx->chroma_intra_matrix;
3706
3707         /* for mjpeg, we do include qscale in the matrix */
3708         for(i=1;i<64;i++){
3709             int j = s->idsp.idct_permutation[i];
3710
3711             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3712             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3713         }
3714         s->y_dc_scale_table=
3715         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3716         s->chroma_intra_matrix[0] =
3717         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3718         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3719                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3720         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3721                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3722         s->qscale= 8;
3723     }
3724     if(s->codec_id == AV_CODEC_ID_AMV){
3725         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3726         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3727         for(i=1;i<64;i++){
3728             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3729
3730             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3731             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3732         }
3733         s->y_dc_scale_table= y;
3734         s->c_dc_scale_table= c;
3735         s->intra_matrix[0] = 13;
3736         s->chroma_intra_matrix[0] = 14;
3737         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3738                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3739         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3740                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3741         s->qscale= 8;
3742     }
3743
3744     //FIXME var duplication
3745     s->current_picture_ptr->f->key_frame =
3746     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3747     s->current_picture_ptr->f->pict_type =
3748     s->current_picture.f->pict_type = s->pict_type;
3749
3750     if (s->current_picture.f->key_frame)
3751         s->picture_in_gop_number=0;
3752
3753     s->mb_x = s->mb_y = 0;
3754     s->last_bits= put_bits_count(&s->pb);
3755     switch(s->out_format) {
3756     case FMT_MJPEG:
3757         if (CONFIG_MJPEG_ENCODER)
3758             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3759                                            s->intra_matrix, s->chroma_intra_matrix);
3760         break;
3761     case FMT_H261:
3762         if (CONFIG_H261_ENCODER)
3763             ff_h261_encode_picture_header(s, picture_number);
3764         break;
3765     case FMT_H263:
3766         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3767             ff_wmv2_encode_picture_header(s, picture_number);
3768         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3769             ff_msmpeg4_encode_picture_header(s, picture_number);
3770         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3771             ff_mpeg4_encode_picture_header(s, picture_number);
3772         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3773             ret = ff_rv10_encode_picture_header(s, picture_number);
3774             if (ret < 0)
3775                 return ret;
3776         }
3777         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3778             ff_rv20_encode_picture_header(s, picture_number);
3779         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3780             ff_flv_encode_picture_header(s, picture_number);
3781         else if (CONFIG_H263_ENCODER)
3782             ff_h263_encode_picture_header(s, picture_number);
3783         break;
3784     case FMT_MPEG1:
3785         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3786             ff_mpeg1_encode_picture_header(s, picture_number);
3787         break;
3788     default:
3789         av_assert0(0);
3790     }
3791     bits= put_bits_count(&s->pb);
3792     s->header_bits= bits - s->last_bits;
3793
3794     for(i=1; i<context_count; i++){
3795         update_duplicate_context_after_me(s->thread_context[i], s);
3796     }
3797     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3798     for(i=1; i<context_count; i++){
3799         if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3800             set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-32));
3801         merge_context_after_encode(s, s->thread_context[i]);
3802     }
3803     emms_c();
3804     return 0;
3805 }
3806
3807 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3808     const int intra= s->mb_intra;
3809     int i;
3810
3811     s->dct_count[intra]++;
3812
3813     for(i=0; i<64; i++){
3814         int level= block[i];
3815
3816         if(level){
3817             if(level>0){
3818                 s->dct_error_sum[intra][i] += level;
3819                 level -= s->dct_offset[intra][i];
3820                 if(level<0) level=0;
3821             }else{
3822                 s->dct_error_sum[intra][i] -= level;
3823                 level += s->dct_offset[intra][i];
3824                 if(level>0) level=0;
3825             }
3826             block[i]= level;
3827         }
3828     }
3829 }
3830
/**
 * Quantize one 8x8 block with a rate-distortion ("trellis") search.
 *
 * The block is passed through s->fdsp.fdct() in place (and through
 * s->denoise_dct() when s->dct_error_sum is set). Every scan position up to
 * the last one that survives plain thresholding gets one or two candidate
 * levels; a dynamic programme over the scan positions then selects the
 * levels and the end position that minimise
 * distortion + lambda * VLC length taken from the length tables.
 *
 * @param s        encoder context (DCT, quant matrices, VLC length tables,
 *                 lambda2, output format)
 * @param block    in: input of s->fdsp.fdct(); out: chosen levels, written at
 *                 s->intra_scantable.permutated positions, everything else
 *                 from the first coded index on is zeroed
 * @param n        block index; n < 4 selects the luma DC scale and matrices,
 *                 larger values the chroma ones
 * @param qscale   quantizer scale, also used to index the q_*_matrix tables
 * @param overflow receives non-zero when a candidate level may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient, or a value below the
 *         first coded index (0 for intra blocks, -1 for inter blocks) when
 *         nothing is coded
 */
3831 static int dct_quantize_trellis_c(MpegEncContext *s,
3832                                   int16_t *block, int n,
3833                                   int qscale, int *overflow){
3834     const int *qmat;
3835     const uint16_t *matrix;
3836     const uint8_t *scantable= s->intra_scantable.scantable;
3837     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3838     int max=0;
3839     unsigned int threshold1, threshold2;
3840     int bias=0;
     /* run_tab/level_tab[i+1]: run and level of the best code ending at scan position i */
3841     int run_tab[65];
3842     int level_tab[65];
     /* score_tab[i]: best score found for coding everything before scan position i */
3843     int score_tab[65];
     /* survivor[]: positions still worth considering as the start of the next run */
3844     int survivor[65];
3845     int survivor_count;
3846     int last_run=0;
3847     int last_level=0;
3848     int last_score= 0;
3849     int last_i;
     /* coeff[k][i]: k-th candidate level at scan position i; coeff_count[i]: number of candidates */
3850     int coeff[2][64];
3851     int coeff_count[64];
3852     int qmul, qadd, start_i, last_non_zero, i, dc;
3853     const int esc_length= s->ac_esc_length;
3854     uint8_t * length;
3855     uint8_t * last_length;
3856     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3857
3858     s->fdsp.fdct(block);
3859
3860     if(s->dct_error_sum)
3861         s->denoise_dct(s, block);
     /* dequantisation constants for the H.263/H.261 branch below */
3862     qmul= qscale*16;
3863     qadd= ((qscale-1)|1)*8;
3864
3865     if (s->mb_intra) {
3866         int q;
3867         if (!s->h263_aic) {
3868             if (n < 4)
3869                 q = s->y_dc_scale;
3870             else
3871                 q = s->c_dc_scale;
3872             q = q << 3;
3873         } else{
3874             /* For AIC we skip quant/dequant of INTRADC */
3875             q = 1 << 3;
3876             qadd=0;
3877         }
3878
3879         /* note: block[0] is assumed to be positive */
3880         block[0] = (block[0] + (q >> 1)) / q;
         /* the DC coefficient is done; the search only covers scan positions >= 1 */
3881         start_i = 1;
3882         last_non_zero = 0;
3883         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3884         matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3885         if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3886             bias= 1<<(QMAT_SHIFT-1);
3887
         /* use the dedicated chroma length tables only when they are set */
3888         if (n > 3 && s->intra_chroma_ac_vlc_length) {
3889             length     = s->intra_chroma_ac_vlc_length;
3890             last_length= s->intra_chroma_ac_vlc_last_length;
3891         } else {
3892             length     = s->intra_ac_vlc_length;
3893             last_length= s->intra_ac_vlc_last_length;
3894         }
3895     } else {
3896         start_i = 0;
3897         last_non_zero = -1;
3898         qmat = s->q_inter_matrix[qscale];
3899         matrix = s->inter_matrix;
3900         length     = s->inter_ac_vlc_length;
3901         last_length= s->inter_ac_vlc_last_length;
3902     }
3903     last_i= start_i;
3904
     /* (unsigned)(level+threshold1) > threshold2 is true exactly when
      * level lies outside [-threshold1, threshold1] */
3905     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3906     threshold2= (threshold1<<1);
3907
     /* scan backwards for the last coefficient that exceeds the threshold */
3908     for(i=63; i>=start_i; i--) {
3909         const int j = scantable[i];
3910         int level = block[j] * qmat[j];
3911
3912         if(((unsigned)(level+threshold1))>threshold2){
3913             last_non_zero = i;
3914             break;
3915         }
3916     }
3917
     /* build the candidate levels for every position up to last_non_zero */
3918     for(i=start_i; i<=last_non_zero; i++) {
3919         const int j = scantable[i];
3920         int level = block[j] * qmat[j];
3921
3922 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3923 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3924         if(((unsigned)(level+threshold1))>threshold2){
             /* candidates: the plainly quantized level and the one a step closer to zero */
3925             if(level>0){
3926                 level= (bias + level)>>QMAT_SHIFT;
3927                 coeff[0][i]= level;
3928                 coeff[1][i]= level-1;
3929 //                coeff[2][k]= level-2;
3930             }else{
3931                 level= (bias - level)>>QMAT_SHIFT;
3932                 coeff[0][i]= -level;
3933                 coeff[1][i]= -level+1;
3934 //                coeff[2][k]= -level+2;
3935             }
             /* level holds the magnitude here; a magnitude of 1 has no second non-zero candidate */
3936             coeff_count[i]= FFMIN(level, 2);
3937             av_assert2(coeff_count[i]);
3938             max |=level;
3939         }else{
             /* below threshold: single candidate of magnitude 1 carrying the sign of level */
3940             coeff[0][i]= (level>>31)|1;
3941             coeff_count[i]= 1;
3942         }
3943     }
3944
3945     *overflow= s->max_qcoeff < max; //overflow might have happened
3946
     /* nothing survived thresholding: clear the coefficients and leave */
3947     if(last_non_zero < start_i){
3948         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3949         return last_non_zero;
3950     }
3951
3952     score_tab[start_i]= 0;
3953     survivor[0]= start_i;
3954     survivor_count= 1;
3955
     /* forward pass: for each scan position find the cheapest way to end a (run, level) code there */
3956     for(i=start_i; i<=last_non_zero; i++){
3957         int level_index, j, zero_distortion;
3958         int dct_coeff= FFABS(block[ scantable[i] ]);
3959         int best_score=256*256*256*120;
3960
3961         if (s->fdsp.fdct == ff_fdct_ifast)
3962             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
         /* distortion of coding this coefficient as zero; candidate distortions are taken relative to it */
3963         zero_distortion= dct_coeff*dct_coeff;
3964
3965         for(level_index=0; level_index < coeff_count[i]; level_index++){
3966             int distortion;
3967             int level= coeff[level_index][i];
3968             const int alevel= FFABS(level);
3969             int unquant_coeff;
3970
3971             av_assert2(level);
3972
             /* reconstruct the magnitude the candidate would dequantize to, per output format */
3973             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3974                 unquant_coeff= alevel*qmul + qadd;
3975             } else if(s->out_format == FMT_MJPEG) {
3976                 j = s->idsp.idct_permutation[scantable[i]];
3977                 unquant_coeff = alevel * matrix[j] * 8;
3978             }else{ //MPEG1
3979                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3980                 if(s->mb_intra){
3981                         unquant_coeff = (int)(  alevel  * qscale * matrix[j]) >> 3;
3982                         unquant_coeff =   (unquant_coeff - 1) | 1;
3983                 }else{
3984                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[j])) >> 4;
3985                         unquant_coeff =   (unquant_coeff - 1) | 1;
3986                 }
3987                 unquant_coeff<<= 3;
3988             }
3989
3990             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
             /* bias the level so that -64..63 maps to 0..127, the range tested before the length table lookup */
3991             level+=64;
3992             if((level&(~127)) == 0){
3993                 for(j=survivor_count-1; j>=0; j--){
3994                     int run= i - survivor[j];
3995                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3996                     score += score_tab[i-run];
3997
3998                     if(score < best_score){
3999                         best_score= score;
4000                         run_tab[i+1]= run;
4001                         level_tab[i+1]= level-64;
4002                     }
4003                 }
4004
                 /* for these formats the final code uses the separate last_length table,
                  * so the best ending is tracked while scanning */
4005                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4006                     for(j=survivor_count-1; j>=0; j--){
4007                         int run= i - survivor[j];
4008                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4009                         score += score_tab[i-run];
4010                         if(score < last_score){
4011                             last_score= score;
4012                             last_run= run;
4013                             last_level= level-64;
4014                             last_i= i+1;
4015                         }
4016                     }
4017                 }
4018             }else{
                 /* level outside the table range: charge the escape code length instead */
4019                 distortion += esc_length*lambda;
4020                 for(j=survivor_count-1; j>=0; j--){
4021                     int run= i - survivor[j];
4022                     int score= distortion + score_tab[i-run];
4023
4024                     if(score < best_score){
4025                         best_score= score;
4026                         run_tab[i+1]= run;
4027                         level_tab[i+1]= level-64;
4028                     }
4029                 }
4030
4031                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4032                   for(j=survivor_count-1; j>=0; j--){
4033                         int run= i - survivor[j];
4034                         int score= distortion + score_tab[i-run];
4035                         if(score < last_score){
4036                             last_score= score;
4037                             last_run= run;
4038                             last_level= level-64;
4039                             last_i= i+1;
4040                         }
4041                     }
4042                 }
4043             }
4044         }
4045
4046         score_tab[i+1]= best_score;
4047
         /* drop survivors whose score is worse than the new best (with a lambda of slack for long blocks) */
4048         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
4049         if(last_non_zero <= 27){
4050             for(; survivor_count; survivor_count--){
4051                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4052                     break;
4053             }
4054         }else{
4055             for(; survivor_count; survivor_count--){
4056                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4057                     break;
4058             }
4059         }
4060
4061         survivor[ survivor_count++ ]= i+1;
4062     }
4063
     /* other formats: pick the end position afterwards, adding 2*lambda for every end position but 0 */
4064     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4065         last_score= 256*256*256*120;
4066         for(i= survivor[0]; i<=last_non_zero + 1; i++){
4067             int score= score_tab[i];
4068             if(i) score += lambda*2; //FIXME exacter?
4069
4070             if(score < last_score){
4071                 last_score= score;
4072                 last_i= i;
4073                 last_level= level_tab[i];
4074                 last_run= run_tab[i];
4075             }
4076         }
4077     }
4078
4079     s->coded_score[n] = last_score;
4080
     /* remember |block[0]| before the coefficients are cleared; it is needed by the special case below */
4081     dc= FFABS(block[0]);
4082     last_non_zero= last_i - 1;
4083     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4084
4085     if(last_non_zero < start_i)
4086         return last_non_zero;
4087
     /* special case, reachable only with start_i == 0: the sole coded coefficient sits at
      * scan position 0; re-decide between its candidates and coding nothing at all */
4088     if(last_non_zero == 0 && start_i == 0){
4089         int best_level= 0;
4090         int best_score= dc * dc;
4091
4092         for(i=0; i<coeff_count[0]; i++){
4093             int level= coeff[i][0];
4094             int alevel= FFABS(level);
4095             int unquant_coeff, score, distortion;
4096
4097             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4098                     unquant_coeff= (alevel*qmul + qadd)>>3;
4099             }else{ //MPEG1
4100                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) matrix[0])) >> 4;
4101                     unquant_coeff =   (unquant_coeff - 1) | 1;
4102             }
4103             unquant_coeff = (unquant_coeff + 4) >> 3;
4104             unquant_coeff<<= 3 + 3;
4105
4106             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4107             level+=64;
4108             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4109             else                    score= distortion + esc_length*lambda;
4110
4111             if(score < best_score){
4112                 best_score= score;
4113                 best_level= level - 64;
4114             }
4115         }
4116         block[0]= best_level;
4117         s->coded_score[n] = best_score - dc*dc;
4118         if(best_level == 0) return -1;
4119         else                return last_non_zero;
4120     }
4121
     /* back-trace: write the final level, then follow run_tab/level_tab towards start_i */
4122     i= last_i;
4123     av_assert2(last_level);
4124
4125     block[ perm_scantable[last_non_zero] ]= last_level;
4126     i -= last_run + 1;
4127
4128     for(; i>start_i; i -= run_tab[i] + 1){
4129         block[ perm_scantable[i-1] ]= level_tab[i];
4130     }
4131
4132     return last_non_zero;
4133 }
4134
4135 //#define REFINE_STATS 1
/* Defining REFINE_STATS compiles in the static counters, timers and the
 * debug log line guarded by #ifdef REFINE_STATS in dct_quantize_refine(). */

/* Table of rounded, scaled cosine products written by build_basis():
 * the first index is the permuted coefficient index perm[8*i + j], the
 * second is 8*x + y. dct_quantize_refine() calls build_basis() while
 * basis[0][0] is still 0 and hands rows of this table to
 * add_8x8basis()/try_8x8basis(). */
4136 static int16_t basis[64][64];
4137
4138 static void build_basis(uint8_t *perm){
4139     int i, j, x, y;
4140     emms_c();
4141     for(i=0; i<8; i++){
4142         for(j=0; j<8; j++){
4143             for(y=0; y<8; y++){
4144                 for(x=0; x<8; x++){
4145                     double s= 0.25*(1<<BASIS_SHIFT);
4146                     int index= 8*i + j;
4147                     int perm_index= perm[index];
4148                     if(i==0) s*= sqrt(0.5);
4149                     if(j==0) s*= sqrt(0.5);
4150                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4151                 }
4152             }
4153         }
4154     }
4155 }
4156
/**
 * Iteratively refine the already quantized levels of one block.
 *
 * A residual rem[] is built from orig[] and the contributions of the current
 * levels (via add_8x8basis()). Each pass of the main loop then tries
 * changing every candidate coefficient by -1 and by +1, scores a change as
 * the try_8x8basis() result plus lambda times the change in VLC length,
 * applies the single best change, and stops as soon as no change scores
 * below the unchanged block.
 *
 * @param s      encoder context (scan tables, VLC length tables, lambda2,
 *               quantizer_noise_shaping, block_last_index, DSP functions)
 * @param block  quantized levels at s->intra_scantable.permutated positions;
 *               updated in place
 * @param weight 64 weights; overwritten in place with the derived values
 *               that are passed to try_8x8basis()
 * @param orig   64 reference values the residual is measured against
 * @param n      block index; n < 4 selects s->y_dc_scale, otherwise
 *               s->c_dc_scale, and n > 3 may select the chroma length tables
 * @param qscale quantizer scale
 * @return scan index of the last non-zero coefficient after refinement
 */
4157 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4158                         int16_t *block, int16_t *weight, int16_t *orig,
4159                         int n, int qscale){
4160     int16_t rem[64];
4161     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4162     const uint8_t *scantable= s->intra_scantable.scantable;
4163     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4164 //    unsigned int threshold1, threshold2;
4165 //    int bias=0;
     /* run_tab[k]: number of zero levels in front of the k-th non-zero level, in scan order */
4166     int run_tab[65];
4167     int prev_run=0;
4168     int prev_level=0;
4169     int qmul, qadd, start_i, last_non_zero, i, dc;
4170     uint8_t * length;
4171     uint8_t * last_length;
4172     int lambda;
4173     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4174 #ifdef REFINE_STATS
4175 static int count=0;
4176 static int after_last=0;
4177 static int to_zero=0;
4178 static int from_zero=0;
4179 static int raise=0;
4180 static int lower=0;
4181 static int messed_sign=0;
4182 #endif
4183
     /* the shared table is built on first use; basis[0][0] == 0 is taken as "not built yet" */
4184     if(basis[0][0] == 0)
4185         build_basis(s->idsp.idct_permutation);
4186
4187     qmul= qscale*2;
4188     qadd= (qscale-1)|1;
4189     if (s->mb_intra) {
4190         if (!s->h263_aic) {
4191             if (n < 4)
4192                 q = s->y_dc_scale;
4193             else
4194                 q = s->c_dc_scale;
4195         } else{
4196             /* For AIC we skip quant/dequant of INTRADC */
4197             q = 1;
4198             qadd=0;
4199         }
4200         q <<= RECON_SHIFT-3;
4201         /* note: block[0] is assumed to be positive */
4202         dc= block[0]*q;
4203 //        block[0] = (block[0] + (q >> 1)) / q;
4204         start_i = 1;
4205 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4206 //            bias= 1<<(QMAT_SHIFT-1);
4207         if (n > 3 && s->intra_chroma_ac_vlc_length) {
4208             length     = s->intra_chroma_ac_vlc_length;
4209             last_length= s->intra_chroma_ac_vlc_last_length;
4210         } else {
4211             length     = s->intra_ac_vlc_length;
4212             last_length= s->intra_ac_vlc_last_length;
4213         }
4214     } else {
4215         dc= 0;
4216         start_i = 0;
4217         length     = s->inter_ac_vlc_length;
4218         last_length= s->inter_ac_vlc_last_length;
4219     }
4220     last_non_zero = s->block_last_index[n];
4221
4222 #ifdef REFINE_STATS
4223 {START_TIMER
4224 #endif
     /* start rem[] as (dc + half a unit at RECON_SHIFT precision) minus the scaled reference */
4225     dc += (1<<(RECON_SHIFT-1));
4226     for(i=0; i<64; i++){
4227         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4228     }
4229 #ifdef REFINE_STATS
4230 STOP_TIMER("memset rem[]")}
4231 #endif
     /* replace the caller's weights by the values used for scoring and sum their squares */
4232     sum=0;
4233     for(i=0; i<64; i++){
4234         int one= 36;
4235         int qns=4;
4236         int w;
4237
4238         w= FFABS(weight[i]) + qns*one;
4239         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4240
4241         weight[i] = w;
4242 //        w=weight[i] = (63*qns + (w/2)) / w;
4243
4244         av_assert2(w>0);
4245         av_assert2(w<(1<<6));
4246         sum += w*w;
4247     }
4248     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4249 #ifdef REFINE_STATS
4250 {START_TIMER
4251 #endif
     /* add the contribution of every current non-zero level to rem[] and record the zero runs */
4252     run=0;
4253     rle_index=0;
4254     for(i=start_i; i<=last_non_zero; i++){
4255         int j= perm_scantable[i];
4256         const int level= block[j];
4257         int coeff;
4258
4259         if(level){
4260             if(level<0) coeff= qmul*level - qadd;
4261             else        coeff= qmul*level + qadd;
4262             run_tab[rle_index++]=run;
4263             run=0;
4264
4265             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4266         }else{
4267             run++;
4268         }
4269     }
4270 #ifdef REFINE_STATS
4271 if(last_non_zero>0){
4272 STOP_TIMER("init rem[]")
4273 }
4274 }
4275
4276 {START_TIMER
4277 #endif
     /* greedy search: each iteration applies at most one +-1 change, and the loop ends when none helps */
4278     for(;;){
         /* score of leaving the block unchanged; a change must score below it to be taken */
4279         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4280         int best_coeff=0;
4281         int best_change=0;
4282         int run2, best_unquant_change=0, analyze_gradient;
4283 #ifdef REFINE_STATS
4284 {START_TIMER
4285 #endif
4286         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4287
         /* d1[] = fdct of the weighted residual; used below to reject 0 -> +-1 changes by sign */
4288         if(analyze_gradient){
4289 #ifdef REFINE_STATS
4290 {START_TIMER
4291 #endif
4292             for(i=0; i<64; i++){
4293                 int w= weight[i];
4294
4295                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4296             }
4297 #ifdef REFINE_STATS
4298 STOP_TIMER("rem*w*w")}
4299 {START_TIMER
4300 #endif
4301             s->fdsp.fdct(d1);
4302 #ifdef REFINE_STATS
4303 STOP_TIMER("dct")}
4304 #endif
4305         }
4306
         /* start_i != 0 (asserted below to imply an intra block): also try moving the DC level by one;
          * no VLC length term is added for it */
4307         if(start_i){
4308             const int level= block[0];
4309             int change, old_coeff;
4310
4311             av_assert2(s->mb_intra);
4312
4313             old_coeff= q*level;
4314
4315             for(change=-1; change<=1; change+=2){
4316                 int new_level= level + change;
4317                 int score, new_coeff;
4318
4319                 new_coeff= q*new_level;
4320                 if(new_coeff >= 2048 || new_coeff < 0)
4321                     continue;
4322
4323                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4324                                                   new_coeff - old_coeff);
4325                 if(score<best_score){
4326                     best_score= score;
4327                     best_coeff= 0;
4328                     best_change= change;
4329                     best_unquant_change= new_coeff - old_coeff;
4330                 }
4331             }
4332         }
4333
         /* run: zeros since the previous non-zero level; run2: zeros left before the next one */
4334         run=0;
4335         rle_index=0;
4336         run2= run_tab[rle_index++];
4337         prev_level=0;
4338         prev_run=0;
4339
4340         for(i=start_i; i<64; i++){
4341             int j= perm_scantable[i];
4342             const int level= block[j];
4343             int change, old_coeff;
4344
             /* below noise shaping level 3, look at most one position past the last non-zero level */
4345             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4346                 break;
4347
4348             if(level){
4349                 if(level<0) old_coeff= qmul*level - qadd;
4350                 else        old_coeff= qmul*level + qadd;
4351                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4352             }else{
4353                 old_coeff=0;
4354                 run2--;
4355                 av_assert2(run2>=0 || i >= last_non_zero );
4356             }
4357
4358             for(change=-1; change<=1; change+=2){
4359                 int new_level= level + change;
4360                 int score, new_coeff, unquant_change;
4361
                 /* score first collects the change in VLC length, then gets scaled by lambda */
4362                 score=0;
                 /* below noise shaping level 2, magnitudes may only shrink */
4363                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4364                    continue;
4365
4366                 if(new_level){
4367                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4368                     else            new_coeff= qmul*new_level + qadd;
4369                     if(new_coeff >= 2048 || new_coeff <= -2048)
4370                         continue;
4371                     //FIXME check for overflow
4372
4373                     if(level){
                         /* non-zero -> non-zero: same run, only the level of this code changes */
4374                         if(level < 63 && level > -63){
4375                             if(i < last_non_zero)
4376                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4377                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4378                             else
4379                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4380                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4381                         }
4382                     }else{
                         /* zero -> +-1: a new code is inserted, which also shortens the run of the next code */
4383                         av_assert2(FFABS(new_level)==1);
4384
4385                         if(analyze_gradient){
4386                             int g= d1[ scantable[i] ];
                             /* (g^new_level) >= 0 means both have the same sign: skip this change */
4387                             if(g && (g^new_level) >= 0)
4388                                 continue;
4389                         }
4390
4391                         if(i < last_non_zero){
4392                             int next_i= i + run2 + 1;
4393                             int next_level= block[ perm_scantable[next_i] ] + 64;
4394
4395                             if(next_level&(~127))
4396                                 next_level= 0;
4397
4398                             if(next_i < last_non_zero)
4399                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4400                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4401                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4402                             else
4403                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4404                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4405                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4406                         }else{
                             /* new level becomes the last one; the previous last code switches to the non-last table */
4407                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4408                             if(prev_level){
4409                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4410                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4411                             }
4412                         }
4413                     }
4414                 }else{
                     /* +-1 -> zero: this code disappears and its run merges into the next code */
4415                     new_coeff=0;
4416                     av_assert2(FFABS(level)==1);
4417
4418                     if(i < last_non_zero){
4419                         int next_i= i + run2 + 1;
4420                         int next_level= block[ perm_scantable[next_i] ] + 64;
4421
4422                         if(next_level&(~127))
4423                             next_level= 0;
4424
4425                         if(next_i < last_non_zero)
4426                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4427                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4428                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4429                         else
4430                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4431                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4432                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4433                     }else{
                         /* the last level is removed; the previous code becomes the last one */
4434                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4435                         if(prev_level){
4436                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4437                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4438                         }
4439                     }
4440                 }
4441
4442                 score *= lambda;
4443
4444                 unquant_change= new_coeff - old_coeff;
4445                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4446
4447                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4448                                                    unquant_change);
4449                 if(score<best_score){
4450                     best_score= score;
4451                     best_coeff= i;
4452                     best_change= change;
4453                     best_unquant_change= unquant_change;
4454                 }
4455             }
             /* remember the most recent non-zero level (biased by 64, 0 if out of table range) and its run */
4456             if(level){
4457                 prev_level= level + 64;
4458                 if(prev_level&(~127))
4459                     prev_level= 0;
4460                 prev_run= run;
4461                 run=0;
4462             }else{
4463                 run++;
4464             }
4465         }
4466 #ifdef REFINE_STATS
4467 STOP_TIMER("iterative step")}
4468 #endif
4469
         /* apply the winning change, if any, and bring last_non_zero, run_tab[] and rem[] up to date */
4470         if(best_change){
4471             int j= perm_scantable[ best_coeff ];
4472
4473             block[j] += best_change;
4474
4475             if(best_coeff > last_non_zero){
4476                 last_non_zero= best_coeff;
4477                 av_assert2(block[j]);
4478 #ifdef REFINE_STATS
4479 after_last++;
4480 #endif
4481             }else{
4482 #ifdef REFINE_STATS
4483 if(block[j]){
4484     if(block[j] - best_change){
4485         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4486             raise++;
4487         }else{
4488             lower++;
4489         }
4490     }else{
4491         from_zero++;
4492     }
4493 }else{
4494     to_zero++;
4495 }
4496 #endif
                 /* the change may have zeroed the last level: move last_non_zero back to the nearest non-zero one */
4497                 for(; last_non_zero>=start_i; last_non_zero--){
4498                     if(block[perm_scantable[last_non_zero]])
4499                         break;
4500                 }
4501             }
4502 #ifdef REFINE_STATS
4503 count++;
4504 if(256*256*256*64 % count == 0){
4505     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4506 }
4507 #endif
             /* rebuild the run table for the modified block */
4508             run=0;
4509             rle_index=0;
4510             for(i=start_i; i<=last_non_zero; i++){
4511                 int j= perm_scantable[i];
4512                 const int level= block[j];
4513
4514                  if(level){
4515                      run_tab[rle_index++]=run;
4516                      run=0;
4517                  }else{
4518                      run++;
4519                  }
4520             }
4521
             /* j here is the outer one, i.e. the position of the changed coefficient */
4522             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4523         }else{
4524             break;
4525         }
4526     }
4527 #ifdef REFINE_STATS
4528 if(last_non_zero>0){
4529 STOP_TIMER("iterative search")
4530 }
4531 }
4532 #endif
4533
4534     return last_non_zero;
4535 }
4536
/**
 * Move the coefficients of an 8x8 block to their permuted positions.
 *
 * Only the first last + 1 positions of the scan order are visited: their
 * values are saved and cleared, then each saved value is stored at
 * permutation[source index]. Nothing is done when last <= 0.
 *
 * @param block       the coefficients, rearranged in place
 * @param permutation the permutation vector, giving the destination index
 *                    for every source index
 * @param scantable   scan order; it only limits which indices are visited,
 *                    the block is not converted to or from scan order
 * @param last        index, in scantable order, of the last non-zero
 *                    coefficient
 */
static void block_permute(int16_t *block, uint8_t *permutation,
                          const uint8_t *scantable, int last)
{
    int16_t saved[64];
    int pos;

    if (last < 1)
        return;

    /* FIXME: skipping the work when permutation[1] == 1 would be OK but is
     * not clean and might fail for some permutations, so it is not done. */

    /* pass 1: stash every visited coefficient and clear its old slot */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos];

        saved[src] = block[src];
        block[src] = 0;
        pos++;
    }

    /* pass 2: drop each stashed coefficient at its permuted position */
    pos = 0;
    while (pos <= last) {
        const int src = scantable[pos];

        block[permutation[src]] = saved[src];
        pos++;
    }
}
4572
4573 int ff_dct_quantize_c(MpegEncContext *s,
4574                         int16_t *block, int n,
4575                         int qscale, int *overflow)
4576 {
4577     int i, j, level, last_non_zero, q, start_i;
4578     const int *qmat;
4579     const uint8_t *scantable= s->intra_scantable.scantable;
4580     int bias;
4581     int max=0;
4582     unsigned int threshold1, threshold2;
4583
4584     s->fdsp.fdct(block);
4585
4586     if(s->dct_error_sum)
4587         s->denoise_dct(s, block);
4588
4589     if (s->mb_intra) {
4590         if (!s->h263_aic) {
4591             if (n < 4)
4592                 q = s->y_dc_scale;
4593             else
4594                 q = s->c_dc_scale;
4595             q = q << 3;
4596         } else
4597             /* For AIC we skip quant/dequant of INTRADC */
4598             q = 1 << 3;
4599
4600         /* note: block[0] is assumed to be positive */
4601         block[0] = (block[0] + (q >> 1)) / q;
4602         start_i = 1;
4603         last_non_zero = 0;
4604         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4605         bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4606     } else {
4607         start_i = 0;
4608         last_non_zero = -1;
4609         qmat = s->q_inter_matrix[qscale];
4610         bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4611     }
4612     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4613     threshold2= (threshold1<<1);
4614     for(i=63;i>=start_i;i--) {
4615         j = scantable[i];
4616         level = block[j] * qmat[j];
4617
4618         if(((unsigned)(level+threshold1))>threshold2){
4619             last_non_zero = i;
4620             break;
4621         }else{
4622             block[j]=0;
4623         }
4624     }
4625     for(i=start_i; i<=last_non_zero; i++) {
4626         j = scantable[i];
4627         level = block[j] * qmat[j];
4628
4629 //        if(   bias+level >= (1<<QMAT_SHIFT)
4630 //           || bias-level >= (1<<QMAT_SHIFT)){
4631         if(((unsigned)(level+threshold1))>threshold2){
4632             if(level>0){
4633                 level= (bias + level)>>QMAT_SHIFT;
4634                 block[j]= level;
4635             }else{
4636                 level= (bias - level)>>QMAT_SHIFT;
4637                 block[j]= -level;
4638             }
4639             max |=level;
4640         }else{
4641             block[j]=0;
4642         }
4643     }
4644     *overflow= s->max_qcoeff < max; //overflow might have happened
4645
4646     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4647     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4648         block_permute(block, s->idsp.idct_permutation,
4649                       scantable, last_non_zero);
4650
4651     return last_non_zero;
4652 }
4653
/* Byte offset of field x inside MpegEncContext, for the AVOption tables below. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by every entry: video + encoding parameter.
 * Parenthesized so the expansion stays a single operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4656 static const AVOption h263_options[] = {
4657     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4658     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4659     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4660     FF_MPV_COMMON_OPTS
4661     { NULL },
4662 };
4663
4664 static const AVClass h263_class = {
4665     .class_name = "H.263 encoder",
4666     .item_name  = av_default_item_name,
4667     .option     = h263_options,
4668     .version    = LIBAVUTIL_VERSION_INT,
4669 };
4670
4671 AVCodec ff_h263_encoder = {
4672     .name           = "h263",
4673     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4674     .type           = AVMEDIA_TYPE_VIDEO,
4675     .id             = AV_CODEC_ID_H263,
4676     .priv_data_size = sizeof(MpegEncContext),
4677     .init           = ff_mpv_encode_init,
4678     .encode2        = ff_mpv_encode_picture,
4679     .close          = ff_mpv_encode_end,
4680     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4681     .priv_class     = &h263_class,
4682 };
4683
4684 static const AVOption h263p_options[] = {
4685     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4686     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4687     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4688     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4689     FF_MPV_COMMON_OPTS
4690     { NULL },
4691 };
4692 static const AVClass h263p_class = {
4693     .class_name = "H.263p encoder",
4694     .item_name  = av_default_item_name,
4695     .option     = h263p_options,
4696     .version    = LIBAVUTIL_VERSION_INT,
4697 };
4698
4699 AVCodec ff_h263p_encoder = {
4700     .name           = "h263p",
4701     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4702     .type           = AVMEDIA_TYPE_VIDEO,
4703     .id             = AV_CODEC_ID_H263P,
4704     .priv_data_size = sizeof(MpegEncContext),
4705     .init           = ff_mpv_encode_init,
4706     .encode2        = ff_mpv_encode_picture,
4707     .close          = ff_mpv_encode_end,
4708     .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
4709     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4710     .priv_class     = &h263p_class,
4711 };
4712
4713 static const AVClass msmpeg4v2_class = {
4714     .class_name = "msmpeg4v2 encoder",
4715     .item_name  = av_default_item_name,
4716     .option     = ff_mpv_generic_options,
4717     .version    = LIBAVUTIL_VERSION_INT,
4718 };
4719
4720 AVCodec ff_msmpeg4v2_encoder = {
4721     .name           = "msmpeg4v2",
4722     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4723     .type           = AVMEDIA_TYPE_VIDEO,
4724     .id             = AV_CODEC_ID_MSMPEG4V2,
4725     .priv_data_size = sizeof(MpegEncContext),
4726     .init           = ff_mpv_encode_init,
4727     .encode2        = ff_mpv_encode_picture,
4728     .close          = ff_mpv_encode_end,
4729     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4730     .priv_class     = &msmpeg4v2_class,
4731 };
4732
4733 static const AVClass msmpeg4v3_class = {
4734     .class_name = "msmpeg4v3 encoder",
4735     .item_name  = av_default_item_name,
4736     .option     = ff_mpv_generic_options,
4737     .version    = LIBAVUTIL_VERSION_INT,
4738 };
4739
4740 AVCodec ff_msmpeg4v3_encoder = {
4741     .name           = "msmpeg4",
4742     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4743     .type           = AVMEDIA_TYPE_VIDEO,
4744     .id             = AV_CODEC_ID_MSMPEG4V3,
4745     .priv_data_size = sizeof(MpegEncContext),
4746     .init           = ff_mpv_encode_init,
4747     .encode2        = ff_mpv_encode_picture,
4748     .close          = ff_mpv_encode_end,
4749     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4750     .priv_class     = &msmpeg4v3_class,
4751 };
4752
4753 static const AVClass wmv1_class = {
4754     .class_name = "wmv1 encoder",
4755     .item_name  = av_default_item_name,
4756     .option     = ff_mpv_generic_options,
4757     .version    = LIBAVUTIL_VERSION_INT,
4758 };
4759
4760 AVCodec ff_wmv1_encoder = {
4761     .name           = "wmv1",
4762     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4763     .type           = AVMEDIA_TYPE_VIDEO,
4764     .id             = AV_CODEC_ID_WMV1,
4765     .priv_data_size = sizeof(MpegEncContext),
4766     .init           = ff_mpv_encode_init,
4767     .encode2        = ff_mpv_encode_picture,
4768     .close          = ff_mpv_encode_end,
4769     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4770     .priv_class     = &wmv1_class,
4771 };