libavcodec/mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/internal.h"
31 #include "libavutil/intmath.h"
32 #include "libavutil/mathematics.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "dct.h"
37 #include "dsputil.h"
38 #include "mpeg12.h"
39 #include "mpegvideo.h"
40 #include "h261.h"
41 #include "h263.h"
42 #include "mathops.h"
43 #include "mjpegenc.h"
44 #include "msmpeg4.h"
45 #include "faandct.h"
46 #include "thread.h"
47 #include "aandcttab.h"
48 #include "flv.h"
49 #include "mpeg4video.h"
50 #include "internal.h"
51 #include "bytestream.h"
52 #include <limits.h>
53 #include "sp5x.h"
54
55 //#undef NDEBUG
56 //#include <assert.h>
57
58 static int encode_picture(MpegEncContext *s, int picture_number);
59 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
60 static int sse_mb(MpegEncContext *s);
61 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
62 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
63
64 //#define DEBUG
65
66 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
67 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
68
69 const AVOption ff_mpv_generic_options[] = {
70     FF_MPV_COMMON_OPTS
71     { NULL },
72 };
73
74 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
75                        uint16_t (*qmat16)[2][64],
76                        const uint16_t *quant_matrix,
77                        int bias, int qmin, int qmax, int intra)
78 {
79     int qscale;
80     int shift = 0;
81
82     for (qscale = qmin; qscale <= qmax; qscale++) {
83         int i;
84         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
85             dsp->fdct == ff_jpeg_fdct_islow_10 ||
86             dsp->fdct == ff_faandct) {
87             for (i = 0; i < 64; i++) {
88                 const int j = dsp->idct_permutation[i];
89                 /* 16 <= qscale * quant_matrix[i] <= 7905
90                  * Assume x = qscale * quant_matrix[i]
91                  * so              16 <=              x  <= 7905
92                  * so (1 << QMAT_SHIFT) / 16 >= (1 << QMAT_SHIFT) / (x)
93                  * (the ff_aanscales factor only applies to ff_fdct_ifast below) */
94
95                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
96                                         (qscale * quant_matrix[j]));
97             }
98         } else if (dsp->fdct == ff_fdct_ifast) {
99             for (i = 0; i < 64; i++) {
100                 const int j = dsp->idct_permutation[i];
101                 /* 16 <= qscale * quant_matrix[i] <= 7905
102                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
103                  *             19952 <=              x  <= 249205026
104                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
105                  *           3444240 >= (1 << 36) / (x) >= 275 */
106
107                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
108                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
109             }
110         } else {
111             for (i = 0; i < 64; i++) {
112                 const int j = dsp->idct_permutation[i];
113                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
114                  * Assume x = qscale * quant_matrix[i]
115                  * So             16 <=              x  <= 7905
116                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
117                  * so          32768 >= (1 << 19) / (x) >= 67 */
118                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
119                                         (qscale * quant_matrix[j]));
120                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
121                 //                    (qscale * quant_matrix[i]);
122                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
123                                        (qscale * quant_matrix[j]);
124
125                 if (qmat16[qscale][0][i] == 0 ||
126                     qmat16[qscale][0][i] == 128 * 256)
127                     qmat16[qscale][0][i] = 128 * 256 - 1;
128                 qmat16[qscale][1][i] =
129                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
130                                 qmat16[qscale][0][i]);
131             }
132         }
133
134         for (i = intra; i < 64; i++) {
135             int64_t max = 8191;
136             if (dsp->fdct == ff_fdct_ifast) {
137                 max = (8191LL * ff_aanscales[i]) >> 14;
138             }
139             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
140                 shift++;
141             }
142         }
143     }
144     if (shift) {
145         av_log(NULL, AV_LOG_INFO,
146                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
147                QMAT_SHIFT - shift);
148     }
149 }
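/* ff_convert_matrix() builds, for every qscale in [qmin, qmax], fixed-point
 * reciprocals of qscale * quant_matrix[] so the quantizer can replace the
 * division level / (qscale * quant_matrix[i]) by, roughly,
 * (level * qmat[qscale][i]) >> QMAT_SHIFT (the ifast table additionally folds
 * in the AAN fdct scale factors).  qmat16 stores a 16-bit reciprocal plus a
 * matching rounding bias for the MMX quantizer, and the final loop merely
 * checks whether the chosen QMAT_SHIFT could overflow 32 bits. */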
150
151 static inline void update_qscale(MpegEncContext *s)
152 {
153     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
154                 (FF_LAMBDA_SHIFT + 7);
155     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
156
157     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
158                  FF_LAMBDA_SHIFT;
159 }
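/* update_qscale() is the inverse of the usual QP -> lambda mapping: since
 * 139 * FF_QP2LAMBDA (118) is almost exactly 1 << (FF_LAMBDA_SHIFT + 7),
 * the expression above is roughly qscale = lambda / FF_QP2LAMBDA, clipped to
 * [qmin, qmax].  For example, lambda = 4 * FF_QP2LAMBDA = 472 gives
 * (472 * 139 + 8192) >> 14 = 4. */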
160
161 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
162 {
163     int i;
164
165     if (matrix) {
166         put_bits(pb, 1, 1);
167         for (i = 0; i < 64; i++) {
168             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
169         }
170     } else
171         put_bits(pb, 1, 0);
172 }
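/* With a custom matrix this emits 1 flag bit plus 64 coefficients of 8 bits
 * each in zigzag order (513 bits total), otherwise a single 0 bit, matching
 * the load_*_quantiser_matrix syntax of the MPEG-1/2 sequence header. */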
173
174 /**
175  * init s->current_picture.qscale_table from s->lambda_table
176  */
177 void ff_init_qscale_tab(MpegEncContext *s)
178 {
179     int8_t * const qscale_table = s->current_picture.qscale_table;
180     int i;
181
182     for (i = 0; i < s->mb_num; i++) {
183         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
184         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
185         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
186                                                   s->avctx->qmax);
187     }
188 }
189
190 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst,
191                                     const AVFrame *src)
192 {
193     dst->pict_type              = src->pict_type;
194     dst->quality                = src->quality;
195     dst->coded_picture_number   = src->coded_picture_number;
196     dst->display_picture_number = src->display_picture_number;
197     //dst->reference              = src->reference;
198     dst->pts                    = src->pts;
199     dst->interlaced_frame       = src->interlaced_frame;
200     dst->top_field_first        = src->top_field_first;
201 }
202
203 static void update_duplicate_context_after_me(MpegEncContext *dst,
204                                               MpegEncContext *src)
205 {
206 #define COPY(a) dst->a= src->a
207     COPY(pict_type);
208     COPY(current_picture);
209     COPY(f_code);
210     COPY(b_code);
211     COPY(qscale);
212     COPY(lambda);
213     COPY(lambda2);
214     COPY(picture_in_gop_number);
215     COPY(gop_picture_number);
216     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
217     COPY(progressive_frame);    // FIXME don't set in encode_header
218     COPY(partitioned_frame);    // FIXME don't set in encode_header
219 #undef COPY
220 }
221
222 /**
223  * Set the given MpegEncContext to defaults for encoding.
224  * The changed fields will not depend upon the prior state of the MpegEncContext.
225  */
226 static void MPV_encode_defaults(MpegEncContext *s)
227 {
228     int i;
229     ff_MPV_common_defaults(s);
230
231     for (i = -16; i < 16; i++) {
232         default_fcode_tab[i + MAX_MV] = 1;
233     }
234     s->me.mv_penalty = default_mv_penalty;
235     s->fcode_tab     = default_fcode_tab;
236 }
237
238 av_cold int ff_dct_encode_init(MpegEncContext *s) {
239     if (ARCH_X86)
240         ff_dct_encode_init_x86(s);
241
242     if (!s->dct_quantize)
243         s->dct_quantize = ff_dct_quantize_c;
244     if (!s->denoise_dct)
245         s->denoise_dct  = denoise_dct_c;
246     s->fast_dct_quantize = s->dct_quantize;
247     if (s->avctx->trellis)
248         s->dct_quantize  = dct_quantize_trellis_c;
249
250     return 0;
251 }
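/* If avctx->trellis is set (e.g. "-trellis 1" on the command line),
 * dct_quantize is redirected to the rate-distortion optimizing trellis
 * quantizer, while fast_dct_quantize keeps the plain (possibly SIMD)
 * implementation so speed-critical callers can still use the cheap path. */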
252
253 /* init video encoder */
254 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
255 {
256     MpegEncContext *s = avctx->priv_data;
257     int i;
258     int chroma_h_shift, chroma_v_shift;
259
260     MPV_encode_defaults(s);
261
262     switch (avctx->codec_id) {
263     case AV_CODEC_ID_MPEG2VIDEO:
264         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
265             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
266             av_log(avctx, AV_LOG_ERROR,
267                    "only YUV420 and YUV422 are supported\n");
268             return -1;
269         }
270         break;
271     case AV_CODEC_ID_LJPEG:
272         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
273             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
274             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
275             avctx->pix_fmt != AV_PIX_FMT_BGR0     &&
276             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
277             avctx->pix_fmt != AV_PIX_FMT_BGR24    &&
278             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
279               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
280               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
281              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
282             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
283             return -1;
284         }
285         break;
286     case AV_CODEC_ID_MJPEG:
287     case AV_CODEC_ID_AMV:
288         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
289             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
290             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
291             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
292               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
293               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
294              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
295             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
296             return -1;
297         }
298         break;
299     default:
300         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
301             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
302             return -1;
303         }
304     }
305
306     switch (avctx->pix_fmt) {
307     case AV_PIX_FMT_YUVJ444P:
308     case AV_PIX_FMT_YUV444P:
309         s->chroma_format = CHROMA_444;
310         break;
311     case AV_PIX_FMT_YUVJ422P:
312     case AV_PIX_FMT_YUV422P:
313         s->chroma_format = CHROMA_422;
314         break;
315     case AV_PIX_FMT_YUVJ420P:
316     case AV_PIX_FMT_YUV420P:
317     default:
318         s->chroma_format = CHROMA_420;
319         break;
320     }
321
322     s->bit_rate = avctx->bit_rate;
323     s->width    = avctx->width;
324     s->height   = avctx->height;
325     if (avctx->gop_size > 600 &&
326         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
327         av_log(avctx, AV_LOG_WARNING,
328                "keyframe interval too large, reducing it from %d to %d\n",
329                avctx->gop_size, 600);
330         avctx->gop_size = 600;
331     }
332     s->gop_size     = avctx->gop_size;
333     s->avctx        = avctx;
334     s->flags        = avctx->flags;
335     s->flags2       = avctx->flags2;
336     s->max_b_frames = avctx->max_b_frames;
337     s->codec_id     = avctx->codec->id;
338     s->strict_std_compliance = avctx->strict_std_compliance;
339     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
340     s->mpeg_quant         = avctx->mpeg_quant;
341     s->rtp_mode           = !!avctx->rtp_payload_size;
342     s->intra_dc_precision = avctx->intra_dc_precision;
343     s->user_specified_pts = AV_NOPTS_VALUE;
344
345     if (s->gop_size <= 1) {
346         s->intra_only = 1;
347         s->gop_size   = 12;
348     } else {
349         s->intra_only = 0;
350     }
351
352     s->me_method = avctx->me_method;
353
354     /* Fixed QSCALE */
355     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
356
357     s->adaptive_quant = (s->avctx->lumi_masking ||
358                          s->avctx->dark_masking ||
359                          s->avctx->temporal_cplx_masking ||
360                          s->avctx->spatial_cplx_masking  ||
361                          s->avctx->p_masking      ||
362                          s->avctx->border_masking ||
363                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
364                         !s->fixed_qscale;
365
366     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
367
368     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
369         switch(avctx->codec_id) {
370         case AV_CODEC_ID_MPEG1VIDEO:
371         case AV_CODEC_ID_MPEG2VIDEO:
372             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
373             break;
374         case AV_CODEC_ID_MPEG4:
375         case AV_CODEC_ID_MSMPEG4V1:
376         case AV_CODEC_ID_MSMPEG4V2:
377         case AV_CODEC_ID_MSMPEG4V3:
378             if       (avctx->rc_max_rate >= 15000000) {
379                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
380             } else if(avctx->rc_max_rate >=  2000000) {
381                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
382             } else if(avctx->rc_max_rate >=   384000) {
383                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
384             } else
385                 avctx->rc_buffer_size = 40;
386             avctx->rc_buffer_size *= 16384;
387             break;
388         }
389         if (avctx->rc_buffer_size) {
390             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
391         }
392     }
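    /* A rough worked example for the MPEG-4 branch above: rc_max_rate =
     * 1000000 falls into the 384000..2000000 band, so rc_buffer_size becomes
     * 40 + 616000 * 40 / 1616000 = 40 + 15 = 55 units of 16384 bits, i.e.
     * 901120 bits, which the log above reports as 110 kbyte.  For MPEG-1/2 at
     * up to 15 Mbit/s the formula yields 112 * 16384 = 1835008 bits, the
     * familiar MP@ML VBV buffer limit. */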
393
394     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
395         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
396         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
397             return -1;
398     }
399
400     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
401         av_log(avctx, AV_LOG_INFO,
402                "Warning: min_rate > 0 with min_rate != max_rate is not recommended!\n");
403     }
404
405     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
406         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
407         return -1;
408     }
409
410     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
411         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
412         return -1;
413     }
414
415     if (avctx->rc_max_rate &&
416         avctx->rc_max_rate == avctx->bit_rate &&
417         avctx->rc_max_rate != avctx->rc_min_rate) {
418         av_log(avctx, AV_LOG_INFO,
419                "impossible bitrate constraints, this will fail\n");
420     }
421
422     if (avctx->rc_buffer_size &&
423         avctx->bit_rate * (int64_t)avctx->time_base.num >
424             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
425         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
426         return -1;
427     }
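    /* avctx->time_base is the duration of one frame here, so
     * bit_rate * time_base.num / time_base.den is the average bit budget of a
     * single frame; if even that exceeds the whole VBV buffer, rate control
     * cannot possibly comply, hence the hard error.  The cross-multiplied
     * form above avoids an integer division. */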
428
429     if (!s->fixed_qscale &&
430         avctx->bit_rate * av_q2d(avctx->time_base) >
431             avctx->bit_rate_tolerance) {
432         av_log(avctx, AV_LOG_ERROR,
433                "bitrate tolerance too small for bitrate\n");
434         return -1;
435     }
436
437     if (s->avctx->rc_max_rate &&
438         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
439         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
440          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
441         90000LL * (avctx->rc_buffer_size - 1) >
442             s->avctx->rc_max_rate * 0xFFFFLL) {
443         av_log(avctx, AV_LOG_INFO,
444                "Warning: vbv_delay will be set to 0xFFFF (=VBR) as the "
445                "specified vbv buffer is too large for the given bitrate!\n");
446     }
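    /* vbv_delay is a 16-bit field counted in 90 kHz ticks, so the largest
     * representable delay is 0xFFFF / 90000 ~= 0.728 s.  Filling a buffer of
     * rc_buffer_size bits at rc_max_rate takes about
     * 90000 * rc_buffer_size / rc_max_rate ticks; the check above triggers
     * when that exceeds 0xFFFF, in which case the encoder later signals
     * 0xFFFF (variable bitrate) instead. */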
447
448     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
449         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
450         s->codec_id != AV_CODEC_ID_FLV1) {
451         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
452         return -1;
453     }
454
455     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
456         av_log(avctx, AV_LOG_ERROR,
457                "OBMC is only supported with simple mb decision\n");
458         return -1;
459     }
460
461     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
462         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
463         return -1;
464     }
465
466     if (s->max_b_frames                    &&
467         s->codec_id != AV_CODEC_ID_MPEG4      &&
468         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
469         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
470         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
471         return -1;
472     }
473
474     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
475          s->codec_id == AV_CODEC_ID_H263  ||
476          s->codec_id == AV_CODEC_ID_H263P) &&
477         (avctx->sample_aspect_ratio.num > 255 ||
478          avctx->sample_aspect_ratio.den > 255)) {
479         av_log(avctx, AV_LOG_WARNING,
480                "Invalid pixel aspect ratio %i/%i, limit is 255/255, reducing\n",
481                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
482         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
483                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
484     }
485
486     if ((s->codec_id == AV_CODEC_ID_H263  ||
487          s->codec_id == AV_CODEC_ID_H263P) &&
488         (avctx->width  > 2048 ||
489          avctx->height > 1152 )) {
490         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
491         return -1;
492     }
493     if ((s->codec_id == AV_CODEC_ID_H263  ||
494          s->codec_id == AV_CODEC_ID_H263P) &&
495         ((avctx->width &3) ||
496          (avctx->height&3) )) {
497         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
498         return -1;
499     }
500
501     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
502         (avctx->width  > 4095 ||
503          avctx->height > 4095 )) {
504         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
505         return -1;
506     }
507
508     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
509         (avctx->width  > 16383 ||
510          avctx->height > 16383 )) {
511         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
512         return -1;
513     }
514
515     if (s->codec_id == AV_CODEC_ID_RV10 &&
516         (avctx->width &15 ||
517          avctx->height&15 )) {
518         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
519         return AVERROR(EINVAL);
520     }
521
522     if (s->codec_id == AV_CODEC_ID_RV20 &&
523         (avctx->width &3 ||
524          avctx->height&3 )) {
525         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
526         return AVERROR(EINVAL);
527     }
528
529     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
530          s->codec_id == AV_CODEC_ID_WMV2) &&
531          avctx->width & 1) {
532          av_log(avctx, AV_LOG_ERROR, "width must be a multiple of 2\n");
533          return -1;
534     }
535
536     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
537         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
538         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
539         return -1;
540     }
541
542     // FIXME mpeg2 uses that too
543     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
544         av_log(avctx, AV_LOG_ERROR,
545                "mpeg2 style quantization not supported by codec\n");
546         return -1;
547     }
548
549     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
550         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
551         return -1;
552     }
553
554     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
555         s->avctx->mb_decision != FF_MB_DECISION_RD) {
556         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
557         return -1;
558     }
559
560     if (s->avctx->scenechange_threshold < 1000000000 &&
561         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
562         av_log(avctx, AV_LOG_ERROR,
563                "closed GOP with scene change detection is not supported yet, "
564                "set threshold to 1000000000\n");
565         return -1;
566     }
567
568     if (s->flags & CODEC_FLAG_LOW_DELAY) {
569         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
570             av_log(avctx, AV_LOG_ERROR,
571                   "low delay forcing is only available for mpeg2\n");
572             return -1;
573         }
574         if (s->max_b_frames != 0) {
575             av_log(avctx, AV_LOG_ERROR,
576                    "b frames cannot be used with low delay\n");
577             return -1;
578         }
579     }
580
581     if (s->q_scale_type == 1) {
582         if (avctx->qmax > 12) {
583             av_log(avctx, AV_LOG_ERROR,
584                "non-linear quant currently only supports qmax <= 12\n");
585             return -1;
586         }
587     }
588
589     if (s->avctx->thread_count > 1         &&
590         s->codec_id != AV_CODEC_ID_MPEG4      &&
591         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
592         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
593         s->codec_id != AV_CODEC_ID_MJPEG      &&
594         (s->codec_id != AV_CODEC_ID_H263P)) {
595         av_log(avctx, AV_LOG_ERROR,
596                "multi threaded encoding not supported by codec\n");
597         return -1;
598     }
599
600     if (s->avctx->thread_count < 1) {
601         av_log(avctx, AV_LOG_ERROR,
602                "automatic thread number detection not supported by codec, "
603                "patch welcome\n");
604         return -1;
605     }
606
607     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
608         s->rtp_mode = 1;
609
610     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
611         s->h263_slice_structured = 1;
612
613     if (!avctx->time_base.den || !avctx->time_base.num) {
614         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
615         return -1;
616     }
617
618     i = (INT_MAX / 2 + 128) >> 8;
619     if (avctx->mb_threshold >= i) {
620         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
621                i - 1);
622         return -1;
623     }
624
625     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
626         av_log(avctx, AV_LOG_INFO,
627                "notice: b_frame_strategy only affects the first pass\n");
628         avctx->b_frame_strategy = 0;
629     }
630
631     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
632     if (i > 1) {
633         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
634         avctx->time_base.den /= i;
635         avctx->time_base.num /= i;
636         //return -1;
637     }
638
639     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
640         // (a + x * 3 / 8) / x
641         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
642         s->inter_quant_bias = 0;
643     } else {
644         s->intra_quant_bias = 0;
645         // (a - x / 4) / x
646         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
647     }
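    /* The biases are fractions of one quantization step in units of
     * 1 / (1 << QUANT_BIAS_SHIFT): 3 << (QUANT_BIAS_SHIFT - 3) is +3/8, the
     * classic intra rounding offset, and -(1 << (QUANT_BIAS_SHIFT - 2)) is
     * -1/4, which pulls inter coefficients towards zero (a small dead zone)
     * to save bits. */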
648
649     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
650         s->intra_quant_bias = avctx->intra_quant_bias;
651     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
652         s->inter_quant_bias = avctx->inter_quant_bias;
653
654     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
655
656     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
657
658     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
659         s->avctx->time_base.den > (1 << 16) - 1) {
660         av_log(avctx, AV_LOG_ERROR,
661                "timebase %d/%d not supported by the MPEG-4 standard, "
662                "the maximum allowed value for the timebase denominator "
663                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
664                (1 << 16) - 1);
665         return -1;
666     }
667     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
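    /* time_increment_bits is the number of bits needed to code the values
     * 0 .. time_base.den - 1 (MPEG-4 vop_time_increment); e.g. a 30 fps
     * timebase gives av_log2(29) + 1 = 5 bits. */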
668
669     switch (avctx->codec->id) {
670     case AV_CODEC_ID_MPEG1VIDEO:
671         s->out_format = FMT_MPEG1;
672         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
673         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
674         break;
675     case AV_CODEC_ID_MPEG2VIDEO:
676         s->out_format = FMT_MPEG1;
677         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
678         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
679         s->rtp_mode   = 1;
680         break;
681     case AV_CODEC_ID_LJPEG:
682     case AV_CODEC_ID_MJPEG:
683     case AV_CODEC_ID_AMV:
684         s->out_format = FMT_MJPEG;
685         s->intra_only = 1; /* force intra only for jpeg */
686         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
687             (avctx->pix_fmt == AV_PIX_FMT_BGR0
688              || s->avctx->pix_fmt == AV_PIX_FMT_BGRA
689              || s->avctx->pix_fmt == AV_PIX_FMT_BGR24)) {
690             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
691             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
692             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
693         } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P || avctx->pix_fmt == AV_PIX_FMT_YUVJ444P) {
694             s->mjpeg_vsample[0] = s->mjpeg_vsample[1] = s->mjpeg_vsample[2] = 2;
695             s->mjpeg_hsample[0] = s->mjpeg_hsample[1] = s->mjpeg_hsample[2] = 1;
696         } else {
697             s->mjpeg_vsample[0] = 2;
698             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
699             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
700             s->mjpeg_hsample[0] = 2;
701             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
702             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
703         }
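        /* mjpeg_hsample/vsample are the per-component JPEG sampling factors.
         * E.g. for yuv420p (chroma shifts 1,1) this yields 2x2 luma and 1x1
         * chroma sampling, i.e. a standard 4:2:0 scan; for yuv422p the chroma
         * keeps full vertical resolution (1x2). */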
704         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
705             ff_mjpeg_encode_init(s) < 0)
706             return -1;
707         avctx->delay = 0;
708         s->low_delay = 1;
709         break;
710     case AV_CODEC_ID_H261:
711         if (!CONFIG_H261_ENCODER)
712             return -1;
713         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
714             av_log(avctx, AV_LOG_ERROR,
715                    "The specified picture size of %dx%d is not valid for the "
716                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
717                     s->width, s->height);
718             return -1;
719         }
720         s->out_format = FMT_H261;
721         avctx->delay  = 0;
722         s->low_delay  = 1;
723         break;
724     case AV_CODEC_ID_H263:
725         if (!CONFIG_H263_ENCODER)
726             return -1;
727         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
728                              s->width, s->height) == 8) {
729             av_log(avctx, AV_LOG_ERROR,
730                    "The specified picture size of %dx%d is not valid for "
731                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
732                    "352x288, 704x576, and 1408x1152. "
733                    "Try H.263+.\n", s->width, s->height);
734             return -1;
735         }
736         s->out_format = FMT_H263;
737         avctx->delay  = 0;
738         s->low_delay  = 1;
739         break;
740     case AV_CODEC_ID_H263P:
741         s->out_format = FMT_H263;
742         s->h263_plus  = 1;
743         /* Fx */
744         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
745         s->modified_quant  = s->h263_aic;
746         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
747         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
748
749         /* /Fx */
750         /* These are just to be sure */
751         avctx->delay = 0;
752         s->low_delay = 1;
753         break;
754     case AV_CODEC_ID_FLV1:
755         s->out_format      = FMT_H263;
756         s->h263_flv        = 2; /* format = 1; 11-bit codes */
757         s->unrestricted_mv = 1;
758         s->rtp_mode  = 0; /* don't allow GOB */
759         avctx->delay = 0;
760         s->low_delay = 1;
761         break;
762     case AV_CODEC_ID_RV10:
763         s->out_format = FMT_H263;
764         avctx->delay  = 0;
765         s->low_delay  = 1;
766         break;
767     case AV_CODEC_ID_RV20:
768         s->out_format      = FMT_H263;
769         avctx->delay       = 0;
770         s->low_delay       = 1;
771         s->modified_quant  = 1;
772         s->h263_aic        = 1;
773         s->h263_plus       = 1;
774         s->loop_filter     = 1;
775         s->unrestricted_mv = 0;
776         break;
777     case AV_CODEC_ID_MPEG4:
778         s->out_format      = FMT_H263;
779         s->h263_pred       = 1;
780         s->unrestricted_mv = 1;
781         s->low_delay       = s->max_b_frames ? 0 : 1;
782         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
783         break;
784     case AV_CODEC_ID_MSMPEG4V2:
785         s->out_format      = FMT_H263;
786         s->h263_pred       = 1;
787         s->unrestricted_mv = 1;
788         s->msmpeg4_version = 2;
789         avctx->delay       = 0;
790         s->low_delay       = 1;
791         break;
792     case AV_CODEC_ID_MSMPEG4V3:
793         s->out_format        = FMT_H263;
794         s->h263_pred         = 1;
795         s->unrestricted_mv   = 1;
796         s->msmpeg4_version   = 3;
797         s->flipflop_rounding = 1;
798         avctx->delay         = 0;
799         s->low_delay         = 1;
800         break;
801     case AV_CODEC_ID_WMV1:
802         s->out_format        = FMT_H263;
803         s->h263_pred         = 1;
804         s->unrestricted_mv   = 1;
805         s->msmpeg4_version   = 4;
806         s->flipflop_rounding = 1;
807         avctx->delay         = 0;
808         s->low_delay         = 1;
809         break;
810     case AV_CODEC_ID_WMV2:
811         s->out_format        = FMT_H263;
812         s->h263_pred         = 1;
813         s->unrestricted_mv   = 1;
814         s->msmpeg4_version   = 5;
815         s->flipflop_rounding = 1;
816         avctx->delay         = 0;
817         s->low_delay         = 1;
818         break;
819     default:
820         return -1;
821     }
822
823     avctx->has_b_frames = !s->low_delay;
824
825     s->encoding = 1;
826
827     s->progressive_frame    =
828     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
829                                                 CODEC_FLAG_INTERLACED_ME) ||
830                                 s->alternate_scan);
831
832     /* init */
833     if (ff_MPV_common_init(s) < 0)
834         return -1;
835
836     ff_dct_encode_init(s);
837
838     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
839         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
840
841     s->quant_precision = 5;
842
843     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
844     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
845
846     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
847         ff_h261_encode_init(s);
848     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
849         ff_h263_encode_init(s);
850     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
851         ff_msmpeg4_encode_init(s);
852     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
853         && s->out_format == FMT_MPEG1)
854         ff_mpeg1_encode_init(s);
855
856     /* init q matrix */
857     for (i = 0; i < 64; i++) {
858         int j = s->dsp.idct_permutation[i];
859         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
860             s->mpeg_quant) {
861             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
862             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
863         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
864             s->intra_matrix[j] =
865             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
866         } else {
867             /* mpeg1/2 */
868             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
869             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
870         }
871         if (s->avctx->intra_matrix)
872             s->intra_matrix[j] = s->avctx->intra_matrix[i];
873         if (s->avctx->inter_matrix)
874             s->inter_matrix[j] = s->avctx->inter_matrix[i];
875     }
876
877     /* precompute matrix */
878     /* for mjpeg, we do include qscale in the matrix */
879     if (s->out_format != FMT_MJPEG) {
880         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
881                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
882                           31, 1);
883         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
884                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
885                           31, 0);
886     }
887
888     if (ff_rate_control_init(s) < 0)
889         return -1;
890
891     return 0;
892 }
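/* A typical caller-side setup for one of these encoders looks roughly like
 * the following sketch (error handling and frame I/O omitted); avcodec_open2()
 * ends up in ff_MPV_encode_init() above and each avcodec_encode_video2() call
 * in ff_MPV_encode_picture() below, much like encode_frame() further down
 * does internally:
 *
 *     AVCodec *codec      = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
 *     AVCodecContext *enc = avcodec_alloc_context3(codec);
 *     enc->width          = 640;
 *     enc->height         = 480;
 *     enc->pix_fmt        = AV_PIX_FMT_YUV420P;
 *     enc->time_base      = (AVRational){ 1, 25 };
 *     enc->bit_rate       = 1000000;
 *     enc->gop_size       = 250;
 *     enc->max_b_frames   = 2;
 *     if (avcodec_open2(enc, codec, NULL) < 0)
 *         return;
 *     // per frame: avcodec_encode_video2(enc, &pkt, frame, &got_output);
 */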
893
894 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
895 {
896     MpegEncContext *s = avctx->priv_data;
897
898     ff_rate_control_uninit(s);
899
900     ff_MPV_common_end(s);
901     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
902         s->out_format == FMT_MJPEG)
903         ff_mjpeg_encode_close(s);
904
905     av_freep(&avctx->extradata);
906
907     return 0;
908 }
909
910 static int get_sae(uint8_t *src, int ref, int stride)
911 {
912     int x,y;
913     int acc = 0;
914
915     for (y = 0; y < 16; y++) {
916         for (x = 0; x < 16; x++) {
917             acc += FFABS(src[x + y * stride] - ref);
918         }
919     }
920
921     return acc;
922 }
923
924 static int get_intra_count(MpegEncContext *s, uint8_t *src,
925                            uint8_t *ref, int stride)
926 {
927     int x, y, w, h;
928     int acc = 0;
929
930     w = s->width  & ~15;
931     h = s->height & ~15;
932
933     for (y = 0; y < h; y += 16) {
934         for (x = 0; x < w; x += 16) {
935             int offset = x + y * stride;
936             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
937                                      16);
938             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
939             int sae  = get_sae(src + offset, mean, stride);
940
941             acc += sae + 500 < sad;
942         }
943     }
944     return acc;
945 }
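/* get_intra_count() counts the 16x16 blocks of src that look cheaper to
 * intra-code: a block is counted when its SAE around its own mean (plus a
 * fixed bias of 500) is smaller than the SAD against the co-located block in
 * ref.  b_frame_strategy 1 in select_input_picture() below uses this count to
 * decide how many of the queued frames are still worth coding as B-frames. */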
946
947
948 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
949 {
950     Picture *pic = NULL;
951     int64_t pts;
952     int i, display_picture_number = 0, ret;
953     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
954                                                  (s->low_delay ? 0 : 1);
955     int direct = 1;
956
957     if (pic_arg) {
958         pts = pic_arg->pts;
959         display_picture_number = s->input_picture_number++;
960
961         if (pts != AV_NOPTS_VALUE) {
962             if (s->user_specified_pts != AV_NOPTS_VALUE) {
963                 int64_t time = pts;
964                 int64_t last = s->user_specified_pts;
965
966                 if (time <= last) {
967                     av_log(s->avctx, AV_LOG_ERROR,
968                            "Error, invalid timestamp=%"PRId64", "
969                            "last=%"PRId64"\n", pts, s->user_specified_pts);
970                     return -1;
971                 }
972
973                 if (!s->low_delay && display_picture_number == 1)
974                     s->dts_delta = time - last;
975             }
976             s->user_specified_pts = pts;
977         } else {
978             if (s->user_specified_pts != AV_NOPTS_VALUE) {
979                 s->user_specified_pts =
980                 pts = s->user_specified_pts + 1;
981                 av_log(s->avctx, AV_LOG_INFO,
982                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
983                        pts);
984             } else {
985                 pts = display_picture_number;
986             }
987         }
988     }
989
990     if (pic_arg) {
991         if (!pic_arg->buf[0])
992             direct = 0;
993         if (pic_arg->linesize[0] != s->linesize)
994             direct = 0;
995         if (pic_arg->linesize[1] != s->uvlinesize)
996             direct = 0;
997         if (pic_arg->linesize[2] != s->uvlinesize)
998             direct = 0;
999
1000         av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
1001                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1002
1003         if (direct) {
1004             i = ff_find_unused_picture(s, 1);
1005             if (i < 0)
1006                 return i;
1007
1008             pic = &s->picture[i];
1009             pic->reference = 3;
1010
1011             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
1012                 return ret;
1013             if (ff_alloc_picture(s, pic, 1) < 0) {
1014                 return -1;
1015             }
1016         } else {
1017             i = ff_find_unused_picture(s, 0);
1018             if (i < 0)
1019                 return i;
1020
1021             pic = &s->picture[i];
1022             pic->reference = 3;
1023
1024             if (ff_alloc_picture(s, pic, 0) < 0) {
1025                 return -1;
1026             }
1027
1028             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1029                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1030                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1031                 // empty
1032             } else {
1033                 int h_chroma_shift, v_chroma_shift;
1034                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1035                                                  &h_chroma_shift,
1036                                                  &v_chroma_shift);
1037
1038                 for (i = 0; i < 3; i++) {
1039                     int src_stride = pic_arg->linesize[i];
1040                     int dst_stride = i ? s->uvlinesize : s->linesize;
1041                     int h_shift = i ? h_chroma_shift : 0;
1042                     int v_shift = i ? v_chroma_shift : 0;
1043                     int w = s->width  >> h_shift;
1044                     int h = s->height >> v_shift;
1045                     uint8_t *src = pic_arg->data[i];
1046                     uint8_t *dst = pic->f.data[i];
1047
1048                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1049                         h = ((s->height + 15)/16*16) >> v_shift;
1050                     }
1051
1052                     if (!s->avctx->rc_buffer_size)
1053                         dst += INPLACE_OFFSET;
1054
1055                     if (src_stride == dst_stride)
1056                         memcpy(dst, src, src_stride * h);
1057                     else {
1058                         int h2 = h;
1059                         uint8_t *dst2 = dst;
1060                         while (h2--) {
1061                             memcpy(dst2, src, w);
1062                             dst2 += dst_stride;
1063                             src += src_stride;
1064                         }
1065                     }
1066                     if ((s->width & 15) || (s->height & 15)) {
1067                         s->dsp.draw_edges(dst, dst_stride,
1068                                           w, h,
1069                                           16>>h_shift,
1070                                           16>>v_shift,
1071                                           EDGE_BOTTOM);
1072                     }
1073                 }
1074             }
1075         }
1076         copy_picture_attributes(s, &pic->f, pic_arg);
1077         pic->f.display_picture_number = display_picture_number;
1078         pic->f.pts = pts; // we set this here to avoid modifying pic_arg
1079     }
1080
1081     /* shift buffer entries */
1082     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1083         s->input_picture[i - 1] = s->input_picture[i];
1084
1085     s->input_picture[encoding_delay] = (Picture*) pic;
1086
1087     return 0;
1088 }
1089
1090 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1091 {
1092     int x, y, plane;
1093     int score = 0;
1094     int64_t score64 = 0;
1095
1096     for (plane = 0; plane < 3; plane++) {
1097         const int stride = p->f.linesize[plane];
1098         const int bw = plane ? 1 : 2;
1099         for (y = 0; y < s->mb_height * bw; y++) {
1100             for (x = 0; x < s->mb_width * bw; x++) {
1101                 int off = p->shared ? 0 : 16;
1102                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1103                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1104                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1105
1106                 switch (s->avctx->frame_skip_exp) {
1107                 case 0: score    =  FFMAX(score, v);          break;
1108                 case 1: score   += FFABS(v);                  break;
1109                 case 2: score   += v * v;                     break;
1110                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1111                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1112                 }
1113             }
1114         }
1115     }
1116
1117     if (score)
1118         score64 = score;
1119
1120     if (score64 < s->avctx->frame_skip_threshold)
1121         return 1;
1122     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1123         return 1;
1124     return 0;
1125 }
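/* skip_check() compares every 8x8 block of the candidate frame against the
 * last encoded reference using frame_skip_cmp and combines the per-block
 * scores according to frame_skip_exp (0: maximum, 1: sum of absolute values,
 * 2: sum of squares, 3 and 4: higher powers accumulated in 64 bits).  The
 * frame is dropped when the total stays below frame_skip_threshold or below
 * frame_skip_factor * lambda / 256. */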
1126
1127 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1128 {
1129     AVPacket pkt = { 0 };
1130     int ret, got_output;
1131
1132     av_init_packet(&pkt);
1133     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1134     if (ret < 0)
1135         return ret;
1136
1137     ret = pkt.size;
1138     av_free_packet(&pkt);
1139     return ret;
1140 }
1141
1142 static int estimate_best_b_count(MpegEncContext *s)
1143 {
1144     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1145     AVCodecContext *c = avcodec_alloc_context3(NULL);
1146     AVFrame input[FF_MAX_B_FRAMES + 2];
1147     const int scale = s->avctx->brd_scale;
1148     int i, j, out_size, p_lambda, b_lambda, lambda2;
1149     int64_t best_rd  = INT64_MAX;
1150     int best_b_count = -1;
1151
1152     av_assert0(scale >= 0 && scale <= 3);
1153
1154     //emms_c();
1155     //s->next_picture_ptr->quality;
1156     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1157     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1158     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1159     if (!b_lambda) // FIXME we should do this somewhere else
1160         b_lambda = p_lambda;
1161     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1162                FF_LAMBDA_SHIFT;
1163
1164     c->width        = s->width  >> scale;
1165     c->height       = s->height >> scale;
1166     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1167                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1168     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1169     c->mb_decision  = s->avctx->mb_decision;
1170     c->me_cmp       = s->avctx->me_cmp;
1171     c->mb_cmp       = s->avctx->mb_cmp;
1172     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1173     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1174     c->time_base    = s->avctx->time_base;
1175     c->max_b_frames = s->max_b_frames;
1176
1177     if (avcodec_open2(c, codec, NULL) < 0)
1178         return -1;
1179
1180     for (i = 0; i < s->max_b_frames + 2; i++) {
1181         int ysize = c->width * c->height;
1182         int csize = (c->width / 2) * (c->height / 2);
1183         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1184                                                 s->next_picture_ptr;
1185
1186         avcodec_get_frame_defaults(&input[i]);
1187         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1188         input[i].data[1]     = input[i].data[0] + ysize;
1189         input[i].data[2]     = input[i].data[1] + csize;
1190         input[i].linesize[0] = c->width;
1191         input[i].linesize[1] =
1192         input[i].linesize[2] = c->width / 2;
1193
1194         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1195             pre_input = *pre_input_ptr;
1196
1197             if (!pre_input.shared && i) {
1198                 pre_input.f.data[0] += INPLACE_OFFSET;
1199                 pre_input.f.data[1] += INPLACE_OFFSET;
1200                 pre_input.f.data[2] += INPLACE_OFFSET;
1201             }
1202
1203             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1204                                  pre_input.f.data[0], pre_input.f.linesize[0],
1205                                  c->width,      c->height);
1206             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1207                                  pre_input.f.data[1], pre_input.f.linesize[1],
1208                                  c->width >> 1, c->height >> 1);
1209             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1210                                  pre_input.f.data[2], pre_input.f.linesize[2],
1211                                  c->width >> 1, c->height >> 1);
1212         }
1213     }
1214
1215     for (j = 0; j < s->max_b_frames + 1; j++) {
1216         int64_t rd = 0;
1217
1218         if (!s->input_picture[j])
1219             break;
1220
1221         c->error[0] = c->error[1] = c->error[2] = 0;
1222
1223         input[0].pict_type = AV_PICTURE_TYPE_I;
1224         input[0].quality   = 1 * FF_QP2LAMBDA;
1225
1226         out_size = encode_frame(c, &input[0]);
1227
1228         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1229
1230         for (i = 0; i < s->max_b_frames + 1; i++) {
1231             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1232
1233             input[i + 1].pict_type = is_p ?
1234                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1235             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1236
1237             out_size = encode_frame(c, &input[i + 1]);
1238
1239             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1240         }
1241
1242         /* get the delayed frames */
1243         while (out_size) {
1244             out_size = encode_frame(c, NULL);
1245             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1246         }
1247
1248         rd += c->error[0] + c->error[1] + c->error[2];
1249
1250         if (rd < best_rd) {
1251             best_rd = rd;
1252             best_b_count = j;
1253         }
1254     }
1255
1256     avcodec_close(c);
1257     av_freep(&c);
1258
1259     for (i = 0; i < s->max_b_frames + 2; i++) {
1260         av_freep(&input[i].data[0]);
1261     }
1262
1263     return best_b_count;
1264 }
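/* estimate_best_b_count() implements b_frame_strategy 2: the queued input
 * frames are downscaled by 2^brd_scale and fed to a throw-away encoder, once
 * for every candidate B-run length j, as an I-frame followed by a B..BP
 * pattern.  Each run is scored by rd = sum of
 * (packet_bits * lambda2) >> (FF_LAMBDA_SHIFT - 3) plus the accumulated SSE
 * from c->error[], and the j with the smallest rd is returned as the number
 * of B-frames to use. */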
1265
1266 static int select_input_picture(MpegEncContext *s)
1267 {
1268     int i, ret;
1269
1270     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1271         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1272     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1273
1274     /* set next picture type & ordering */
1275     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1276         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1277             s->next_picture_ptr == NULL || s->intra_only) {
1278             s->reordered_input_picture[0] = s->input_picture[0];
1279             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1280             s->reordered_input_picture[0]->f.coded_picture_number =
1281                 s->coded_picture_number++;
1282         } else {
1283             int b_frames;
1284
1285             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1286                 if (s->picture_in_gop_number < s->gop_size &&
1287                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1288                     // FIXME check that the gop check above is +-1 correct
1289                     av_frame_unref(&s->input_picture[0]->f);
1290
1291                     emms_c();
1292                     ff_vbv_update(s, 0);
1293
1294                     goto no_output_pic;
1295                 }
1296             }
1297
1298             if (s->flags & CODEC_FLAG_PASS2) {
1299                 for (i = 0; i < s->max_b_frames + 1; i++) {
1300                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1301
1302                     if (pict_num >= s->rc_context.num_entries)
1303                         break;
1304                     if (!s->input_picture[i]) {
1305                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1306                         break;
1307                     }
1308
1309                     s->input_picture[i]->f.pict_type =
1310                         s->rc_context.entry[pict_num].new_pict_type;
1311                 }
1312             }
1313
1314             if (s->avctx->b_frame_strategy == 0) {
1315                 b_frames = s->max_b_frames;
1316                 while (b_frames && !s->input_picture[b_frames])
1317                     b_frames--;
1318             } else if (s->avctx->b_frame_strategy == 1) {
1319                 for (i = 1; i < s->max_b_frames + 1; i++) {
1320                     if (s->input_picture[i] &&
1321                         s->input_picture[i]->b_frame_score == 0) {
1322                         s->input_picture[i]->b_frame_score =
1323                             get_intra_count(s,
1324                                             s->input_picture[i    ]->f.data[0],
1325                                             s->input_picture[i - 1]->f.data[0],
1326                                             s->linesize) + 1;
1327                     }
1328                 }
1329                 for (i = 0; i < s->max_b_frames + 1; i++) {
1330                     if (s->input_picture[i] == NULL ||
1331                         s->input_picture[i]->b_frame_score - 1 >
1332                             s->mb_num / s->avctx->b_sensitivity)
1333                         break;
1334                 }
1335
1336                 b_frames = FFMAX(0, i - 1);
1337
1338                 /* reset scores */
1339                 for (i = 0; i < b_frames + 1; i++) {
1340                     s->input_picture[i]->b_frame_score = 0;
1341                 }
1342             } else if (s->avctx->b_frame_strategy == 2) {
1343                 b_frames = estimate_best_b_count(s);
1344             } else {
1345                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1346                 b_frames = 0;
1347             }
1348
1349             emms_c();
1350
1351             for (i = b_frames - 1; i >= 0; i--) {
1352                 int type = s->input_picture[i]->f.pict_type;
1353                 if (type && type != AV_PICTURE_TYPE_B)
1354                     b_frames = i;
1355             }
1356             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1357                 b_frames == s->max_b_frames) {
1358                 av_log(s->avctx, AV_LOG_ERROR,
1359                        "warning, too many b frames in a row\n");
1360             }
1361
1362             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1363                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1364                     s->gop_size > s->picture_in_gop_number) {
1365                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1366                 } else {
1367                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1368                         b_frames = 0;
1369                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1370                 }
1371             }
1372
1373             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1374                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1375                 b_frames--;
1376
1377             s->reordered_input_picture[0] = s->input_picture[b_frames];
1378             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1379                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1380             s->reordered_input_picture[0]->f.coded_picture_number =
1381                 s->coded_picture_number++;
1382             for (i = 0; i < b_frames; i++) {
1383                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1384                 s->reordered_input_picture[i + 1]->f.pict_type =
1385                     AV_PICTURE_TYPE_B;
1386                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1387                     s->coded_picture_number++;
1388             }
1389         }
1390     }
1391 no_output_pic:
1392     if (s->reordered_input_picture[0]) {
1393         s->reordered_input_picture[0]->reference =
1394            s->reordered_input_picture[0]->f.pict_type !=
1395                AV_PICTURE_TYPE_B ? 3 : 0;
1396
1397         ff_mpeg_unref_picture(s, &s->new_picture);
1398         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1399             return ret;
1400
1401         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1402             // input is a shared pix, so we can't modify it -> alloc a new
1403             // one & ensure that the shared one is reusable
1404
1405             Picture *pic;
1406             int i = ff_find_unused_picture(s, 0);
1407             if (i < 0)
1408                 return i;
1409             pic = &s->picture[i];
1410
1411             pic->reference = s->reordered_input_picture[0]->reference;
1412             if (ff_alloc_picture(s, pic, 0) < 0) {
1413                 return -1;
1414             }
1415
1416             copy_picture_attributes(s, &pic->f,
1417                                     &s->reordered_input_picture[0]->f);
1418
1419             /* mark ourselves as unused / free the shared picture */
1420             av_frame_unref(&s->reordered_input_picture[0]->f);
1421             s->reordered_input_picture[0]->shared = 0;
1422
1423             s->current_picture_ptr = pic;
1424         } else {
1425             // input is not a shared pix -> reuse buffer for current_pix
1426             s->current_picture_ptr = s->reordered_input_picture[0];
1427             for (i = 0; i < 4; i++) {
1428                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1429             }
1430         }
1431         ff_mpeg_unref_picture(s, &s->current_picture);
1432         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1433                                        s->current_picture_ptr)) < 0)
1434             return ret;
1435
1436         s->picture_number = s->new_picture.f.display_picture_number;
1437     } else {
1438         ff_mpeg_unref_picture(s, &s->new_picture);
1439     }
1440     return 0;
1441 }
1442
1443 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1444                           AVFrame *pic_arg, int *got_packet)
1445 {
1446     MpegEncContext *s = avctx->priv_data;
1447     int i, stuffing_count, ret;
1448     int context_count = s->slice_context_count;
1449
1450     s->picture_in_gop_number++;
1451
1452     if (load_input_picture(s, pic_arg) < 0)
1453         return -1;
1454
1455     if (select_input_picture(s) < 0) {
1456         return -1;
1457     }
1458
1459     /* output? */
1460     if (s->new_picture.f.data[0]) {
1461         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1462             return ret;
1463         if (s->mb_info) {
1464             s->mb_info_ptr = av_packet_new_side_data(pkt,
1465                                  AV_PKT_DATA_H263_MB_INFO,
1466                                  s->mb_width*s->mb_height*12);
1467             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1468         }
1469
1470         for (i = 0; i < context_count; i++) {
1471             int start_y = s->thread_context[i]->start_mb_y;
1472             int   end_y = s->thread_context[i]->  end_mb_y;
1473             int h       = s->mb_height;
1474             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1475             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1476
1477             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1478         }
1479
1480         s->pict_type = s->new_picture.f.pict_type;
1481         //emms_c();
1482         if (ff_MPV_frame_start(s, avctx) < 0)
1483             return -1;
1484 vbv_retry:
1485         if (encode_picture(s, s->picture_number) < 0)
1486             return -1;
1487
1488         avctx->header_bits = s->header_bits;
1489         avctx->mv_bits     = s->mv_bits;
1490         avctx->misc_bits   = s->misc_bits;
1491         avctx->i_tex_bits  = s->i_tex_bits;
1492         avctx->p_tex_bits  = s->p_tex_bits;
1493         avctx->i_count     = s->i_count;
1494         // FIXME f/b_count in avctx
1495         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1496         avctx->skip_count  = s->skip_count;
1497
1498         ff_MPV_frame_end(s);
1499
1500         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1501             ff_mjpeg_encode_picture_trailer(s);
1502
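        /* VBV check: if rate control has a buffer configured and this frame
         * takes more than its allowed share of it, raise lambda (and the
         * per-MB lambda table when adaptive quantization is on), undo the
         * per-frame state changes and re-encode from vbv_retry. */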
1503         if (avctx->rc_buffer_size) {
1504             RateControlContext *rcc = &s->rc_context;
1505             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1506
1507             if (put_bits_count(&s->pb) > max_size &&
1508                 s->lambda < s->avctx->lmax) {
1509                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1510                                        (s->qscale + 1) / s->qscale);
1511                 if (s->adaptive_quant) {
1512                     int i;
1513                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1514                         s->lambda_table[i] =
1515                             FFMAX(s->lambda_table[i] + 1,
1516                                   s->lambda_table[i] * (s->qscale + 1) /
1517                                   s->qscale);
1518                 }
1519                 s->mb_skipped = 0;        // done in MPV_frame_start()
1520                 // done in encode_picture() so we must undo it
1521                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1522                     if (s->flipflop_rounding          ||
1523                         s->codec_id == AV_CODEC_ID_H263P ||
1524                         s->codec_id == AV_CODEC_ID_MPEG4)
1525                         s->no_rounding ^= 1;
1526                 }
1527                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1528                     s->time_base       = s->last_time_base;
1529                     s->last_non_b_time = s->time - s->pp_time;
1530                 }
1531                 for (i = 0; i < context_count; i++) {
1532                     PutBitContext *pb = &s->thread_context[i]->pb;
1533                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1534                 }
1535                 goto vbv_retry;
1536             }
1537
1538             assert(s->avctx->rc_max_rate);
1539         }
1540
1541         if (s->flags & CODEC_FLAG_PASS1)
1542             ff_write_pass1_stats(s);
1543
1544         for (i = 0; i < 4; i++) {
1545             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1546             avctx->error[i] += s->current_picture_ptr->f.error[i];
1547         }
1548
1549         if (s->flags & CODEC_FLAG_PASS1)
1550             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1551                    avctx->i_tex_bits + avctx->p_tex_bits ==
1552                        put_bits_count(&s->pb));
1553         flush_put_bits(&s->pb);
1554         s->frame_bits  = put_bits_count(&s->pb);
1555
1556         stuffing_count = ff_vbv_update(s, s->frame_bits);
1557         s->stuffing_bits = 8*stuffing_count;
1558         if (stuffing_count) {
1559             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1560                     stuffing_count + 50) {
1561                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1562                 return -1;
1563             }
1564
1565             switch (s->codec_id) {
1566             case AV_CODEC_ID_MPEG1VIDEO:
1567             case AV_CODEC_ID_MPEG2VIDEO:
1568                 while (stuffing_count--) {
1569                     put_bits(&s->pb, 8, 0);
1570                 }
1571             break;
1572             case AV_CODEC_ID_MPEG4:
1573                 put_bits(&s->pb, 16, 0);
1574                 put_bits(&s->pb, 16, 0x1C3);
1575                 stuffing_count -= 4;
1576                 while (stuffing_count--) {
1577                     put_bits(&s->pb, 8, 0xFF);
1578                 }
1579             break;
1580             default:
1581                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1582             }
1583             flush_put_bits(&s->pb);
1584             s->frame_bits  = put_bits_count(&s->pb);
1585         }
1586
1587         /* update mpeg1/2 vbv_delay for CBR */
1588         if (s->avctx->rc_max_rate                          &&
1589             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1590             s->out_format == FMT_MPEG1                     &&
1591             90000LL * (avctx->rc_buffer_size - 1) <=
1592                 s->avctx->rc_max_rate * 0xFFFFLL) {
1593             int vbv_delay, min_delay;
1594             double inbits  = s->avctx->rc_max_rate *
1595                              av_q2d(s->avctx->time_base);
1596             int    minbits = s->frame_bits - 8 *
1597                              (s->vbv_delay_ptr - s->pb.buf - 1);
1598             double bits    = s->rc_context.buffer_index + minbits - inbits;
1599
1600             if (bits < 0)
1601                 av_log(s->avctx, AV_LOG_ERROR,
1602                        "Internal error, negative bits\n");
1603
1604             assert(s->repeat_first_field == 0);
1605
1606             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1607             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1608                         s->avctx->rc_max_rate;
1609
1610             vbv_delay = FFMAX(vbv_delay, min_delay);
1611
1612             av_assert0(vbv_delay < 0xFFFF);
1613
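            /* Patch the 16-bit vbv_delay value into the already written
             * picture header; it spans parts of three consecutive bytes at
             * vbv_delay_ptr. */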
1614             s->vbv_delay_ptr[0] &= 0xF8;
1615             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1616             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1617             s->vbv_delay_ptr[2] &= 0x07;
1618             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1619             avctx->vbv_delay     = vbv_delay * 300;
1620         }
1621         s->total_bits     += s->frame_bits;
1622         avctx->frame_bits  = s->frame_bits;
1623
1624         pkt->pts = s->current_picture.f.pts;
1625         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1626             if (!s->current_picture.f.coded_picture_number)
1627                 pkt->dts = pkt->pts - s->dts_delta;
1628             else
1629                 pkt->dts = s->reordered_pts;
1630             s->reordered_pts = pkt->pts;
1631         } else
1632             pkt->dts = pkt->pts;
1633         if (s->current_picture.f.key_frame)
1634             pkt->flags |= AV_PKT_FLAG_KEY;
1635         if (s->mb_info)
1636             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1637     } else {
1638         s->frame_bits = 0;
1639     }
1640     assert((s->frame_bits & 7) == 0);
1641
1642     pkt->size = s->frame_bits / 8;
1643     *got_packet = !!pkt->size;
1644     return 0;
1645 }
1646
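/* Zero out the whole block if it only contains a few small (|level| == 1)
 * coefficients and their combined score stays below the threshold; a negative
 * threshold also allows the DC coefficient to be eliminated. */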
1647 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1648                                                 int n, int threshold)
1649 {
1650     static const char tab[64] = {
1651         3, 2, 2, 1, 1, 1, 1, 1,
1652         1, 1, 1, 1, 1, 1, 1, 1,
1653         1, 1, 1, 1, 1, 1, 1, 1,
1654         0, 0, 0, 0, 0, 0, 0, 0,
1655         0, 0, 0, 0, 0, 0, 0, 0,
1656         0, 0, 0, 0, 0, 0, 0, 0,
1657         0, 0, 0, 0, 0, 0, 0, 0,
1658         0, 0, 0, 0, 0, 0, 0, 0
1659     };
1660     int score = 0;
1661     int run = 0;
1662     int i;
1663     int16_t *block = s->block[n];
1664     const int last_index = s->block_last_index[n];
1665     int skip_dc;
1666
1667     if (threshold < 0) {
1668         skip_dc = 0;
1669         threshold = -threshold;
1670     } else
1671         skip_dc = 1;
1672
1673     /* Are all the coefficients we could set to zero already zero? */
1674     if (last_index <= skip_dc - 1)
1675         return;
1676
1677     for (i = 0; i <= last_index; i++) {
1678         const int j = s->intra_scantable.permutated[i];
1679         const int level = FFABS(block[j]);
1680         if (level == 1) {
1681             if (skip_dc && i == 0)
1682                 continue;
1683             score += tab[run];
1684             run = 0;
1685         } else if (level > 1) {
1686             return;
1687         } else {
1688             run++;
1689         }
1690     }
1691     if (score >= threshold)
1692         return;
1693     for (i = skip_dc; i <= last_index; i++) {
1694         const int j = s->intra_scantable.permutated[i];
1695         block[j] = 0;
1696     }
1697     if (block[0])
1698         s->block_last_index[n] = 0;
1699     else
1700         s->block_last_index[n] = -1;
1701 }
1702
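/* Clip quantized coefficients into the codec's valid range
 * [min_qcoeff, max_qcoeff]; the intra DC coefficient is left untouched. */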
1703 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1704                                int last_index)
1705 {
1706     int i;
1707     const int maxlevel = s->max_qcoeff;
1708     const int minlevel = s->min_qcoeff;
1709     int overflow = 0;
1710
1711     if (s->mb_intra) {
1712         i = 1; // skip clipping of intra dc
1713     } else
1714         i = 0;
1715
1716     for (; i <= last_index; i++) {
1717         const int j = s->intra_scantable.permutated[i];
1718         int level = block[j];
1719
1720         if (level > maxlevel) {
1721             level = maxlevel;
1722             overflow++;
1723         } else if (level < minlevel) {
1724             level = minlevel;
1725             overflow++;
1726         }
1727
1728         block[j] = level;
1729     }
1730
1731     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1732         av_log(s->avctx, AV_LOG_INFO,
1733                "warning, clipping %d dct coefficients to %d..%d\n",
1734                overflow, minlevel, maxlevel);
1735 }
1736
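/* Compute a per-pixel visual weight for noise shaping, proportional to the
 * local standard deviation of a 3x3 neighbourhood (clipped at block edges). */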
1737 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1738 {
1739     int x, y;
1740     // FIXME optimize
1741     for (y = 0; y < 8; y++) {
1742         for (x = 0; x < 8; x++) {
1743             int x2, y2;
1744             int sum = 0;
1745             int sqr = 0;
1746             int count = 0;
1747
1748             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1749                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1750                     int v = ptr[x2 + y2 * stride];
1751                     sum += v;
1752                     sqr += v * v;
1753                     count++;
1754                 }
1755             }
1756             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1757         }
1758     }
1759 }
1760
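/* Encode one macroblock: apply adaptive quantization, fetch (and if needed
 * edge-emulate) the source pixels, decide between frame and field DCT,
 * transform and quantize the blocks, run coefficient elimination and finally
 * hand the blocks to the codec-specific macroblock coder. */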
1761 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1762                                                 int motion_x, int motion_y,
1763                                                 int mb_block_height,
1764                                                 int mb_block_width,
1765                                                 int mb_block_count)
1766 {
1767     int16_t weight[12][64];
1768     int16_t orig[12][64];
1769     const int mb_x = s->mb_x;
1770     const int mb_y = s->mb_y;
1771     int i;
1772     int skip_dct[12];
1773     int dct_offset = s->linesize * 8; // default for progressive frames
1774     int uv_dct_offset = s->uvlinesize * 8;
1775     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1776     int wrap_y, wrap_c;
1777
1778     for (i = 0; i < mb_block_count; i++)
1779         skip_dct[i] = s->skipdct;
1780
1781     if (s->adaptive_quant) {
1782         const int last_qp = s->qscale;
1783         const int mb_xy = mb_x + mb_y * s->mb_stride;
1784
1785         s->lambda = s->lambda_table[mb_xy];
1786         update_qscale(s);
1787
1788         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1789             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1790             s->dquant = s->qscale - last_qp;
1791
1792             if (s->out_format == FMT_H263) {
1793                 s->dquant = av_clip(s->dquant, -2, 2);
1794
1795                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1796                     if (!s->mb_intra) {
1797                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1798                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1799                                 s->dquant = 0;
1800                         }
1801                         if (s->mv_type == MV_TYPE_8X8)
1802                             s->dquant = 0;
1803                     }
1804                 }
1805             }
1806         }
1807         ff_set_qscale(s, last_qp + s->dquant);
1808     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1809         ff_set_qscale(s, s->qscale + s->dquant);
1810
1811     wrap_y = s->linesize;
1812     wrap_c = s->uvlinesize;
1813     ptr_y  = s->new_picture.f.data[0] +
1814              (mb_y * 16 * wrap_y)              + mb_x * 16;
1815     ptr_cb = s->new_picture.f.data[1] +
1816              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1817     ptr_cr = s->new_picture.f.data[2] +
1818              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1819
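    /* If the macroblock extends past the picture border, copy its pixels into
     * the edge emulation buffer so the DCT reads valid data. */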
1820     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1821         uint8_t *ebuf = s->edge_emu_buffer + 32;
1822         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
1823         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
1824         s->vdsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1825                                  mb_y * 16, s->width, s->height);
1826         ptr_y = ebuf;
1827         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, mb_block_width,
1828                                  mb_block_height, mb_x * mb_block_width, mb_y * mb_block_height,
1829                                  cw, ch);
1830         ptr_cb = ebuf + 18 * wrap_y;
1831         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr, wrap_c, mb_block_width,
1832                                  mb_block_height, mb_x * mb_block_width, mb_y * mb_block_height,
1833                                  cw, ch);
1834         ptr_cr = ebuf + 18 * wrap_y + 16;
1835     }
1836
1837     if (s->mb_intra) {
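        /* With interlaced DCT enabled, compare the cost of frame (progressive)
         * and field (interlaced) block ordering and pick the cheaper one. */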
1838         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1839             int progressive_score, interlaced_score;
1840
1841             s->interlaced_dct = 0;
1842             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1843                                                     NULL, wrap_y, 8) +
1844                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1845                                                     NULL, wrap_y, 8) - 400;
1846
1847             if (progressive_score > 0) {
1848                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1849                                                        NULL, wrap_y * 2, 8) +
1850                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1851                                                        NULL, wrap_y * 2, 8);
1852                 if (progressive_score > interlaced_score) {
1853                     s->interlaced_dct = 1;
1854
1855                     dct_offset = wrap_y;
1856                     uv_dct_offset = wrap_c;
1857                     wrap_y <<= 1;
1858                     if (s->chroma_format == CHROMA_422 ||
1859                         s->chroma_format == CHROMA_444)
1860                         wrap_c <<= 1;
1861                 }
1862             }
1863         }
1864
1865         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1866         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1867         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1868         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1869
1870         if (s->flags & CODEC_FLAG_GRAY) {
1871             skip_dct[4] = 1;
1872             skip_dct[5] = 1;
1873         } else {
1874             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1875             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1876             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
1877                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
1878                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
1879             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
1880                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
1881                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
1882                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
1883                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
1884                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
1885                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
1886             }
1887         }
1888     } else {
1889         op_pixels_func (*op_pix)[4];
1890         qpel_mc_func (*op_qpix)[16];
1891         uint8_t *dest_y, *dest_cb, *dest_cr;
1892
1893         dest_y  = s->dest[0];
1894         dest_cb = s->dest[1];
1895         dest_cr = s->dest[2];
1896
1897         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1898             op_pix  = s->hdsp.put_pixels_tab;
1899             op_qpix = s->dsp.put_qpel_pixels_tab;
1900         } else {
1901             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1902             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1903         }
1904
1905         if (s->mv_dir & MV_DIR_FORWARD) {
1906             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1907                           s->last_picture.f.data,
1908                           op_pix, op_qpix);
1909             op_pix  = s->hdsp.avg_pixels_tab;
1910             op_qpix = s->dsp.avg_qpel_pixels_tab;
1911         }
1912         if (s->mv_dir & MV_DIR_BACKWARD) {
1913             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1914                           s->next_picture.f.data,
1915                           op_pix, op_qpix);
1916         }
1917
1918         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1919             int progressive_score, interlaced_score;
1920
1921             s->interlaced_dct = 0;
1922             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1923                                                     ptr_y,              wrap_y,
1924                                                     8) +
1925                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1926                                                     ptr_y + wrap_y * 8, wrap_y,
1927                                                     8) - 400;
1928
1929             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1930                 progressive_score -= 400;
1931
1932             if (progressive_score > 0) {
1933                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1934                                                        ptr_y,
1935                                                        wrap_y * 2, 8) +
1936                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1937                                                        ptr_y + wrap_y,
1938                                                        wrap_y * 2, 8);
1939
1940                 if (progressive_score > interlaced_score) {
1941                     s->interlaced_dct = 1;
1942
1943                     dct_offset = wrap_y;
1944                     uv_dct_offset = wrap_c;
1945                     wrap_y <<= 1;
1946                     if (s->chroma_format == CHROMA_422)
1947                         wrap_c <<= 1;
1948                 }
1949             }
1950         }
1951
1952         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1953         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1954         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1955                            dest_y + dct_offset, wrap_y);
1956         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1957                            dest_y + dct_offset + 8, wrap_y);
1958
1959         if (s->flags & CODEC_FLAG_GRAY) {
1960             skip_dct[4] = 1;
1961             skip_dct[5] = 1;
1962         } else {
1963             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1964             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1965             if (!s->chroma_y_shift) { /* 422 */
1966                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
1967                                    dest_cb + uv_dct_offset, wrap_c);
1968                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
1969                                    dest_cr + uv_dct_offset, wrap_c);
1970             }
1971         }
1972         /* pre quantization */
1973         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1974                 2 * s->qscale * s->qscale) {
1975             // FIXME optimize
1976             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1977                               wrap_y, 8) < 20 * s->qscale)
1978                 skip_dct[0] = 1;
1979             if (s->dsp.sad[1](NULL, ptr_y + 8,
1980                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1981                 skip_dct[1] = 1;
1982             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1983                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1984                 skip_dct[2] = 1;
1985             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1986                               dest_y + dct_offset + 8,
1987                               wrap_y, 8) < 20 * s->qscale)
1988                 skip_dct[3] = 1;
1989             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1990                               wrap_c, 8) < 20 * s->qscale)
1991                 skip_dct[4] = 1;
1992             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1993                               wrap_c, 8) < 20 * s->qscale)
1994                 skip_dct[5] = 1;
1995             if (!s->chroma_y_shift) { /* 422 */
1996                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
1997                                   dest_cb + uv_dct_offset,
1998                                   wrap_c, 8) < 20 * s->qscale)
1999                     skip_dct[6] = 1;
2000                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
2001                                   dest_cr + uv_dct_offset,
2002                                   wrap_c, 8) < 20 * s->qscale)
2003                     skip_dct[7] = 1;
2004             }
2005         }
2006     }
2007
2008     if (s->quantizer_noise_shaping) {
2009         if (!skip_dct[0])
2010             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2011         if (!skip_dct[1])
2012             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2013         if (!skip_dct[2])
2014             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2015         if (!skip_dct[3])
2016             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2017         if (!skip_dct[4])
2018             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2019         if (!skip_dct[5])
2020             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2021         if (!s->chroma_y_shift) { /* 422 */
2022             if (!skip_dct[6])
2023                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2024                                   wrap_c);
2025             if (!skip_dct[7])
2026                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2027                                   wrap_c);
2028         }
2029         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2030     }
2031
2032     /* DCT & quantize */
2033     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2034     {
2035         for (i = 0; i < mb_block_count; i++) {
2036             if (!skip_dct[i]) {
2037                 int overflow;
2038                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2039                 // FIXME we could decide to change the quantizer instead of
2040                 // clipping
2041                 // JS: I don't think that would be a good idea, it could lower
2042                 //     quality instead of improving it. Only INTRADC clipping
2043                 //     deserves changes in the quantizer
2044                 if (overflow)
2045                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2046             } else
2047                 s->block_last_index[i] = -1;
2048         }
2049         if (s->quantizer_noise_shaping) {
2050             for (i = 0; i < mb_block_count; i++) {
2051                 if (!skip_dct[i]) {
2052                     s->block_last_index[i] =
2053                         dct_quantize_refine(s, s->block[i], weight[i],
2054                                             orig[i], i, s->qscale);
2055                 }
2056             }
2057         }
2058
2059         if (s->luma_elim_threshold && !s->mb_intra)
2060             for (i = 0; i < 4; i++)
2061                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2062         if (s->chroma_elim_threshold && !s->mb_intra)
2063             for (i = 4; i < mb_block_count; i++)
2064                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2065
2066         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2067             for (i = 0; i < mb_block_count; i++) {
2068                 if (s->block_last_index[i] == -1)
2069                     s->coded_score[i] = INT_MAX / 256;
2070             }
2071         }
2072     }
2073
2074     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2075         s->block_last_index[4] =
2076         s->block_last_index[5] = 0;
2077         s->block[4][0] =
2078         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2079         if (!s->chroma_y_shift) { /* 422 / 444 */
2080             for (i=6; i<12; i++) {
2081                 s->block_last_index[i] = 0;
2082                 s->block[i][0] = s->block[4][0];
2083             }
2084         }
2085     }
2086
2087     // FIXME the non-C quantize code returns an incorrect block_last_index
2088     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2089         for (i = 0; i < mb_block_count; i++) {
2090             int j;
2091             if (s->block_last_index[i] > 0) {
2092                 for (j = 63; j > 0; j--) {
2093                     if (s->block[i][s->intra_scantable.permutated[j]])
2094                         break;
2095                 }
2096                 s->block_last_index[i] = j;
2097             }
2098         }
2099     }
2100
2101     /* huffman encode */
2102     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2103     case AV_CODEC_ID_MPEG1VIDEO:
2104     case AV_CODEC_ID_MPEG2VIDEO:
2105         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2106             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2107         break;
2108     case AV_CODEC_ID_MPEG4:
2109         if (CONFIG_MPEG4_ENCODER)
2110             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2111         break;
2112     case AV_CODEC_ID_MSMPEG4V2:
2113     case AV_CODEC_ID_MSMPEG4V3:
2114     case AV_CODEC_ID_WMV1:
2115         if (CONFIG_MSMPEG4_ENCODER)
2116             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2117         break;
2118     case AV_CODEC_ID_WMV2:
2119         if (CONFIG_WMV2_ENCODER)
2120             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2121         break;
2122     case AV_CODEC_ID_H261:
2123         if (CONFIG_H261_ENCODER)
2124             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2125         break;
2126     case AV_CODEC_ID_H263:
2127     case AV_CODEC_ID_H263P:
2128     case AV_CODEC_ID_FLV1:
2129     case AV_CODEC_ID_RV10:
2130     case AV_CODEC_ID_RV20:
2131         if (CONFIG_H263_ENCODER)
2132             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2133         break;
2134     case AV_CODEC_ID_MJPEG:
2135     case AV_CODEC_ID_AMV:
2136         if (CONFIG_MJPEG_ENCODER)
2137             ff_mjpeg_encode_mb(s, s->block);
2138         break;
2139     default:
2140         av_assert1(0);
2141     }
2142 }
2143
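/* Dispatch on the chroma subsampling: 4:2:0 codes 6 blocks per macroblock,
 * 4:2:2 codes 8 and 4:4:4 codes 12. */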
2144 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2145 {
2146     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2147     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2148     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2149 }
2150
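/* Save the encoder state that encoding a macroblock may modify, so several
 * candidate macroblock types can be tried from the same starting point. */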
2151 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2152     int i;
2153
2154     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2155
2156     /* mpeg1 */
2157     d->mb_skip_run= s->mb_skip_run;
2158     for(i=0; i<3; i++)
2159         d->last_dc[i] = s->last_dc[i];
2160
2161     /* statistics */
2162     d->mv_bits= s->mv_bits;
2163     d->i_tex_bits= s->i_tex_bits;
2164     d->p_tex_bits= s->p_tex_bits;
2165     d->i_count= s->i_count;
2166     d->f_count= s->f_count;
2167     d->b_count= s->b_count;
2168     d->skip_count= s->skip_count;
2169     d->misc_bits= s->misc_bits;
2170     d->last_bits= 0;
2171
2172     d->mb_skipped= 0;
2173     d->qscale= s->qscale;
2174     d->dquant= s->dquant;
2175
2176     d->esc3_level_length= s->esc3_level_length;
2177 }
2178
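/* Copy back the state produced by the best candidate, so encoding continues
 * as if that macroblock type had been coded directly. */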
2179 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2180     int i;
2181
2182     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2183     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2184
2185     /* mpeg1 */
2186     d->mb_skip_run= s->mb_skip_run;
2187     for(i=0; i<3; i++)
2188         d->last_dc[i] = s->last_dc[i];
2189
2190     /* statistics */
2191     d->mv_bits= s->mv_bits;
2192     d->i_tex_bits= s->i_tex_bits;
2193     d->p_tex_bits= s->p_tex_bits;
2194     d->i_count= s->i_count;
2195     d->f_count= s->f_count;
2196     d->b_count= s->b_count;
2197     d->skip_count= s->skip_count;
2198     d->misc_bits= s->misc_bits;
2199
2200     d->mb_intra= s->mb_intra;
2201     d->mb_skipped= s->mb_skipped;
2202     d->mv_type= s->mv_type;
2203     d->mv_dir= s->mv_dir;
2204     d->pb= s->pb;
2205     if(s->data_partitioning){
2206         d->pb2= s->pb2;
2207         d->tex_pb= s->tex_pb;
2208     }
2209     d->block= s->block;
2210     for(i=0; i<8; i++)
2211         d->block_last_index[i]= s->block_last_index[i];
2212     d->interlaced_dct= s->interlaced_dct;
2213     d->qscale= s->qscale;
2214
2215     d->esc3_level_length= s->esc3_level_length;
2216 }
2217
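/* Encode the macroblock with the given candidate type into one of two scratch
 * bitstreams, compute its rate (plus SSE distortion when full RD macroblock
 * decision is enabled) and keep it if it beats the current best score. */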
2218 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2219                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2220                            int *dmin, int *next_block, int motion_x, int motion_y)
2221 {
2222     int score;
2223     uint8_t *dest_backup[3];
2224
2225     copy_context_before_encode(s, backup, type);
2226
2227     s->block= s->blocks[*next_block];
2228     s->pb= pb[*next_block];
2229     if(s->data_partitioning){
2230         s->pb2   = pb2   [*next_block];
2231         s->tex_pb= tex_pb[*next_block];
2232     }
2233
2234     if(*next_block){
2235         memcpy(dest_backup, s->dest, sizeof(s->dest));
2236         s->dest[0] = s->rd_scratchpad;
2237         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2238         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2239         assert(s->linesize >= 32); //FIXME
2240     }
2241
2242     encode_mb(s, motion_x, motion_y);
2243
2244     score= put_bits_count(&s->pb);
2245     if(s->data_partitioning){
2246         score+= put_bits_count(&s->pb2);
2247         score+= put_bits_count(&s->tex_pb);
2248     }
2249
2250     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2251         ff_MPV_decode_mb(s, s->block);
2252
2253         score *= s->lambda2;
2254         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2255     }
2256
2257     if(*next_block){
2258         memcpy(s->dest, dest_backup, sizeof(s->dest));
2259     }
2260
2261     if(score<*dmin){
2262         *dmin= score;
2263         *next_block^=1;
2264
2265         copy_context_after_encode(best, s, type);
2266     }
2267 }
2268
2269 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2270     uint32_t *sq = ff_squareTbl + 256;
2271     int acc=0;
2272     int x,y;
2273
2274     if(w==16 && h==16)
2275         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2276     else if(w==8 && h==8)
2277         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2278
2279     for(y=0; y<h; y++){
2280         for(x=0; x<w; x++){
2281             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2282         }
2283     }
2284
2285     av_assert2(acc>=0);
2286
2287     return acc;
2288 }
2289
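/* Sum of squared errors between the source and the reconstructed macroblock
 * over the luma and both chroma planes; uses NSSE if selected as mb_cmp. */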
2290 static int sse_mb(MpegEncContext *s){
2291     int w= 16;
2292     int h= 16;
2293
2294     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2295     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2296
2297     if(w==16 && h==16)
2298       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2299         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2300                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2301                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2302       }else{
2303         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2304                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2305                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2306       }
2307     else
2308         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2309                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2310                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2311 }
2312
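/* Motion-estimation pre-pass for one slice context: macroblocks are scanned
 * bottom-up / right-to-left using the pre_dia_size diamond. */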
2313 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2314     MpegEncContext *s= *(void**)arg;
2315
2316
2317     s->me.pre_pass=1;
2318     s->me.dia_size= s->avctx->pre_dia_size;
2319     s->first_slice_line=1;
2320     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2321         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2322             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2323         }
2324         s->first_slice_line=0;
2325     }
2326
2327     s->me.pre_pass=0;
2328
2329     return 0;
2330 }
2331
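/* Main motion-estimation pass for one slice context: estimate either P- or
 * B-frame motion for every macroblock and store the result in the context. */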
2332 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2333     MpegEncContext *s= *(void**)arg;
2334
2335     ff_check_alignment();
2336
2337     s->me.dia_size= s->avctx->dia_size;
2338     s->first_slice_line=1;
2339     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2340         s->mb_x=0; //for block init below
2341         ff_init_block_index(s);
2342         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2343             s->block_index[0]+=2;
2344             s->block_index[1]+=2;
2345             s->block_index[2]+=2;
2346             s->block_index[3]+=2;
2347
2348             /* compute motion vector & mb_type and store in context */
2349             if(s->pict_type==AV_PICTURE_TYPE_B)
2350                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2351             else
2352                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2353         }
2354         s->first_slice_line=0;
2355     }
2356     return 0;
2357 }
2358
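/* Compute the spatial variance and mean of every source macroblock in this
 * slice; the values are used by rate control and adaptive quantization. */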
2359 static int mb_var_thread(AVCodecContext *c, void *arg){
2360     MpegEncContext *s= *(void**)arg;
2361     int mb_x, mb_y;
2362
2363     ff_check_alignment();
2364
2365     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2366         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2367             int xx = mb_x * 16;
2368             int yy = mb_y * 16;
2369             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2370             int varc;
2371             int sum = s->dsp.pix_sum(pix, s->linesize);
2372
2373             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2374
2375             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2376             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2377             s->me.mb_var_sum_temp    += varc;
2378         }
2379     }
2380     return 0;
2381 }
2382
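/* Terminate the current slice: merge MPEG-4 data partitions, write the
 * required stuffing, then byte-align and flush the bitstream writer. */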
2383 static void write_slice_end(MpegEncContext *s){
2384     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2385         if(s->partitioned_frame){
2386             ff_mpeg4_merge_partitions(s);
2387         }
2388
2389         ff_mpeg4_stuffing(&s->pb);
2390     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2391         ff_mjpeg_encode_stuffing(s);
2392     }
2393
2394     avpriv_align_put_bits(&s->pb);
2395     flush_put_bits(&s->pb);
2396
2397     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2398         s->misc_bits+= get_bits_diff(s);
2399 }
2400
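/* Write one 12-byte H.263 macroblock info record (bit offset, quantizer, GOB
 * number, macroblock address and predicted motion vector) into the side-data
 * buffer. */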
2401 static void write_mb_info(MpegEncContext *s)
2402 {
2403     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2404     int offset = put_bits_count(&s->pb);
2405     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2406     int gobn = s->mb_y / s->gob_index;
2407     int pred_x, pred_y;
2408     if (CONFIG_H263_ENCODER)
2409         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2410     bytestream_put_le32(&ptr, offset);
2411     bytestream_put_byte(&ptr, s->qscale);
2412     bytestream_put_byte(&ptr, gobn);
2413     bytestream_put_le16(&ptr, mba);
2414     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2415     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2416     /* 4MV not implemented */
2417     bytestream_put_byte(&ptr, 0); /* hmv2 */
2418     bytestream_put_byte(&ptr, 0); /* vmv2 */
2419 }
2420
2421 static void update_mb_info(MpegEncContext *s, int startcode)
2422 {
2423     if (!s->mb_info)
2424         return;
2425     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2426         s->mb_info_size += 12;
2427         s->prev_mb_info = s->last_mb_info;
2428     }
2429     if (startcode) {
2430         s->prev_mb_info = put_bits_count(&s->pb)/8;
2431         /* This might have incremented mb_info_size above, and we return without
2432          * actually writing any info into that slot yet. But in that case,
2433          * this will be called again after the start code has been written, and
2434          * the mb info will be written then. */
2435         return;
2436     }
2437
2438     s->last_mb_info = put_bits_count(&s->pb)/8;
2439     if (!s->mb_info_size)
2440         s->mb_info_size += 12;
2441     write_mb_info(s);
2442 }
2443
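/* Encode all macroblock rows assigned to this slice context: write GOB /
 * slice / video packet headers where needed and, when several macroblock
 * types are possible, pick the best one by rate(-distortion) comparison. */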
2444 static int encode_thread(AVCodecContext *c, void *arg){
2445     MpegEncContext *s= *(void**)arg;
2446     int mb_x, mb_y, pdif = 0;
2447     int chr_h= 16>>s->chroma_y_shift;
2448     int i, j;
2449     MpegEncContext best_s, backup_s;
2450     uint8_t bit_buf[2][MAX_MB_BYTES];
2451     uint8_t bit_buf2[2][MAX_MB_BYTES];
2452     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2453     PutBitContext pb[2], pb2[2], tex_pb[2];
2454
2455     ff_check_alignment();
2456
2457     for(i=0; i<2; i++){
2458         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2459         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2460         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2461     }
2462
2463     s->last_bits= put_bits_count(&s->pb);
2464     s->mv_bits=0;
2465     s->misc_bits=0;
2466     s->i_tex_bits=0;
2467     s->p_tex_bits=0;
2468     s->i_count=0;
2469     s->f_count=0;
2470     s->b_count=0;
2471     s->skip_count=0;
2472
2473     for(i=0; i<3; i++){
2474         /* init last dc values */
2475         /* note: quant matrix value (8) is implied here */
2476         s->last_dc[i] = 128 << s->intra_dc_precision;
2477
2478         s->current_picture.f.error[i] = 0;
2479     }
2480     if(s->codec_id==AV_CODEC_ID_AMV){
2481         s->last_dc[0] = 128*8/13;
2482         s->last_dc[1] = 128*8/14;
2483         s->last_dc[2] = 128*8/14;
2484     }
2485     s->mb_skip_run = 0;
2486     memset(s->last_mv, 0, sizeof(s->last_mv));
2487
2488     s->last_mv_dir = 0;
2489
2490     switch(s->codec_id){
2491     case AV_CODEC_ID_H263:
2492     case AV_CODEC_ID_H263P:
2493     case AV_CODEC_ID_FLV1:
2494         if (CONFIG_H263_ENCODER)
2495             s->gob_index = ff_h263_get_gob_height(s);
2496         break;
2497     case AV_CODEC_ID_MPEG4:
2498         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2499             ff_mpeg4_init_partitions(s);
2500         break;
2501     }
2502
2503     s->resync_mb_x=0;
2504     s->resync_mb_y=0;
2505     s->first_slice_line = 1;
2506     s->ptr_lastgob = s->pb.buf;
2507     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2508         s->mb_x=0;
2509         s->mb_y= mb_y;
2510
2511         ff_set_qscale(s, s->qscale);
2512         ff_init_block_index(s);
2513
2514         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2515             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2516             int mb_type= s->mb_type[xy];
2517 //            int d;
2518             int dmin= INT_MAX;
2519             int dir;
2520
2521             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2522                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2523                 return -1;
2524             }
2525             if(s->data_partitioning){
2526                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2527                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2528                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2529                     return -1;
2530                 }
2531             }
2532
2533             s->mb_x = mb_x;
2534             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2535             ff_update_block_index(s);
2536
2537             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2538                 ff_h261_reorder_mb_index(s);
2539                 xy= s->mb_y*s->mb_stride + s->mb_x;
2540                 mb_type= s->mb_type[xy];
2541             }
2542
2543             /* write gob / video packet header  */
2544             if(s->rtp_mode){
2545                 int current_packet_size, is_gob_start;
2546
2547                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2548
2549                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2550
2551                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2552
2553                 switch(s->codec_id){
2554                 case AV_CODEC_ID_H263:
2555                 case AV_CODEC_ID_H263P:
2556                     if(!s->h263_slice_structured)
2557                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2558                     break;
2559                 case AV_CODEC_ID_MPEG2VIDEO:
2560                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
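                    /* fall through */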
2561                 case AV_CODEC_ID_MPEG1VIDEO:
2562                     if(s->mb_skip_run) is_gob_start=0;
2563                     break;
2564                 case AV_CODEC_ID_MJPEG:
2565                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2566                     break;
2567                 }
2568
2569                 if(is_gob_start){
2570                     if(s->start_mb_y != mb_y || mb_x!=0){
2571                         write_slice_end(s);
2572                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2573                             ff_mpeg4_init_partitions(s);
2574                         }
2575                     }
2576
2577                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2578                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2579
2580                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2581                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2582                         int d= 100 / s->avctx->error_rate;
2583                         if(r % d == 0){
2584                             current_packet_size=0;
2585                             s->pb.buf_ptr= s->ptr_lastgob;
2586                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2587                         }
2588                     }
2589
2590                     if (s->avctx->rtp_callback){
2591                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2592                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2593                     }
2594                     update_mb_info(s, 1);
2595
2596                     switch(s->codec_id){
2597                     case AV_CODEC_ID_MPEG4:
2598                         if (CONFIG_MPEG4_ENCODER) {
2599                             ff_mpeg4_encode_video_packet_header(s);
2600                             ff_mpeg4_clean_buffers(s);
2601                         }
2602                     break;
2603                     case AV_CODEC_ID_MPEG1VIDEO:
2604                     case AV_CODEC_ID_MPEG2VIDEO:
2605                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2606                             ff_mpeg1_encode_slice_header(s);
2607                             ff_mpeg1_clean_buffers(s);
2608                         }
2609                     break;
2610                     case AV_CODEC_ID_H263:
2611                     case AV_CODEC_ID_H263P:
2612                         if (CONFIG_H263_ENCODER)
2613                             ff_h263_encode_gob_header(s, mb_y);
2614                     break;
2615                     }
2616
2617                     if(s->flags&CODEC_FLAG_PASS1){
2618                         int bits= put_bits_count(&s->pb);
2619                         s->misc_bits+= bits - s->last_bits;
2620                         s->last_bits= bits;
2621                     }
2622
2623                     s->ptr_lastgob += current_packet_size;
2624                     s->first_slice_line=1;
2625                     s->resync_mb_x=mb_x;
2626                     s->resync_mb_y=mb_y;
2627                 }
2628             }
2629
2630             if(  (s->resync_mb_x   == s->mb_x)
2631                && s->resync_mb_y+1 == s->mb_y){
2632                 s->first_slice_line=0;
2633             }
2634
2635             s->mb_skipped=0;
2636             s->dquant=0; //only for QP_RD
2637
2638             update_mb_info(s, 0);
2639
2640             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2641                 int next_block=0;
2642                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2643
2644                 copy_context_before_encode(&backup_s, s, -1);
2645                 backup_s.pb= s->pb;
2646                 best_s.data_partitioning= s->data_partitioning;
2647                 best_s.partitioned_frame= s->partitioned_frame;
2648                 if(s->data_partitioning){
2649                     backup_s.pb2= s->pb2;
2650                     backup_s.tex_pb= s->tex_pb;
2651                 }
2652
2653                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2654                     s->mv_dir = MV_DIR_FORWARD;
2655                     s->mv_type = MV_TYPE_16X16;
2656                     s->mb_intra= 0;
2657                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2658                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2659                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2660                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2661                 }
2662                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2663                     s->mv_dir = MV_DIR_FORWARD;
2664                     s->mv_type = MV_TYPE_FIELD;
2665                     s->mb_intra= 0;
2666                     for(i=0; i<2; i++){
2667                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2668                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2669                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2670                     }
2671                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2672                                  &dmin, &next_block, 0, 0);
2673                 }
2674                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2675                     s->mv_dir = MV_DIR_FORWARD;
2676                     s->mv_type = MV_TYPE_16X16;
2677                     s->mb_intra= 0;
2678                     s->mv[0][0][0] = 0;
2679                     s->mv[0][0][1] = 0;
2680                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2681                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2682                 }
2683                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2684                     s->mv_dir = MV_DIR_FORWARD;
2685                     s->mv_type = MV_TYPE_8X8;
2686                     s->mb_intra= 0;
2687                     for(i=0; i<4; i++){
2688                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2689                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2690                     }
2691                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2692                                  &dmin, &next_block, 0, 0);
2693                 }
2694                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2695                     s->mv_dir = MV_DIR_FORWARD;
2696                     s->mv_type = MV_TYPE_16X16;
2697                     s->mb_intra= 0;
2698                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2699                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2700                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2701                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2702                 }
2703                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2704                     s->mv_dir = MV_DIR_BACKWARD;
2705                     s->mv_type = MV_TYPE_16X16;
2706                     s->mb_intra= 0;
2707                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2708                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2709                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2710                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2711                 }
2712                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2713                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2714                     s->mv_type = MV_TYPE_16X16;
2715                     s->mb_intra= 0;
2716                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2717                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2718                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2719                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2720                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2721                                  &dmin, &next_block, 0, 0);
2722                 }
2723                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2724                     s->mv_dir = MV_DIR_FORWARD;
2725                     s->mv_type = MV_TYPE_FIELD;
2726                     s->mb_intra= 0;
2727                     for(i=0; i<2; i++){
2728                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2729                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2730                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2731                     }
2732                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2733                                  &dmin, &next_block, 0, 0);
2734                 }
2735                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2736                     s->mv_dir = MV_DIR_BACKWARD;
2737                     s->mv_type = MV_TYPE_FIELD;
2738                     s->mb_intra= 0;
2739                     for(i=0; i<2; i++){
2740                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2741                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2742                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2743                     }
2744                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2745                                  &dmin, &next_block, 0, 0);
2746                 }
2747                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2748                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2749                     s->mv_type = MV_TYPE_FIELD;
2750                     s->mb_intra= 0;
2751                     for(dir=0; dir<2; dir++){
2752                         for(i=0; i<2; i++){
2753                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2754                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2755                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2756                         }
2757                     }
2758                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2759                                  &dmin, &next_block, 0, 0);
2760                 }
2761                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2762                     s->mv_dir = 0;
2763                     s->mv_type = MV_TYPE_16X16;
2764                     s->mb_intra= 1;
2765                     s->mv[0][0][0] = 0;
2766                     s->mv[0][0][1] = 0;
2767                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2768                                  &dmin, &next_block, 0, 0);
2769                     if(s->h263_pred || s->h263_aic){
2770                         if(best_s.mb_intra)
2771                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2772                         else
2773                             ff_clean_intra_table_entries(s); //old mode?
2774                     }
2775                 }
2776
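                     /* With FF_MPV_FLAG_QP_RD, re-run the best 16x16 candidate with each allowed
                      * quantizer delta from dquant_tab and keep the cheapest result; the saved
                      * DC/AC prediction state is restored whenever a trial does not win. */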
2777                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2778                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2779                         const int last_qp= backup_s.qscale;
2780                         int qpi, qp, dc[6];
2781                         int16_t ac[6][16];
2782                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2783                         static const int dquant_tab[4]={-1,1,-2,2};
2784                         int storecoefs = s->mb_intra && s->dc_val[0];
2785
2786                         av_assert2(backup_s.dquant == 0);
2787
2788                         //FIXME intra
2789                         s->mv_dir= best_s.mv_dir;
2790                         s->mv_type = MV_TYPE_16X16;
2791                         s->mb_intra= best_s.mb_intra;
2792                         s->mv[0][0][0] = best_s.mv[0][0][0];
2793                         s->mv[0][0][1] = best_s.mv[0][0][1];
2794                         s->mv[1][0][0] = best_s.mv[1][0][0];
2795                         s->mv[1][0][1] = best_s.mv[1][0][1];
2796
2797                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2798                         for(; qpi<4; qpi++){
2799                             int dquant= dquant_tab[qpi];
2800                             qp= last_qp + dquant;
2801                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2802                                 continue;
2803                             backup_s.dquant= dquant;
2804                             if(storecoefs){
2805                                 for(i=0; i<6; i++){
2806                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2807                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2808                                 }
2809                             }
2810
2811                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2812                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2813                             if(best_s.qscale != qp){
2814                                 if(storecoefs){
2815                                     for(i=0; i<6; i++){
2816                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2817                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2818                                     }
2819                                 }
2820                             }
2821                         }
2822                     }
2823                 }
2824                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2825                     int mx= s->b_direct_mv_table[xy][0];
2826                     int my= s->b_direct_mv_table[xy][1];
2827
2828                     backup_s.dquant = 0;
2829                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2830                     s->mb_intra= 0;
2831                     ff_mpeg4_set_direct_mv(s, mx, my);
2832                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2833                                  &dmin, &next_block, mx, my);
2834                 }
2835                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2836                     backup_s.dquant = 0;
2837                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2838                     s->mb_intra= 0;
2839                     ff_mpeg4_set_direct_mv(s, 0, 0);
2840                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2841                                  &dmin, &next_block, 0, 0);
2842                 }
2843                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2844                     int coded=0;
2845                     for(i=0; i<6; i++)
2846                         coded |= s->block_last_index[i];
2847                     if(coded){
2848                         int mx,my;
2849                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2850                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2851                             mx=my=0; //FIXME find the one we actually used
2852                             ff_mpeg4_set_direct_mv(s, mx, my);
2853                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2854                             mx= s->mv[1][0][0];
2855                             my= s->mv[1][0][1];
2856                         }else{
2857                             mx= s->mv[0][0][0];
2858                             my= s->mv[0][0][1];
2859                         }
2860
2861                         s->mv_dir= best_s.mv_dir;
2862                         s->mv_type = best_s.mv_type;
2863                         s->mb_intra= 0;
2864 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2865                         s->mv[0][0][1] = best_s.mv[0][0][1];
2866                         s->mv[1][0][0] = best_s.mv[1][0][0];
2867                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2868                         backup_s.dquant= 0;
2869                         s->skipdct=1;
2870                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2871                                         &dmin, &next_block, mx, my);
2872                         s->skipdct=0;
2873                     }
2874                 }
2875
2876                 s->current_picture.qscale_table[xy] = best_s.qscale;
2877
2878                 copy_context_after_encode(s, &best_s, -1);
2879
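                     /* Copy the winning candidate's bits out of its scratch buffer into the real
                      * PutBitContext(s) and restore them as the active writers. */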
2880                 pb_bits_count= put_bits_count(&s->pb);
2881                 flush_put_bits(&s->pb);
2882                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2883                 s->pb= backup_s.pb;
2884
2885                 if(s->data_partitioning){
2886                     pb2_bits_count= put_bits_count(&s->pb2);
2887                     flush_put_bits(&s->pb2);
2888                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2889                     s->pb2= backup_s.pb2;
2890
2891                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2892                     flush_put_bits(&s->tex_pb);
2893                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2894                     s->tex_pb= backup_s.tex_pb;
2895                 }
2896                 s->last_bits= put_bits_count(&s->pb);
2897
2898                 if (CONFIG_H263_ENCODER &&
2899                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2900                     ff_h263_update_motion_val(s);
2901
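                     /* When next_block==0 the best candidate's reconstruction is still in the RD
                      * scratchpad, so copy it back into the picture planes. */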
2902                 if(next_block==0){ //FIXME 16 vs linesize16
2903                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2904                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2905                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2906                 }
2907
2908                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2909                     ff_MPV_decode_mb(s, s->block);
2910             } else {
2911                 int motion_x = 0, motion_y = 0;
2912                 s->mv_type=MV_TYPE_16X16;
2913                 // only one MB-Type possible
2914
2915                 switch(mb_type){
2916                 case CANDIDATE_MB_TYPE_INTRA:
2917                     s->mv_dir = 0;
2918                     s->mb_intra= 1;
2919                     motion_x= s->mv[0][0][0] = 0;
2920                     motion_y= s->mv[0][0][1] = 0;
2921                     break;
2922                 case CANDIDATE_MB_TYPE_INTER:
2923                     s->mv_dir = MV_DIR_FORWARD;
2924                     s->mb_intra= 0;
2925                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2926                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2927                     break;
2928                 case CANDIDATE_MB_TYPE_INTER_I:
2929                     s->mv_dir = MV_DIR_FORWARD;
2930                     s->mv_type = MV_TYPE_FIELD;
2931                     s->mb_intra= 0;
2932                     for(i=0; i<2; i++){
2933                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2934                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2935                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2936                     }
2937                     break;
2938                 case CANDIDATE_MB_TYPE_INTER4V:
2939                     s->mv_dir = MV_DIR_FORWARD;
2940                     s->mv_type = MV_TYPE_8X8;
2941                     s->mb_intra= 0;
2942                     for(i=0; i<4; i++){
2943                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2944                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2945                     }
2946                     break;
2947                 case CANDIDATE_MB_TYPE_DIRECT:
2948                     if (CONFIG_MPEG4_ENCODER) {
2949                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2950                         s->mb_intra= 0;
2951                         motion_x=s->b_direct_mv_table[xy][0];
2952                         motion_y=s->b_direct_mv_table[xy][1];
2953                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2954                     }
2955                     break;
2956                 case CANDIDATE_MB_TYPE_DIRECT0:
2957                     if (CONFIG_MPEG4_ENCODER) {
2958                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2959                         s->mb_intra= 0;
2960                         ff_mpeg4_set_direct_mv(s, 0, 0);
2961                     }
2962                     break;
2963                 case CANDIDATE_MB_TYPE_BIDIR:
2964                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2965                     s->mb_intra= 0;
2966                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2967                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2968                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2969                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2970                     break;
2971                 case CANDIDATE_MB_TYPE_BACKWARD:
2972                     s->mv_dir = MV_DIR_BACKWARD;
2973                     s->mb_intra= 0;
2974                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2975                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2976                     break;
2977                 case CANDIDATE_MB_TYPE_FORWARD:
2978                     s->mv_dir = MV_DIR_FORWARD;
2979                     s->mb_intra= 0;
2980                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2981                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2982                     break;
2983                 case CANDIDATE_MB_TYPE_FORWARD_I:
2984                     s->mv_dir = MV_DIR_FORWARD;
2985                     s->mv_type = MV_TYPE_FIELD;
2986                     s->mb_intra= 0;
2987                     for(i=0; i<2; i++){
2988                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2989                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2990                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2991                     }
2992                     break;
2993                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2994                     s->mv_dir = MV_DIR_BACKWARD;
2995                     s->mv_type = MV_TYPE_FIELD;
2996                     s->mb_intra= 0;
2997                     for(i=0; i<2; i++){
2998                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2999                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3000                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3001                     }
3002                     break;
3003                 case CANDIDATE_MB_TYPE_BIDIR_I:
3004                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3005                     s->mv_type = MV_TYPE_FIELD;
3006                     s->mb_intra= 0;
3007                     for(dir=0; dir<2; dir++){
3008                         for(i=0; i<2; i++){
3009                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3010                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3011                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3012                         }
3013                     }
3014                     break;
3015                 default:
3016                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3017                 }
3018
3019                 encode_mb(s, motion_x, motion_y);
3020
3021                 // RAL: Update last macroblock type
3022                 s->last_mv_dir = s->mv_dir;
3023
3024                 if (CONFIG_H263_ENCODER &&
3025                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3026                     ff_h263_update_motion_val(s);
3027
3028                 ff_MPV_decode_mb(s, s->block);
3029             }
3030
3031             /* clean the MV table in I/P/S frames for direct mode in B-frames */
3032             if(s->mb_intra /* && I,P,S_TYPE */){
3033                 s->p_mv_table[xy][0]=0;
3034                 s->p_mv_table[xy][1]=0;
3035             }
3036
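                 /* Accumulate per-plane squared error against the source frame so PSNR can be
                  * reported once the picture is finished. */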
3037             if(s->flags&CODEC_FLAG_PSNR){
3038                 int w= 16;
3039                 int h= 16;
3040
3041                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3042                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3043
3044                 s->current_picture.f.error[0] += sse(
3045                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3046                     s->dest[0], w, h, s->linesize);
3047                 s->current_picture.f.error[1] += sse(
3048                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3049                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3050                 s->current_picture.f.error[2] += sse(
3051                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3052                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3053             }
3054             if(s->loop_filter){
3055                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3056                     ff_h263_loop_filter(s);
3057             }
3058             av_dlog(s->avctx, "MB %d %d bits\n",
3059                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3060         }
3061     }
3062
3063     // Not beautiful, but this has to be written before the flush below, so it stays here.
3064     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3065         ff_msmpeg4_encode_ext_header(s);
3066
3067     write_slice_end(s);
3068
3069     /* Send the last GOB if RTP */
3070     if (s->avctx->rtp_callback) {
3071         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3072         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3073         /* Call the RTP callback to send the last GOB */
3074         emms_c();
3075         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3076     }
3077
3078     return 0;
3079 }
3080
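     /* MERGE() folds a statistic gathered by a slice-thread context back into the main
      * context and clears the source, so per-thread counters can simply be summed. */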
3081 #define MERGE(field) dst->field += src->field; src->field=0
3082 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3083     MERGE(me.scene_change_score);
3084     MERGE(me.mc_mb_var_sum_temp);
3085     MERGE(me.mb_var_sum_temp);
3086 }
3087
3088 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3089     int i;
3090
3091     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3092     MERGE(dct_count[1]);
3093     MERGE(mv_bits);
3094     MERGE(i_tex_bits);
3095     MERGE(p_tex_bits);
3096     MERGE(i_count);
3097     MERGE(f_count);
3098     MERGE(b_count);
3099     MERGE(skip_count);
3100     MERGE(misc_bits);
3101     MERGE(er.error_count);
3102     MERGE(padding_bug_score);
3103     MERGE(current_picture.f.error[0]);
3104     MERGE(current_picture.f.error[1]);
3105     MERGE(current_picture.f.error[2]);
3106
3107     if(dst->avctx->noise_reduction){
3108         for(i=0; i<64; i++){
3109             MERGE(dct_error_sum[0][i]);
3110             MERGE(dct_error_sum[1][i]);
3111         }
3112     }
3113
3114     assert(put_bits_count(&src->pb) % 8 ==0);
3115     assert(put_bits_count(&dst->pb) % 8 ==0);
3116     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3117     flush_put_bits(&dst->pb);
3118 }
3119
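     /* Pick the frame-level quantizer: a forced lambda (next_lambda), the rate controller's
      * estimate, or the fixed qscale; with adaptive quantization the per-MB qscale table is
      * additionally cleaned up for codecs that only allow small dquant steps. */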
3120 static int estimate_qp(MpegEncContext *s, int dry_run){
3121     if (s->next_lambda){
3122         s->current_picture_ptr->f.quality =
3123         s->current_picture.f.quality = s->next_lambda;
3124         if(!dry_run) s->next_lambda= 0;
3125     } else if (!s->fixed_qscale) {
3126         s->current_picture_ptr->f.quality =
3127         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3128         if (s->current_picture.f.quality < 0)
3129             return -1;
3130     }
3131
3132     if(s->adaptive_quant){
3133         switch(s->codec_id){
3134         case AV_CODEC_ID_MPEG4:
3135             if (CONFIG_MPEG4_ENCODER)
3136                 ff_clean_mpeg4_qscales(s);
3137             break;
3138         case AV_CODEC_ID_H263:
3139         case AV_CODEC_ID_H263P:
3140         case AV_CODEC_ID_FLV1:
3141             if (CONFIG_H263_ENCODER)
3142                 ff_clean_h263_qscales(s);
3143             break;
3144         default:
3145             ff_init_qscale_tab(s);
3146         }
3147
3148         s->lambda= s->lambda_table[0];
3149         //FIXME broken
3150     }else
3151         s->lambda = s->current_picture.f.quality;
3152     update_qscale(s);
3153     return 0;
3154 }
3155
3156 /* must be called before writing the header */
3157 static void set_frame_distances(MpegEncContext * s){
3158     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3159     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3160
3161     if(s->pict_type==AV_PICTURE_TYPE_B){
3162         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3163         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3164     }else{
3165         s->pp_time= s->time - s->last_non_b_time;
3166         s->last_non_b_time= s->time;
3167         assert(s->picture_number==0 || s->pp_time > 0);
3168     }
3169 }
3170
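     /* Encode one picture: set frame distances and rounding mode, estimate the quantizer,
      * run motion estimation across the slice threads, possibly promote a P-frame to I on
      * scene change, pick f_code/b_code and clamp over-long vectors, build codec-specific
      * quant matrices (MJPEG/AMV), write the picture header, then encode the slices in
      * parallel and merge the per-thread contexts. */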
3171 static int encode_picture(MpegEncContext *s, int picture_number)
3172 {
3173     int i, ret;
3174     int bits;
3175     int context_count = s->slice_context_count;
3176
3177     s->picture_number = picture_number;
3178
3179     /* Reset the average MB variance */
3180     s->me.mb_var_sum_temp    =
3181     s->me.mc_mb_var_sum_temp = 0;
3182
3183     /* we need to initialize some time vars before we can encode b-frames */
3184     // RAL: Condition added for MPEG1VIDEO
3185     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3186         set_frame_distances(s);
3187     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3188         ff_set_mpeg4_time(s);
3189
3190     s->me.scene_change_score=0;
3191
3192 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3193
3194     if(s->pict_type==AV_PICTURE_TYPE_I){
3195         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3196         else                        s->no_rounding=0;
3197     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3198         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3199             s->no_rounding ^= 1;
3200     }
3201
3202     if(s->flags & CODEC_FLAG_PASS2){
3203         if (estimate_qp(s,1) < 0)
3204             return -1;
3205         ff_get_2pass_fcode(s);
3206     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3207         if(s->pict_type==AV_PICTURE_TYPE_B)
3208             s->lambda= s->last_lambda_for[s->pict_type];
3209         else
3210             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3211         update_qscale(s);
3212     }
3213
3214     if(s->codec_id != AV_CODEC_ID_AMV){
3215         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3216         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3217         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3218         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3219     }
3220
3221     s->mb_intra=0; //for the rate distortion & bit compare functions
3222     for(i=1; i<context_count; i++){
3223         ret = ff_update_duplicate_context(s->thread_context[i], s);
3224         if (ret < 0)
3225             return ret;
3226     }
3227
3228     if(ff_init_me(s)<0)
3229         return -1;
3230
3231     /* Estimate motion for every MB */
3232     if(s->pict_type != AV_PICTURE_TYPE_I){
3233         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3234         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3235         if (s->pict_type != AV_PICTURE_TYPE_B) {
3236             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3237                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3238             }
3239         }
3240
3241         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3242     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3243         /* I-Frame */
3244         for(i=0; i<s->mb_stride*s->mb_height; i++)
3245             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3246
3247         if(!s->fixed_qscale){
3248             /* finding spatial complexity for I-frame rate control */
3249             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3250         }
3251     }
3252     for(i=1; i<context_count; i++){
3253         merge_context_after_me(s, s->thread_context[i]);
3254     }
3255     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3256     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3257     emms_c();
3258
3259     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3260         s->pict_type= AV_PICTURE_TYPE_I;
3261         for(i=0; i<s->mb_stride*s->mb_height; i++)
3262             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3263         if(s->msmpeg4_version >= 3)
3264             s->no_rounding=1;
3265         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3266                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3267     }
3268
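     /* Choose f_code/b_code from the estimated motion vectors and clamp vectors that
      * exceed the selected range. */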
3269     if(!s->umvplus){
3270         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3271             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3272
3273             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3274                 int a,b;
3275                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3276                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3277                 s->f_code= FFMAX3(s->f_code, a, b);
3278             }
3279
3280             ff_fix_long_p_mvs(s);
3281             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3282             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3283                 int j;
3284                 for(i=0; i<2; i++){
3285                     for(j=0; j<2; j++)
3286                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3287                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3288                 }
3289             }
3290         }
3291
3292         if(s->pict_type==AV_PICTURE_TYPE_B){
3293             int a, b;
3294
3295             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3296             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3297             s->f_code = FFMAX(a, b);
3298
3299             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3300             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3301             s->b_code = FFMAX(a, b);
3302
3303             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3304             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3305             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3306             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3307             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3308                 int dir, j;
3309                 for(dir=0; dir<2; dir++){
3310                     for(i=0; i<2; i++){
3311                         for(j=0; j<2; j++){
3312                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3313                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3314                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3315                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3316                         }
3317                     }
3318                 }
3319             }
3320         }
3321     }
3322
3323     if (estimate_qp(s, 0) < 0)
3324         return -1;
3325
3326     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3327         s->qscale= 3; //reduce clipping problems
3328
3329     if (s->out_format == FMT_MJPEG) {
3330         /* for mjpeg, we do include qscale in the matrix */
3331         for(i=1;i<64;i++){
3332             int j= s->dsp.idct_permutation[i];
3333
3334             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3335         }
3336         s->y_dc_scale_table=
3337         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3338         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3339         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3340                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3341         s->qscale= 8;
3342     }
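         /* AMV uses the fixed SP5X quantization tables and constant DC scale tables. */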
3343     if(s->codec_id == AV_CODEC_ID_AMV){
3344         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3345         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3346         for(i=1;i<64;i++){
3347             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3348
3349             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3350             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3351         }
3352         s->y_dc_scale_table= y;
3353         s->c_dc_scale_table= c;
3354         s->intra_matrix[0] = 13;
3355         s->chroma_intra_matrix[0] = 14;
3356         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3357                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3358         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3359                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3360         s->qscale= 8;
3361     }
3362
3363     //FIXME var duplication
3364     s->current_picture_ptr->f.key_frame =
3365     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3366     s->current_picture_ptr->f.pict_type =
3367     s->current_picture.f.pict_type = s->pict_type;
3368
3369     if (s->current_picture.f.key_frame)
3370         s->picture_in_gop_number=0;
3371
3372     s->mb_x = s->mb_y = 0;
3373     s->last_bits= put_bits_count(&s->pb);
3374     switch(s->out_format) {
3375     case FMT_MJPEG:
3376         if (CONFIG_MJPEG_ENCODER)
3377             ff_mjpeg_encode_picture_header(s);
3378         break;
3379     case FMT_H261:
3380         if (CONFIG_H261_ENCODER)
3381             ff_h261_encode_picture_header(s, picture_number);
3382         break;
3383     case FMT_H263:
3384         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3385             ff_wmv2_encode_picture_header(s, picture_number);
3386         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3387             ff_msmpeg4_encode_picture_header(s, picture_number);
3388         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3389             ff_mpeg4_encode_picture_header(s, picture_number);
3390         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3391             ff_rv10_encode_picture_header(s, picture_number);
3392         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3393             ff_rv20_encode_picture_header(s, picture_number);
3394         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3395             ff_flv_encode_picture_header(s, picture_number);
3396         else if (CONFIG_H263_ENCODER)
3397             ff_h263_encode_picture_header(s, picture_number);
3398         break;
3399     case FMT_MPEG1:
3400         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3401             ff_mpeg1_encode_picture_header(s, picture_number);
3402         break;
3403     default:
3404         av_assert0(0);
3405     }
3406     bits= put_bits_count(&s->pb);
3407     s->header_bits= bits - s->last_bits;
3408
3409     for(i=1; i<context_count; i++){
3410         update_duplicate_context_after_me(s->thread_context[i], s);
3411     }
3412     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3413     for(i=1; i<context_count; i++){
3414         merge_context_after_encode(s, s->thread_context[i]);
3415     }
3416     emms_c();
3417     return 0;
3418 }
3419
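     /* DCT-domain noise reduction: track the summed magnitude of each coefficient position
      * and shrink every nonzero coefficient towards zero by the adaptive offset in
      * dct_offset[], clamping at zero so the sign never flips. */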
3420 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3421     const int intra= s->mb_intra;
3422     int i;
3423
3424     s->dct_count[intra]++;
3425
3426     for(i=0; i<64; i++){
3427         int level= block[i];
3428
3429         if(level){
3430             if(level>0){
3431                 s->dct_error_sum[intra][i] += level;
3432                 level -= s->dct_offset[intra][i];
3433                 if(level<0) level=0;
3434             }else{
3435                 s->dct_error_sum[intra][i] -= level;
3436                 level += s->dct_offset[intra][i];
3437                 if(level>0) level=0;
3438             }
3439             block[i]= level;
3440         }
3441     }
3442 }
3443
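     /* Rate-distortion ("trellis") quantization: for every coefficient keep up to two
      * candidate levels and run a dynamic program over run lengths (the survivor[] list),
      * choosing the combination that minimizes distortion + lambda * bits. */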
3444 static int dct_quantize_trellis_c(MpegEncContext *s,
3445                                   int16_t *block, int n,
3446                                   int qscale, int *overflow){
3447     const int *qmat;
3448     const uint8_t *scantable= s->intra_scantable.scantable;
3449     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3450     int max=0;
3451     unsigned int threshold1, threshold2;
3452     int bias=0;
3453     int run_tab[65];
3454     int level_tab[65];
3455     int score_tab[65];
3456     int survivor[65];
3457     int survivor_count;
3458     int last_run=0;
3459     int last_level=0;
3460     int last_score= 0;
3461     int last_i;
3462     int coeff[2][64];
3463     int coeff_count[64];
3464     int qmul, qadd, start_i, last_non_zero, i, dc;
3465     const int esc_length= s->ac_esc_length;
3466     uint8_t * length;
3467     uint8_t * last_length;
3468     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3469
3470     s->dsp.fdct (block);
3471
3472     if(s->dct_error_sum)
3473         s->denoise_dct(s, block);
3474     qmul= qscale*16;
3475     qadd= ((qscale-1)|1)*8;
3476
3477     if (s->mb_intra) {
3478         int q;
3479         if (!s->h263_aic) {
3480             if (n < 4)
3481                 q = s->y_dc_scale;
3482             else
3483                 q = s->c_dc_scale;
3484             q = q << 3;
3485         } else{
3486             /* For AIC we skip quant/dequant of INTRADC */
3487             q = 1 << 3;
3488             qadd=0;
3489         }
3490
3491         /* note: block[0] is assumed to be positive */
3492         block[0] = (block[0] + (q >> 1)) / q;
3493         start_i = 1;
3494         last_non_zero = 0;
3495         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3496         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3497             bias= 1<<(QMAT_SHIFT-1);
3498         length     = s->intra_ac_vlc_length;
3499         last_length= s->intra_ac_vlc_last_length;
3500     } else {
3501         start_i = 0;
3502         last_non_zero = -1;
3503         qmat = s->q_inter_matrix[qscale];
3504         length     = s->inter_ac_vlc_length;
3505         last_length= s->inter_ac_vlc_last_length;
3506     }
3507     last_i= start_i;
3508
3509     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3510     threshold2= (threshold1<<1);
3511
3512     for(i=63; i>=start_i; i--) {
3513         const int j = scantable[i];
3514         int level = block[j] * qmat[j];
3515
3516         if(((unsigned)(level+threshold1))>threshold2){
3517             last_non_zero = i;
3518             break;
3519         }
3520     }
3521
3522     for(i=start_i; i<=last_non_zero; i++) {
3523         const int j = scantable[i];
3524         int level = block[j] * qmat[j];
3525
3526 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3527 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3528         if(((unsigned)(level+threshold1))>threshold2){
3529             if(level>0){
3530                 level= (bias + level)>>QMAT_SHIFT;
3531                 coeff[0][i]= level;
3532                 coeff[1][i]= level-1;
3533 //                coeff[2][k]= level-2;
3534             }else{
3535                 level= (bias - level)>>QMAT_SHIFT;
3536                 coeff[0][i]= -level;
3537                 coeff[1][i]= -level+1;
3538 //                coeff[2][k]= -level+2;
3539             }
3540             coeff_count[i]= FFMIN(level, 2);
3541             av_assert2(coeff_count[i]);
3542             max |=level;
3543         }else{
3544             coeff[0][i]= (level>>31)|1;
3545             coeff_count[i]= 1;
3546         }
3547     }
3548
3549     *overflow= s->max_qcoeff < max; //overflow might have happened
3550
3551     if(last_non_zero < start_i){
3552         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3553         return last_non_zero;
3554     }
3555
3556     score_tab[start_i]= 0;
3557     survivor[0]= start_i;
3558     survivor_count= 1;
3559
3560     for(i=start_i; i<=last_non_zero; i++){
3561         int level_index, j, zero_distortion;
3562         int dct_coeff= FFABS(block[ scantable[i] ]);
3563         int best_score=256*256*256*120;
3564
3565         if (s->dsp.fdct == ff_fdct_ifast)
3566             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3567         zero_distortion= dct_coeff*dct_coeff;
3568
3569         for(level_index=0; level_index < coeff_count[i]; level_index++){
3570             int distortion;
3571             int level= coeff[level_index][i];
3572             const int alevel= FFABS(level);
3573             int unquant_coeff;
3574
3575             av_assert2(level);
3576
3577             if(s->out_format == FMT_H263){
3578                 unquant_coeff= alevel*qmul + qadd;
3579             }else{ //MPEG1
3580                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3581                 if(s->mb_intra){
3582                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3583                         unquant_coeff =   (unquant_coeff - 1) | 1;
3584                 }else{
3585                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3586                         unquant_coeff =   (unquant_coeff - 1) | 1;
3587                 }
3588                 unquant_coeff<<= 3;
3589             }
3590
3591             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3592             level+=64;
3593             if((level&(~127)) == 0){
3594                 for(j=survivor_count-1; j>=0; j--){
3595                     int run= i - survivor[j];
3596                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3597                     score += score_tab[i-run];
3598
3599                     if(score < best_score){
3600                         best_score= score;
3601                         run_tab[i+1]= run;
3602                         level_tab[i+1]= level-64;
3603                     }
3604                 }
3605
3606                 if(s->out_format == FMT_H263){
3607                     for(j=survivor_count-1; j>=0; j--){
3608                         int run= i - survivor[j];
3609                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3610                         score += score_tab[i-run];
3611                         if(score < last_score){
3612                             last_score= score;
3613                             last_run= run;
3614                             last_level= level-64;
3615                             last_i= i+1;
3616                         }
3617                     }
3618                 }
3619             }else{
3620                 distortion += esc_length*lambda;
3621                 for(j=survivor_count-1; j>=0; j--){
3622                     int run= i - survivor[j];
3623                     int score= distortion + score_tab[i-run];
3624
3625                     if(score < best_score){
3626                         best_score= score;
3627                         run_tab[i+1]= run;
3628                         level_tab[i+1]= level-64;
3629                     }
3630                 }
3631
3632                 if(s->out_format == FMT_H263){
3633                     for(j=survivor_count-1; j>=0; j--){
3634                         int run= i - survivor[j];
3635                         int score= distortion + score_tab[i-run];
3636                         if(score < last_score){
3637                             last_score= score;
3638                             last_run= run;
3639                             last_level= level-64;
3640                             last_i= i+1;
3641                         }
3642                     }
3643                 }
3644             }
3645         }
3646
3647         score_tab[i+1]= best_score;
3648
3649         //Note: there is a VLC code in MPEG-4 which is 1 bit shorter than another one with a shorter run and the same level
3650         if(last_non_zero <= 27){
3651             for(; survivor_count; survivor_count--){
3652                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3653                     break;
3654             }
3655         }else{
3656             for(; survivor_count; survivor_count--){
3657                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3658                     break;
3659             }
3660         }
3661
3662         survivor[ survivor_count++ ]= i+1;
3663     }
3664
3665     if(s->out_format != FMT_H263){
3666         last_score= 256*256*256*120;
3667         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3668             int score= score_tab[i];
3669             if(i) score += lambda*2; //FIXME more exact?
3670
3671             if(score < last_score){
3672                 last_score= score;
3673                 last_i= i;
3674                 last_level= level_tab[i];
3675                 last_run= run_tab[i];
3676             }
3677         }
3678     }
3679
3680     s->coded_score[n] = last_score;
3681
3682     dc= FFABS(block[0]);
3683     last_non_zero= last_i - 1;
3684     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3685
3686     if(last_non_zero < start_i)
3687         return last_non_zero;
3688
3689     if(last_non_zero == 0 && start_i == 0){
3690         int best_level= 0;
3691         int best_score= dc * dc;
3692
3693         for(i=0; i<coeff_count[0]; i++){
3694             int level= coeff[i][0];
3695             int alevel= FFABS(level);
3696             int unquant_coeff, score, distortion;
3697
3698             if(s->out_format == FMT_H263){
3699                     unquant_coeff= (alevel*qmul + qadd)>>3;
3700             }else{ //MPEG1
3701                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3702                     unquant_coeff =   (unquant_coeff - 1) | 1;
3703             }
3704             unquant_coeff = (unquant_coeff + 4) >> 3;
3705             unquant_coeff<<= 3 + 3;
3706
3707             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3708             level+=64;
3709             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3710             else                    score= distortion + esc_length*lambda;
3711
3712             if(score < best_score){
3713                 best_score= score;
3714                 best_level= level - 64;
3715             }
3716         }
3717         block[0]= best_level;
3718         s->coded_score[n] = best_score - dc*dc;
3719         if(best_level == 0) return -1;
3720         else                return last_non_zero;
3721     }
3722
3723     i= last_i;
3724     av_assert2(last_level);
3725
3726     block[ perm_scantable[last_non_zero] ]= last_level;
3727     i -= last_run + 1;
3728
3729     for(; i>start_i; i -= run_tab[i] + 1){
3730         block[ perm_scantable[i-1] ]= level_tab[i];
3731     }
3732
3733     return last_non_zero;
3734 }
3735
3736 //#define REFINE_STATS 1
3737 static int16_t basis[64][64];
3738
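     /* Precompute the 64 8x8 DCT basis functions (scaled by BASIS_SHIFT) in the IDCT's
      * permuted coefficient order; dct_quantize_refine() adds/subtracts them to track the
      * spatial-domain error of candidate coefficient changes. */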
3739 static void build_basis(uint8_t *perm){
3740     int i, j, x, y;
3741     emms_c();
3742     for(i=0; i<8; i++){
3743         for(j=0; j<8; j++){
3744             for(y=0; y<8; y++){
3745                 for(x=0; x<8; x++){
3746                     double s= 0.25*(1<<BASIS_SHIFT);
3747                     int index= 8*i + j;
3748                     int perm_index= perm[index];
3749                     if(i==0) s*= sqrt(0.5);
3750                     if(j==0) s*= sqrt(0.5);
3751                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3752                 }
3753             }
3754         }
3755     }
3756 }
3757
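     /* Quantizer noise shaping: starting from the already quantized block, repeatedly try
      * changing single coefficients by +-1 and accept whichever change lowers the weighted
      * spatial-domain error plus lambda times the VLC bit-cost delta, until no change helps. */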
3758 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3759                         int16_t *block, int16_t *weight, int16_t *orig,
3760                         int n, int qscale){
3761     int16_t rem[64];
3762     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3763     const uint8_t *scantable= s->intra_scantable.scantable;
3764     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3765 //    unsigned int threshold1, threshold2;
3766 //    int bias=0;
3767     int run_tab[65];
3768     int prev_run=0;
3769     int prev_level=0;
3770     int qmul, qadd, start_i, last_non_zero, i, dc;
3771     uint8_t * length;
3772     uint8_t * last_length;
3773     int lambda;
3774     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3775 #ifdef REFINE_STATS
3776 static int count=0;
3777 static int after_last=0;
3778 static int to_zero=0;
3779 static int from_zero=0;
3780 static int raise=0;
3781 static int lower=0;
3782 static int messed_sign=0;
3783 #endif
3784
3785     if(basis[0][0] == 0)
3786         build_basis(s->dsp.idct_permutation);
3787
3788     qmul= qscale*2;
3789     qadd= (qscale-1)|1;
3790     if (s->mb_intra) {
3791         if (!s->h263_aic) {
3792             if (n < 4)
3793                 q = s->y_dc_scale;
3794             else
3795                 q = s->c_dc_scale;
3796         } else{
3797             /* For AIC we skip quant/dequant of INTRADC */
3798             q = 1;
3799             qadd=0;
3800         }
3801         q <<= RECON_SHIFT-3;
3802         /* note: block[0] is assumed to be positive */
3803         dc= block[0]*q;
3804 //        block[0] = (block[0] + (q >> 1)) / q;
3805         start_i = 1;
3806 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3807 //            bias= 1<<(QMAT_SHIFT-1);
3808         length     = s->intra_ac_vlc_length;
3809         last_length= s->intra_ac_vlc_last_length;
3810     } else {
3811         dc= 0;
3812         start_i = 0;
3813         length     = s->inter_ac_vlc_length;
3814         last_length= s->inter_ac_vlc_last_length;
3815     }
3816     last_non_zero = s->block_last_index[n];
3817
3818 #ifdef REFINE_STATS
3819 {START_TIMER
3820 #endif
3821     dc += (1<<(RECON_SHIFT-1));
3822     for(i=0; i<64; i++){
3823         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3824     }
3825 #ifdef REFINE_STATS
3826 STOP_TIMER("memset rem[]")}
3827 #endif
3828     sum=0;
3829     for(i=0; i<64; i++){
3830         int one= 36;
3831         int qns=4;
3832         int w;
3833
3834         w= FFABS(weight[i]) + qns*one;
3835         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3836
3837         weight[i] = w;
3838 //        w=weight[i] = (63*qns + (w/2)) / w;
3839
3840         av_assert2(w>0);
3841         av_assert2(w<(1<<6));
3842         sum += w*w;
3843     }
3844     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3845 #ifdef REFINE_STATS
3846 {START_TIMER
3847 #endif
3848     run=0;
3849     rle_index=0;
3850     for(i=start_i; i<=last_non_zero; i++){
3851         int j= perm_scantable[i];
3852         const int level= block[j];
3853         int coeff;
3854
3855         if(level){
3856             if(level<0) coeff= qmul*level - qadd;
3857             else        coeff= qmul*level + qadd;
3858             run_tab[rle_index++]=run;
3859             run=0;
3860
3861             s->dsp.add_8x8basis(rem, basis[j], coeff);
3862         }else{
3863             run++;
3864         }
3865     }
3866 #ifdef REFINE_STATS
3867 if(last_non_zero>0){
3868 STOP_TIMER("init rem[]")
3869 }
3870 }
3871
3872 {START_TIMER
3873 #endif
3874     for(;;){
3875         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3876         int best_coeff=0;
3877         int best_change=0;
3878         int run2, best_unquant_change=0, analyze_gradient;
3879 #ifdef REFINE_STATS
3880 {START_TIMER
3881 #endif
3882         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3883
3884         if(analyze_gradient){
3885 #ifdef REFINE_STATS
3886 {START_TIMER
3887 #endif
3888             for(i=0; i<64; i++){
3889                 int w= weight[i];
3890
3891                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3892             }
3893 #ifdef REFINE_STATS
3894 STOP_TIMER("rem*w*w")}
3895 {START_TIMER
3896 #endif
3897             s->dsp.fdct(d1);
3898 #ifdef REFINE_STATS
3899 STOP_TIMER("dct")}
3900 #endif
3901         }
3902
3903         if(start_i){
3904             const int level= block[0];
3905             int change, old_coeff;
3906
3907             av_assert2(s->mb_intra);
3908
3909             old_coeff= q*level;
3910
3911             for(change=-1; change<=1; change+=2){
3912                 int new_level= level + change;
3913                 int score, new_coeff;
3914
3915                 new_coeff= q*new_level;
3916                 if(new_coeff >= 2048 || new_coeff < 0)
3917                     continue;
3918
3919                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3920                 if(score<best_score){
3921                     best_score= score;
3922                     best_coeff= 0;
3923                     best_change= change;
3924                     best_unquant_change= new_coeff - old_coeff;
3925                 }
3926             }
3927         }
3928
3929         run=0;
3930         rle_index=0;
3931         run2= run_tab[rle_index++];
3932         prev_level=0;
3933         prev_run=0;
3934
3935         for(i=start_i; i<64; i++){
3936             int j= perm_scantable[i];
3937             const int level= block[j];
3938             int change, old_coeff;
3939
3940             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3941                 break;
3942
3943             if(level){
3944                 if(level<0) old_coeff= qmul*level - qadd;
3945                 else        old_coeff= qmul*level + qadd;
3946                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3947             }else{
3948                 old_coeff=0;
3949                 run2--;
3950                 av_assert2(run2>=0 || i >= last_non_zero );
3951             }
3952
3953             for(change=-1; change<=1; change+=2){
3954                 int new_level= level + change;
3955                 int score, new_coeff, unquant_change;
3956
3957                 score=0;
3958                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3959                    continue;
3960
3961                 if(new_level){
3962                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3963                     else            new_coeff= qmul*new_level + qadd;
3964                     if(new_coeff >= 2048 || new_coeff <= -2048)
3965                         continue;
3966                     //FIXME check for overflow
3967
3968                     if(level){
3969                         if(level < 63 && level > -63){
3970                             if(i < last_non_zero)
3971                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3972                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3973                             else
3974                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3975                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3976                         }
3977                     }else{
3978                         av_assert2(FFABS(new_level)==1);
3979
3980                         if(analyze_gradient){
3981                             int g= d1[ scantable[i] ];
3982                             if(g && (g^new_level) >= 0)
3983                                 continue;
3984                         }
3985
3986                         if(i < last_non_zero){
3987                             int next_i= i + run2 + 1;
3988                             int next_level= block[ perm_scantable[next_i] ] + 64;
3989
3990                             if(next_level&(~127))
3991                                 next_level= 0;
3992
3993                             if(next_i < last_non_zero)
3994                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3995                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3996                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3997                             else
3998                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3999                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4000                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4001                         }else{
4002                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4003                             if(prev_level){
4004                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4005                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4006                             }
4007                         }
4008                     }
4009                 }else{
4010                     new_coeff=0;
4011                     av_assert2(FFABS(level)==1);
4012
4013                     if(i < last_non_zero){
4014                         int next_i= i + run2 + 1;
4015                         int next_level= block[ perm_scantable[next_i] ] + 64;
4016
4017                         if(next_level&(~127))
4018                             next_level= 0;
4019
4020                         if(next_i < last_non_zero)
4021                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4022                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4023                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4024                         else
4025                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4026                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4027                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4028                     }else{
4029                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4030                         if(prev_level){
4031                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4032                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4033                         }
4034                     }
4035                 }
4036
4037                 score *= lambda;
4038
4039                 unquant_change= new_coeff - old_coeff;
4040                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4041
4042                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4043                 if(score<best_score){
4044                     best_score= score;
4045                     best_coeff= i;
4046                     best_change= change;
4047                     best_unquant_change= unquant_change;
4048                 }
4049             }
4050             if(level){
4051                 prev_level= level + 64;
4052                 if(prev_level&(~127))
4053                     prev_level= 0;
4054                 prev_run= run;
4055                 run=0;
4056             }else{
4057                 run++;
4058             }
4059         }
4060 #ifdef REFINE_STATS
4061 STOP_TIMER("iterative step")}
4062 #endif
4063
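             /* Apply the best single-coefficient change found in this pass:
              * update last_non_zero and the run-length table, and fold the
              * change into the remaining error via add_8x8basis().  If no
              * change improved the RD score, the refinement has converged. */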
4064         if(best_change){
4065             int j= perm_scantable[ best_coeff ];
4066
4067             block[j] += best_change;
4068
4069             if(best_coeff > last_non_zero){
4070                 last_non_zero= best_coeff;
4071                 av_assert2(block[j]);
4072 #ifdef REFINE_STATS
4073 after_last++;
4074 #endif
4075             }else{
4076 #ifdef REFINE_STATS
4077 if(block[j]){
4078     if(block[j] - best_change){
4079         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4080             raise++;
4081         }else{
4082             lower++;
4083         }
4084     }else{
4085         from_zero++;
4086     }
4087 }else{
4088     to_zero++;
4089 }
4090 #endif
4091                 for(; last_non_zero>=start_i; last_non_zero--){
4092                     if(block[perm_scantable[last_non_zero]])
4093                         break;
4094                 }
4095             }
4096 #ifdef REFINE_STATS
4097 count++;
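     /* 256*256*256*64 == 2^30, so this condition only holds when count is a
      * power of two: the stats line is printed at exponentially growing
      * intervals. */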
4098 if(256*256*256*64 % count == 0){
4099     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4100 }
4101 #endif
4102             run=0;
4103             rle_index=0;
4104             for(i=start_i; i<=last_non_zero; i++){
4105                 int j= perm_scantable[i];
4106                 const int level= block[j];
4107
4108                 if(level){
4109                     run_tab[rle_index++]=run;
4110                     run=0;
4111                 }else{
4112                     run++;
4113                 }
4114             }
4115
4116             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4117         }else{
4118             break;
4119         }
4120     }
4121 #ifdef REFINE_STATS
4122 if(last_non_zero>0){
4123 STOP_TIMER("iterative search")
4124 }
4125 }
4126 #endif
4127
4128     return last_non_zero;
4129 }
4130
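     /**
      * Transform and quantize an 8x8 block of coefficients.
      *
      * Runs the forward DCT (plus optional DCT-domain noise reduction),
      * quantizes block n with the matrix and bias selected by qscale and the
      * intra/inter state, and finally permutes the nonzero coefficients into
      * the order expected by the IDCT implementation.
      *
      * @param overflow set to 1 if a quantized coefficient exceeds s->max_qcoeff
      * @return index of the last nonzero coefficient in scan order
      */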
4131 int ff_dct_quantize_c(MpegEncContext *s,
4132                         int16_t *block, int n,
4133                         int qscale, int *overflow)
4134 {
4135     int i, j, level, last_non_zero, q, start_i;
4136     const int *qmat;
4137     const uint8_t *scantable= s->intra_scantable.scantable;
4138     int bias;
4139     int max=0;
4140     unsigned int threshold1, threshold2;
4141
4142     s->dsp.fdct (block);
4143
4144     if(s->dct_error_sum)
4145         s->denoise_dct(s, block);
4146
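         /* Select DC handling, quantization matrix and rounding bias: intra
          * blocks handle the DC coefficient separately here and start the AC
          * scan at index 1; inter blocks start at index 0. */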
4147     if (s->mb_intra) {
4148         if (!s->h263_aic) {
4149             if (n < 4)
4150                 q = s->y_dc_scale;
4151             else
4152                 q = s->c_dc_scale;
4153             q = q << 3;
4154         } else
4155             /* For AIC we skip quant/dequant of INTRADC */
4156             q = 1 << 3;
4157
4158         /* note: block[0] is assumed to be positive */
4159         block[0] = (block[0] + (q >> 1)) / q;
4160         start_i = 1;
4161         last_non_zero = 0;
4162         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4163         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4164     } else {
4165         start_i = 0;
4166         last_non_zero = -1;
4167         qmat = s->q_inter_matrix[qscale];
4168         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4169     }
4170     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4171     threshold2= (threshold1<<1);
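         /* threshold1/threshold2 turn the "does this coefficient quantize to
          * zero?" test into a single unsigned comparison: the condition below
          * is equivalent to |level| > threshold1. */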
4172     for(i=63;i>=start_i;i--) {
4173         j = scantable[i];
4174         level = block[j] * qmat[j];
4175
4176         if(((unsigned)(level+threshold1))>threshold2){
4177             last_non_zero = i;
4178             break;
4179         }else{
4180             block[j]=0;
4181         }
4182     }
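         /* The backward scan above located the last coefficient that survives
          * quantization and zeroed everything after it; quantize the remaining
          * coefficients in scan order below. */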
4183     for(i=start_i; i<=last_non_zero; i++) {
4184         j = scantable[i];
4185         level = block[j] * qmat[j];
4186
4187         /* equivalent to: if (bias + level >= (1 << QMAT_SHIFT) ||
4188          *                    bias - level >= (1 << QMAT_SHIFT)) */
4189         if(((unsigned)(level+threshold1))>threshold2){
4190             if(level>0){
4191                 level= (bias + level)>>QMAT_SHIFT;
4192                 block[j]= level;
4193             }else{
4194                 level= (bias - level)>>QMAT_SHIFT;
4195                 block[j]= -level;
4196             }
4197             max |=level;
4198         }else{
4199             block[j]=0;
4200         }
4201     }
4202     *overflow= s->max_qcoeff < max; //overflow might have happened
4203
4204     /* We need this permutation so that the IDCT gets its coefficients in the expected order; only the nonzero elements need to be permuted. */
4205     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4206         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4207
4208     return last_non_zero;
4209 }
4210
4211 #define OFFSET(x) offsetof(MpegEncContext, x)
4212 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
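     /* Per-encoder private options exposed through the AVOptions API;
      * FF_MPV_COMMON_OPTS appends the options shared by all mpegvideo-based
      * encoders. */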
4213 static const AVOption h263_options[] = {
4214     { "obmc",         "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4215     { "structured_slices", "Write slice start position at every GOB header instead of just the GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4216     { "mb_info",      "Emit macroblock info for RFC 2190 packetization; the parameter value is the maximum payload size.", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4217     FF_MPV_COMMON_OPTS
4218     { NULL },
4219 };
4220
4221 static const AVClass h263_class = {
4222     .class_name = "H.263 encoder",
4223     .item_name  = av_default_item_name,
4224     .option     = h263_options,
4225     .version    = LIBAVUTIL_VERSION_INT,
4226 };
4227
4228 AVCodec ff_h263_encoder = {
4229     .name           = "h263",
4230     .type           = AVMEDIA_TYPE_VIDEO,
4231     .id             = AV_CODEC_ID_H263,
4232     .priv_data_size = sizeof(MpegEncContext),
4233     .init           = ff_MPV_encode_init,
4234     .encode2        = ff_MPV_encode_picture,
4235     .close          = ff_MPV_encode_end,
4236     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4237     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4238     .priv_class     = &h263_class,
4239 };
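     /*
      * Usage sketch (illustrative caller code, assuming the usual libavcodec
      * workflow): the private options above are normally passed through an
      * AVDictionary when the encoder is opened, e.g.
      *
      *     AVDictionary *opts = NULL;
      *     AVCodec *codec = avcodec_find_encoder_by_name("h263");
      *     AVCodecContext *ctx = avcodec_alloc_context3(codec);
      *     // ... set width, height, pix_fmt, time_base ...
      *     av_dict_set(&opts, "obmc", "1", 0);
      *     if (avcodec_open2(ctx, codec, &opts) >= 0) {
      *         // entries not recognized by the encoder remain in opts
      *     }
      *     av_dict_free(&opts);
      */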
4240
4241 static const AVOption h263p_options[] = {
4242     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4243     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4244     { "obmc",       "Use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4245     { "structured_slices", "Write slice start position at every GOB header instead of just the GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4246     FF_MPV_COMMON_OPTS
4247     { NULL },
4248 };
4249 static const AVClass h263p_class = {
4250     .class_name = "H.263p encoder",
4251     .item_name  = av_default_item_name,
4252     .option     = h263p_options,
4253     .version    = LIBAVUTIL_VERSION_INT,
4254 };
4255
4256 AVCodec ff_h263p_encoder = {
4257     .name           = "h263p",
4258     .type           = AVMEDIA_TYPE_VIDEO,
4259     .id             = AV_CODEC_ID_H263P,
4260     .priv_data_size = sizeof(MpegEncContext),
4261     .init           = ff_MPV_encode_init,
4262     .encode2        = ff_MPV_encode_picture,
4263     .close          = ff_MPV_encode_end,
4264     .capabilities   = CODEC_CAP_SLICE_THREADS,
4265     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4266     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4267     .priv_class     = &h263p_class,
4268 };
4269
4270 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4271
4272 AVCodec ff_msmpeg4v2_encoder = {
4273     .name           = "msmpeg4v2",
4274     .type           = AVMEDIA_TYPE_VIDEO,
4275     .id             = AV_CODEC_ID_MSMPEG4V2,
4276     .priv_data_size = sizeof(MpegEncContext),
4277     .init           = ff_MPV_encode_init,
4278     .encode2        = ff_MPV_encode_picture,
4279     .close          = ff_MPV_encode_end,
4280     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4281     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4282     .priv_class     = &msmpeg4v2_class,
4283 };
4284
4285 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4286
4287 AVCodec ff_msmpeg4v3_encoder = {
4288     .name           = "msmpeg4",
4289     .type           = AVMEDIA_TYPE_VIDEO,
4290     .id             = AV_CODEC_ID_MSMPEG4V3,
4291     .priv_data_size = sizeof(MpegEncContext),
4292     .init           = ff_MPV_encode_init,
4293     .encode2        = ff_MPV_encode_picture,
4294     .close          = ff_MPV_encode_end,
4295     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4296     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4297     .priv_class     = &msmpeg4v3_class,
4298 };
4299
4300 FF_MPV_GENERIC_CLASS(wmv1)
4301
4302 AVCodec ff_wmv1_encoder = {
4303     .name           = "wmv1",
4304     .type           = AVMEDIA_TYPE_VIDEO,
4305     .id             = AV_CODEC_ID_WMV1,
4306     .priv_data_size = sizeof(MpegEncContext),
4307     .init           = ff_MPV_encode_init,
4308     .encode2        = ff_MPV_encode_picture,
4309     .close          = ff_MPV_encode_end,
4310     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4311     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4312     .priv_class     = &wmv1_class,
4313 };