]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
pcm: switch to ff_alloc_packet2().
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48 #include "sp5x.h"
49
50 //#undef NDEBUG
51 //#include <assert.h>
52
/* Forward declarations of file-local (static) helpers so that they can be
 * referenced before their definitions. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
58
59 /* enable all paranoid tests for rounding, overflows, etc... */
60 //#define PARANOID
61
62 //#define DEBUG
63
/* File-wide default tables. MPV_encode_defaults() installs them into the
 * context as s->me.mv_penalty and s->fcode_tab, and sets the
 * default_fcode_tab entries for offsets -16..15 (index biased by MAX_MV)
 * to 1. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
66
/* Option table containing only the entries expanded from
 * FF_MPV_COMMON_OPTS, followed by the terminating NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
71
72 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
73                        uint16_t (*qmat16)[2][64],
74                        const uint16_t *quant_matrix,
75                        int bias, int qmin, int qmax, int intra)
76 {
77     int qscale;
78     int shift = 0;
79
80     for (qscale = qmin; qscale <= qmax; qscale++) {
81         int i;
82         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
83             dsp->fdct == ff_jpeg_fdct_islow_10
84 #ifdef FAAN_POSTSCALE
85             || dsp->fdct == ff_faandct
86 #endif
87             ) {
88             for (i = 0; i < 64; i++) {
89                 const int j = dsp->idct_permutation[i];
90                 /* 16 <= qscale * quant_matrix[i] <= 7905
91                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
92                  *             19952 <=              x  <= 249205026
93                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
94                  *           3444240 >= (1 << 36) / (x) >= 275 */
95
96                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
97                                         (qscale * quant_matrix[j]));
98             }
99         } else if (dsp->fdct == ff_fdct_ifast
100 #ifndef FAAN_POSTSCALE
101                    || dsp->fdct == ff_faandct
102 #endif
103                    ) {
104             for (i = 0; i < 64; i++) {
105                 const int j = dsp->idct_permutation[i];
106                 /* 16 <= qscale * quant_matrix[i] <= 7905
107                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
108                  *             19952 <=              x  <= 249205026
109                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
110                  *           3444240 >= (1 << 36) / (x) >= 275 */
111
112                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
113                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
114             }
115         } else {
116             for (i = 0; i < 64; i++) {
117                 const int j = dsp->idct_permutation[i];
118                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
119                  * Assume x = qscale * quant_matrix[i]
120                  * So             16 <=              x  <= 7905
121                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
122                  * so          32768 >= (1 << 19) / (x) >= 67 */
123                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
124                                         (qscale * quant_matrix[j]));
125                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
126                 //                    (qscale * quant_matrix[i]);
127                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
128                                        (qscale * quant_matrix[j]);
129
130                 if (qmat16[qscale][0][i] == 0 ||
131                     qmat16[qscale][0][i] == 128 * 256)
132                     qmat16[qscale][0][i] = 128 * 256 - 1;
133                 qmat16[qscale][1][i] =
134                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
135                                 qmat16[qscale][0][i]);
136             }
137         }
138
139         for (i = intra; i < 64; i++) {
140             int64_t max = 8191;
141             if (dsp->fdct == ff_fdct_ifast
142 #ifndef FAAN_POSTSCALE
143                 || dsp->fdct == ff_faandct
144 #endif
145                ) {
146                 max = (8191LL * ff_aanscales[i]) >> 14;
147             }
148             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
149                 shift++;
150             }
151         }
152     }
153     if (shift) {
154         av_log(NULL, AV_LOG_INFO,
155                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
156                QMAT_SHIFT - shift);
157     }
158 }
159
160 static inline void update_qscale(MpegEncContext *s)
161 {
162     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
163                 (FF_LAMBDA_SHIFT + 7);
164     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
165
166     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
167                  FF_LAMBDA_SHIFT;
168 }
169
170 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
171 {
172     int i;
173
174     if (matrix) {
175         put_bits(pb, 1, 1);
176         for (i = 0; i < 64; i++) {
177             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
178         }
179     } else
180         put_bits(pb, 1, 0);
181 }
182
183 /**
184  * init s->current_picture.qscale_table from s->lambda_table
185  */
186 void ff_init_qscale_tab(MpegEncContext *s)
187 {
188     int8_t * const qscale_table = s->current_picture.f.qscale_table;
189     int i;
190
191     for (i = 0; i < s->mb_num; i++) {
192         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
193         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
194         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
195                                                   s->avctx->qmax);
196     }
197 }
198
199 static void copy_picture_attributes(MpegEncContext *s,
200                                     AVFrame *dst,
201                                     AVFrame *src)
202 {
203     int i;
204
205     dst->pict_type              = src->pict_type;
206     dst->quality                = src->quality;
207     dst->coded_picture_number   = src->coded_picture_number;
208     dst->display_picture_number = src->display_picture_number;
209     //dst->reference              = src->reference;
210     dst->pts                    = src->pts;
211     dst->interlaced_frame       = src->interlaced_frame;
212     dst->top_field_first        = src->top_field_first;
213
214     if (s->avctx->me_threshold) {
215         if (!src->motion_val[0])
216             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
217         if (!src->mb_type)
218             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
219         if (!src->ref_index[0])
220             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
221         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
222             av_log(s->avctx, AV_LOG_ERROR,
223                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
224                    src->motion_subsample_log2, dst->motion_subsample_log2);
225
226         memcpy(dst->mb_type, src->mb_type,
227                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
228
229         for (i = 0; i < 2; i++) {
230             int stride = ((16 * s->mb_width ) >>
231                           src->motion_subsample_log2) + 1;
232             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
233
234             if (src->motion_val[i] &&
235                 src->motion_val[i] != dst->motion_val[i]) {
236                 memcpy(dst->motion_val[i], src->motion_val[i],
237                        2 * stride * height * sizeof(int16_t));
238             }
239             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
240                 memcpy(dst->ref_index[i], src->ref_index[i],
241                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
242             }
243         }
244     }
245 }
246
247 static void update_duplicate_context_after_me(MpegEncContext *dst,
248                                               MpegEncContext *src)
249 {
250 #define COPY(a) dst->a= src->a
251     COPY(pict_type);
252     COPY(current_picture);
253     COPY(f_code);
254     COPY(b_code);
255     COPY(qscale);
256     COPY(lambda);
257     COPY(lambda2);
258     COPY(picture_in_gop_number);
259     COPY(gop_picture_number);
260     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
261     COPY(progressive_frame);    // FIXME don't set in encode_header
262     COPY(partitioned_frame);    // FIXME don't set in encode_header
263 #undef COPY
264 }
265
266 /**
267  * Set the given MpegEncContext to defaults for encoding.
268  * the changed fields will not depend upon the prior state of the MpegEncContext.
269  */
270 static void MPV_encode_defaults(MpegEncContext *s)
271 {
272     int i;
273     ff_MPV_common_defaults(s);
274
275     for (i = -16; i < 16; i++) {
276         default_fcode_tab[i + MAX_MV] = 1;
277     }
278     s->me.mv_penalty = default_mv_penalty;
279     s->fcode_tab     = default_fcode_tab;
280 }
281
282 /* init video encoder */
283 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
284 {
285     MpegEncContext *s = avctx->priv_data;
286     int i;
287     int chroma_h_shift, chroma_v_shift;
288
289     MPV_encode_defaults(s);
290
291     switch (avctx->codec_id) {
292     case CODEC_ID_MPEG2VIDEO:
293         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
294             avctx->pix_fmt != PIX_FMT_YUV422P) {
295             av_log(avctx, AV_LOG_ERROR,
296                    "only YUV420 and YUV422 are supported\n");
297             return -1;
298         }
299         break;
300     case CODEC_ID_LJPEG:
301         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
302             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
303             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
304             avctx->pix_fmt != PIX_FMT_BGR0     &&
305             avctx->pix_fmt != PIX_FMT_BGRA     &&
306             avctx->pix_fmt != PIX_FMT_BGR24    &&
307             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
308               avctx->pix_fmt != PIX_FMT_YUV422P &&
309               avctx->pix_fmt != PIX_FMT_YUV444P) ||
310              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
311             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
312             return -1;
313         }
314         break;
315     case CODEC_ID_MJPEG:
316     case CODEC_ID_AMV:
317         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
318             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
319             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
320               avctx->pix_fmt != PIX_FMT_YUV422P) ||
321              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
322             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
323             return -1;
324         }
325         break;
326     default:
327         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
328             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
329             return -1;
330         }
331     }
332
333     switch (avctx->pix_fmt) {
334     case PIX_FMT_YUVJ422P:
335     case PIX_FMT_YUV422P:
336         s->chroma_format = CHROMA_422;
337         break;
338     case PIX_FMT_YUVJ420P:
339     case PIX_FMT_YUV420P:
340     default:
341         s->chroma_format = CHROMA_420;
342         break;
343     }
344
345     s->bit_rate = avctx->bit_rate;
346     s->width    = avctx->width;
347     s->height   = avctx->height;
348     if (avctx->gop_size > 600 &&
349         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
350         av_log(avctx, AV_LOG_WARNING,
351                "keyframe interval too large!, reducing it from %d to %d\n",
352                avctx->gop_size, 600);
353         avctx->gop_size = 600;
354     }
355     s->gop_size     = avctx->gop_size;
356     s->avctx        = avctx;
357     s->flags        = avctx->flags;
358     s->flags2       = avctx->flags2;
359     s->max_b_frames = avctx->max_b_frames;
360     s->codec_id     = avctx->codec->id;
361 #if FF_API_MPV_GLOBAL_OPTS
362     if (avctx->luma_elim_threshold)
363         s->luma_elim_threshold   = avctx->luma_elim_threshold;
364     if (avctx->chroma_elim_threshold)
365         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
366 #endif
367     s->strict_std_compliance = avctx->strict_std_compliance;
368     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
369     s->mpeg_quant         = avctx->mpeg_quant;
370     s->rtp_mode           = !!avctx->rtp_payload_size;
371     s->intra_dc_precision = avctx->intra_dc_precision;
372     s->user_specified_pts = AV_NOPTS_VALUE;
373
374     if (s->gop_size <= 1) {
375         s->intra_only = 1;
376         s->gop_size   = 12;
377     } else {
378         s->intra_only = 0;
379     }
380
381     s->me_method = avctx->me_method;
382
383     /* Fixed QSCALE */
384     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
385
386 #if FF_API_MPV_GLOBAL_OPTS
387     if (s->flags & CODEC_FLAG_QP_RD)
388         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
389 #endif
390
391     s->adaptive_quant = (s->avctx->lumi_masking ||
392                          s->avctx->dark_masking ||
393                          s->avctx->temporal_cplx_masking ||
394                          s->avctx->spatial_cplx_masking  ||
395                          s->avctx->p_masking      ||
396                          s->avctx->border_masking ||
397                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
398                         !s->fixed_qscale;
399
400     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
401
402     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
403         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
404         return -1;
405     }
406
407     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
408         av_log(avctx, AV_LOG_INFO,
409                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
410     }
411
412     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
413         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
414         return -1;
415     }
416
417     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
418         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
419         return -1;
420     }
421
422     if (avctx->rc_max_rate &&
423         avctx->rc_max_rate == avctx->bit_rate &&
424         avctx->rc_max_rate != avctx->rc_min_rate) {
425         av_log(avctx, AV_LOG_INFO,
426                "impossible bitrate constraints, this will fail\n");
427     }
428
429     if (avctx->rc_buffer_size &&
430         avctx->bit_rate * (int64_t)avctx->time_base.num >
431             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
432         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
433         return -1;
434     }
435
436     if (!s->fixed_qscale &&
437         avctx->bit_rate * av_q2d(avctx->time_base) >
438             avctx->bit_rate_tolerance) {
439         av_log(avctx, AV_LOG_ERROR,
440                "bitrate tolerance too small for bitrate\n");
441         return -1;
442     }
443
444     if (s->avctx->rc_max_rate &&
445         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
446         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
447          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
448         90000LL * (avctx->rc_buffer_size - 1) >
449             s->avctx->rc_max_rate * 0xFFFFLL) {
450         av_log(avctx, AV_LOG_INFO,
451                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
452                "specified vbv buffer is too large for the given bitrate!\n");
453     }
454
455     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
456         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
457         s->codec_id != CODEC_ID_FLV1) {
458         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
459         return -1;
460     }
461
462     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
463         av_log(avctx, AV_LOG_ERROR,
464                "OBMC is only supported with simple mb decision\n");
465         return -1;
466     }
467
468     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
469         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
470         return -1;
471     }
472
473     if (s->max_b_frames                    &&
474         s->codec_id != CODEC_ID_MPEG4      &&
475         s->codec_id != CODEC_ID_MPEG1VIDEO &&
476         s->codec_id != CODEC_ID_MPEG2VIDEO) {
477         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
478         return -1;
479     }
480
481     if ((s->codec_id == CODEC_ID_MPEG4 ||
482          s->codec_id == CODEC_ID_H263  ||
483          s->codec_id == CODEC_ID_H263P) &&
484         (avctx->sample_aspect_ratio.num > 255 ||
485          avctx->sample_aspect_ratio.den > 255)) {
486         av_log(avctx, AV_LOG_WARNING,
487                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
488                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
489         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
490                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
491     }
492
493     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
494         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
495         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
496         return -1;
497     }
498
499     // FIXME mpeg2 uses that too
500     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
501         av_log(avctx, AV_LOG_ERROR,
502                "mpeg2 style quantization not supported by codec\n");
503         return -1;
504     }
505
506 #if FF_API_MPV_GLOBAL_OPTS
507     if (s->flags & CODEC_FLAG_CBP_RD)
508         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
509 #endif
510
511     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
512         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
513         return -1;
514     }
515
516     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
517         s->avctx->mb_decision != FF_MB_DECISION_RD) {
518         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
519         return -1;
520     }
521
522     if (s->avctx->scenechange_threshold < 1000000000 &&
523         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
524         av_log(avctx, AV_LOG_ERROR,
525                "closed gop with scene change detection are not supported yet, "
526                "set threshold to 1000000000\n");
527         return -1;
528     }
529
530     if (s->flags & CODEC_FLAG_LOW_DELAY) {
531         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
532             av_log(avctx, AV_LOG_ERROR,
533                   "low delay forcing is only available for mpeg2\n");
534             return -1;
535         }
536         if (s->max_b_frames != 0) {
537             av_log(avctx, AV_LOG_ERROR,
538                    "b frames cannot be used with low delay\n");
539             return -1;
540         }
541     }
542
543     if (s->q_scale_type == 1) {
544         if (avctx->qmax > 12) {
545             av_log(avctx, AV_LOG_ERROR,
546                    "non linear quant only supports qmax <= 12 currently\n");
547             return -1;
548         }
549     }
550
551     if (s->avctx->thread_count > 1         &&
552         s->codec_id != CODEC_ID_MPEG4      &&
553         s->codec_id != CODEC_ID_MPEG1VIDEO &&
554         s->codec_id != CODEC_ID_MPEG2VIDEO &&
555         (s->codec_id != CODEC_ID_H263P)) {
556         av_log(avctx, AV_LOG_ERROR,
557                "multi threaded encoding not supported by codec\n");
558         return -1;
559     }
560
561     if (s->avctx->thread_count < 1) {
562         av_log(avctx, AV_LOG_ERROR,
563                "automatic thread number detection not supported by codec, "
564                "patch welcome\n");
565         return -1;
566     }
567
568     if (s->avctx->thread_count > 1)
569         s->rtp_mode = 1;
570
571     if (!avctx->time_base.den || !avctx->time_base.num) {
572         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
573         return -1;
574     }
575
576     i = (INT_MAX / 2 + 128) >> 8;
577     if (avctx->me_threshold >= i) {
578         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
579                i - 1);
580         return -1;
581     }
582     if (avctx->mb_threshold >= i) {
583         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
584                i - 1);
585         return -1;
586     }
587
588     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
589         av_log(avctx, AV_LOG_INFO,
590                "notice: b_frame_strategy only affects the first pass\n");
591         avctx->b_frame_strategy = 0;
592     }
593
594     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
595     if (i > 1) {
596         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
597         avctx->time_base.den /= i;
598         avctx->time_base.num /= i;
599         //return -1;
600     }
601
602     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG || s->codec_id==CODEC_ID_AMV) {
603         // (a + x * 3 / 8) / x
604         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
605         s->inter_quant_bias = 0;
606     } else {
607         s->intra_quant_bias = 0;
608         // (a - x / 4) / x
609         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
610     }
611
612     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
613         s->intra_quant_bias = avctx->intra_quant_bias;
614     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
615         s->inter_quant_bias = avctx->inter_quant_bias;
616
617     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
618
619     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
620                                   &chroma_v_shift);
621
622     if (avctx->codec_id == CODEC_ID_MPEG4 &&
623         s->avctx->time_base.den > (1 << 16) - 1) {
624         av_log(avctx, AV_LOG_ERROR,
625                "timebase %d/%d not supported by MPEG 4 standard, "
626                "the maximum admitted value for the timebase denominator "
627                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
628                (1 << 16) - 1);
629         return -1;
630     }
631     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
632
633 #if FF_API_MPV_GLOBAL_OPTS
634     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
635         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
636     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
637         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
638     if (avctx->quantizer_noise_shaping)
639         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
640 #endif
641
642     switch (avctx->codec->id) {
643     case CODEC_ID_MPEG1VIDEO:
644         s->out_format = FMT_MPEG1;
645         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
646         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
647         break;
648     case CODEC_ID_MPEG2VIDEO:
649         s->out_format = FMT_MPEG1;
650         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
651         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
652         s->rtp_mode   = 1;
653         break;
654     case CODEC_ID_LJPEG:
655     case CODEC_ID_MJPEG:
656     case CODEC_ID_AMV:
657         s->out_format = FMT_MJPEG;
658         s->intra_only = 1; /* force intra only for jpeg */
659         if (avctx->codec->id == CODEC_ID_LJPEG &&
660             (avctx->pix_fmt == PIX_FMT_BGR0
661              || s->avctx->pix_fmt == PIX_FMT_BGRA
662              || s->avctx->pix_fmt == PIX_FMT_BGR24)) {
663             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
664             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
665             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
666         } else {
667             s->mjpeg_vsample[0] = 2;
668             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
669             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
670             s->mjpeg_hsample[0] = 2;
671             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
672             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
673         }
674         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
675             ff_mjpeg_encode_init(s) < 0)
676             return -1;
677         avctx->delay = 0;
678         s->low_delay = 1;
679         break;
680     case CODEC_ID_H261:
681         if (!CONFIG_H261_ENCODER)
682             return -1;
683         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
684             av_log(avctx, AV_LOG_ERROR,
685                    "The specified picture size of %dx%d is not valid for the "
686                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
687                     s->width, s->height);
688             return -1;
689         }
690         s->out_format = FMT_H261;
691         avctx->delay  = 0;
692         s->low_delay  = 1;
693         break;
694     case CODEC_ID_H263:
695         if (!CONFIG_H263_ENCODER)
696             return -1;
697         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
698                              s->width, s->height) == 8) {
699             av_log(avctx, AV_LOG_ERROR,
700                    "The specified picture size of %dx%d is not valid for "
701                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
702                    "352x288, 704x576, and 1408x1152. "
703                    "Try H.263+.\n", s->width, s->height);
704             return -1;
705         }
706         s->out_format = FMT_H263;
707         avctx->delay  = 0;
708         s->low_delay  = 1;
709         break;
710     case CODEC_ID_H263P:
711         s->out_format = FMT_H263;
712         s->h263_plus  = 1;
713         /* Fx */
714         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
715         s->modified_quant  = s->h263_aic;
716         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
717         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
718
719         /* /Fx */
720         /* These are just to be sure */
721         avctx->delay = 0;
722         s->low_delay = 1;
723         break;
724     case CODEC_ID_FLV1:
725         s->out_format      = FMT_H263;
726         s->h263_flv        = 2; /* format = 1; 11-bit codes */
727         s->unrestricted_mv = 1;
728         s->rtp_mode  = 0; /* don't allow GOB */
729         avctx->delay = 0;
730         s->low_delay = 1;
731         break;
732     case CODEC_ID_RV10:
733         s->out_format = FMT_H263;
734         avctx->delay  = 0;
735         s->low_delay  = 1;
736         break;
737     case CODEC_ID_RV20:
738         s->out_format      = FMT_H263;
739         avctx->delay       = 0;
740         s->low_delay       = 1;
741         s->modified_quant  = 1;
742         s->h263_aic        = 1;
743         s->h263_plus       = 1;
744         s->loop_filter     = 1;
745         s->unrestricted_mv = 0;
746         break;
747     case CODEC_ID_MPEG4:
748         s->out_format      = FMT_H263;
749         s->h263_pred       = 1;
750         s->unrestricted_mv = 1;
751         s->low_delay       = s->max_b_frames ? 0 : 1;
752         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
753         break;
754     case CODEC_ID_MSMPEG4V2:
755         s->out_format      = FMT_H263;
756         s->h263_pred       = 1;
757         s->unrestricted_mv = 1;
758         s->msmpeg4_version = 2;
759         avctx->delay       = 0;
760         s->low_delay       = 1;
761         break;
762     case CODEC_ID_MSMPEG4V3:
763         s->out_format        = FMT_H263;
764         s->h263_pred         = 1;
765         s->unrestricted_mv   = 1;
766         s->msmpeg4_version   = 3;
767         s->flipflop_rounding = 1;
768         avctx->delay         = 0;
769         s->low_delay         = 1;
770         break;
771     case CODEC_ID_WMV1:
772         s->out_format        = FMT_H263;
773         s->h263_pred         = 1;
774         s->unrestricted_mv   = 1;
775         s->msmpeg4_version   = 4;
776         s->flipflop_rounding = 1;
777         avctx->delay         = 0;
778         s->low_delay         = 1;
779         break;
780     case CODEC_ID_WMV2:
781         s->out_format        = FMT_H263;
782         s->h263_pred         = 1;
783         s->unrestricted_mv   = 1;
784         s->msmpeg4_version   = 5;
785         s->flipflop_rounding = 1;
786         avctx->delay         = 0;
787         s->low_delay         = 1;
788         break;
789     default:
790         return -1;
791     }
792
793     avctx->has_b_frames = !s->low_delay;
794
795     s->encoding = 1;
796
797     s->progressive_frame    =
798     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
799                                                 CODEC_FLAG_INTERLACED_ME) ||
800                                 s->alternate_scan);
801
802     /* init */
803     if (ff_MPV_common_init(s) < 0)
804         return -1;
805
806     if (!s->dct_quantize)
807         s->dct_quantize = ff_dct_quantize_c;
808     if (!s->denoise_dct)
809         s->denoise_dct  = denoise_dct_c;
810     s->fast_dct_quantize = s->dct_quantize;
811     if (avctx->trellis)
812         s->dct_quantize  = dct_quantize_trellis_c;
813
814     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
815         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
816
817     s->quant_precision = 5;
818
819     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
820     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
821
822     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
823         ff_h261_encode_init(s);
824     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
825         ff_h263_encode_init(s);
826     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
827         ff_msmpeg4_encode_init(s);
828     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
829         && s->out_format == FMT_MPEG1)
830         ff_mpeg1_encode_init(s);
831
832     /* init q matrix */
833     for (i = 0; i < 64; i++) {
834         int j = s->dsp.idct_permutation[i];
835         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
836             s->mpeg_quant) {
837             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
838             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
839         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
840             s->intra_matrix[j] =
841             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
842         } else {
843             /* mpeg1/2 */
844             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
845             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
846         }
847         if (s->avctx->intra_matrix)
848             s->intra_matrix[j] = s->avctx->intra_matrix[i];
849         if (s->avctx->inter_matrix)
850             s->inter_matrix[j] = s->avctx->inter_matrix[i];
851     }
852
853     /* precompute matrix */
854     /* for mjpeg, we do include qscale in the matrix */
855     if (s->out_format != FMT_MJPEG) {
856         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
857                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
858                           31, 1);
859         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
860                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
861                           31, 0);
862     }
863
864     if (ff_rate_control_init(s) < 0)
865         return -1;
866
867     return 0;
868 }
869
870 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
871 {
872     MpegEncContext *s = avctx->priv_data;
873
874     ff_rate_control_uninit(s);
875
876     ff_MPV_common_end(s);
877     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
878         s->out_format == FMT_MJPEG)
879         ff_mjpeg_encode_close(s);
880
881     av_freep(&avctx->extradata);
882
883     return 0;
884 }
885
/**
 * Sum of absolute errors between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int row, col;
    int total = 0;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
899
900 static int get_intra_count(MpegEncContext *s, uint8_t *src,
901                            uint8_t *ref, int stride)
902 {
903     int x, y, w, h;
904     int acc = 0;
905
906     w = s->width  & ~15;
907     h = s->height & ~15;
908
909     for (y = 0; y < h; y += 16) {
910         for (x = 0; x < w; x += 16) {
911             int offset = x + y * stride;
912             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
913                                      16);
914             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
915             int sae  = get_sae(src + offset, mean, stride);
916
917             acc += sae + 500 < sad;
918         }
919     }
920     return acc;
921 }
922
923
924 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
925 {
926     AVFrame *pic = NULL;
927     int64_t pts;
928     int i;
929     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
930                                                  (s->low_delay ? 0 : 1);
931     int direct = 1;
932
933     if (pic_arg) {
934         pts = pic_arg->pts;
935         pic_arg->display_picture_number = s->input_picture_number++;
936
937         if (pts != AV_NOPTS_VALUE) {
938             if (s->user_specified_pts != AV_NOPTS_VALUE) {
939                 int64_t time = pts;
940                 int64_t last = s->user_specified_pts;
941
942                 if (time <= last) {
943                     av_log(s->avctx, AV_LOG_ERROR,
944                            "Error, Invalid timestamp=%"PRId64", "
945                            "last=%"PRId64"\n", pts, s->user_specified_pts);
946                     return -1;
947                 }
948
949                 if (!s->low_delay && pic_arg->display_picture_number == 1)
950                     s->dts_delta = time - last;
951             }
952             s->user_specified_pts = pts;
953         } else {
954             if (s->user_specified_pts != AV_NOPTS_VALUE) {
955                 s->user_specified_pts =
956                 pts = s->user_specified_pts + 1;
957                 av_log(s->avctx, AV_LOG_INFO,
958                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
959                        pts);
960             } else {
961                 pts = pic_arg->display_picture_number;
962             }
963         }
964     }
965
966   if (pic_arg) {
967     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
968         direct = 0;
969     if (pic_arg->linesize[0] != s->linesize)
970         direct = 0;
971     if (pic_arg->linesize[1] != s->uvlinesize)
972         direct = 0;
973     if (pic_arg->linesize[2] != s->uvlinesize)
974         direct = 0;
975
976     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
977     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
978
979     if (direct) {
980         i = ff_find_unused_picture(s, 1);
981         if (i < 0)
982             return i;
983
984         pic = &s->picture[i].f;
985         pic->reference = 3;
986
987         for (i = 0; i < 4; i++) {
988             pic->data[i]     = pic_arg->data[i];
989             pic->linesize[i] = pic_arg->linesize[i];
990         }
991         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
992             return -1;
993         }
994     } else {
995         i = ff_find_unused_picture(s, 0);
996         if (i < 0)
997             return i;
998
999         pic = &s->picture[i].f;
1000         pic->reference = 3;
1001
1002         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1003             return -1;
1004         }
1005
1006         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1007             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1008             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1009             // empty
1010         } else {
1011             int h_chroma_shift, v_chroma_shift;
1012             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1013                                           &v_chroma_shift);
1014
1015             for (i = 0; i < 3; i++) {
1016                 int src_stride = pic_arg->linesize[i];
1017                 int dst_stride = i ? s->uvlinesize : s->linesize;
1018                 int h_shift = i ? h_chroma_shift : 0;
1019                 int v_shift = i ? v_chroma_shift : 0;
1020                 int w = s->width  >> h_shift;
1021                 int h = s->height >> v_shift;
1022                 uint8_t *src = pic_arg->data[i];
1023                 uint8_t *dst = pic->data[i];
1024
1025                 if(s->codec_id == CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1026                     h= ((s->height+15)/16*16)>>v_shift;
1027                 }
1028
1029                 if (!s->avctx->rc_buffer_size)
1030                     dst += INPLACE_OFFSET;
1031
1032                 if (src_stride == dst_stride)
1033                     memcpy(dst, src, src_stride * h);
1034                 else {
1035                     while (h--) {
1036                         memcpy(dst, src, w);
1037                         dst += dst_stride;
1038                         src += src_stride;
1039                     }
1040                 }
1041             }
1042         }
1043     }
1044     copy_picture_attributes(s, pic, pic_arg);
1045     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1046   }
1047
1048     /* shift buffer entries */
1049     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1050         s->input_picture[i - 1] = s->input_picture[i];
1051
1052     s->input_picture[encoding_delay] = (Picture*) pic;
1053
1054     return 0;
1055 }
1056
1057 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1058 {
1059     int x, y, plane;
1060     int score = 0;
1061     int64_t score64 = 0;
1062
1063     for (plane = 0; plane < 3; plane++) {
1064         const int stride = p->f.linesize[plane];
1065         const int bw = plane ? 1 : 2;
1066         for (y = 0; y < s->mb_height * bw; y++) {
1067             for (x = 0; x < s->mb_width * bw; x++) {
1068                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1069                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1070                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1071                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1072
1073                 switch (s->avctx->frame_skip_exp) {
1074                 case 0: score    =  FFMAX(score, v);          break;
1075                 case 1: score   += FFABS(v);                  break;
1076                 case 2: score   += v * v;                     break;
1077                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1078                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1079                 }
1080             }
1081         }
1082     }
1083
1084     if (score)
1085         score64 = score;
1086
1087     if (score64 < s->avctx->frame_skip_threshold)
1088         return 1;
1089     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1090         return 1;
1091     return 0;
1092 }
1093
1094 static int estimate_best_b_count(MpegEncContext *s)
1095 {
1096     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1097     AVCodecContext *c = avcodec_alloc_context3(NULL);
1098     AVFrame input[FF_MAX_B_FRAMES + 2];
1099     const int scale = s->avctx->brd_scale;
1100     int i, j, out_size, p_lambda, b_lambda, lambda2;
1101     int outbuf_size  = s->width * s->height; // FIXME
1102     uint8_t *outbuf  = av_malloc(outbuf_size);
1103     int64_t best_rd  = INT64_MAX;
1104     int best_b_count = -1;
1105
1106     assert(scale >= 0 && scale <= 3);
1107
1108     //emms_c();
1109     //s->next_picture_ptr->quality;
1110     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1111     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1112     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1113     if (!b_lambda) // FIXME we should do this somewhere else
1114         b_lambda = p_lambda;
1115     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1116                FF_LAMBDA_SHIFT;
1117
1118     c->width        = s->width  >> scale;
1119     c->height       = s->height >> scale;
1120     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1121                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1122     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1123     c->mb_decision  = s->avctx->mb_decision;
1124     c->me_cmp       = s->avctx->me_cmp;
1125     c->mb_cmp       = s->avctx->mb_cmp;
1126     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1127     c->pix_fmt      = PIX_FMT_YUV420P;
1128     c->time_base    = s->avctx->time_base;
1129     c->max_b_frames = s->max_b_frames;
1130
1131     if (avcodec_open2(c, codec, NULL) < 0)
1132         return -1;
1133
1134     for (i = 0; i < s->max_b_frames + 2; i++) {
1135         int ysize = c->width * c->height;
1136         int csize = (c->width / 2) * (c->height / 2);
1137         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1138                                                 s->next_picture_ptr;
1139
1140         avcodec_get_frame_defaults(&input[i]);
1141         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1142         input[i].data[1]     = input[i].data[0] + ysize;
1143         input[i].data[2]     = input[i].data[1] + csize;
1144         input[i].linesize[0] = c->width;
1145         input[i].linesize[1] =
1146         input[i].linesize[2] = c->width / 2;
1147
1148         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1149             pre_input = *pre_input_ptr;
1150
1151             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1152                 pre_input.f.data[0] += INPLACE_OFFSET;
1153                 pre_input.f.data[1] += INPLACE_OFFSET;
1154                 pre_input.f.data[2] += INPLACE_OFFSET;
1155             }
1156
1157             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1158                                  pre_input.f.data[0], pre_input.f.linesize[0],
1159                                  c->width,      c->height);
1160             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1161                                  pre_input.f.data[1], pre_input.f.linesize[1],
1162                                  c->width >> 1, c->height >> 1);
1163             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1164                                  pre_input.f.data[2], pre_input.f.linesize[2],
1165                                  c->width >> 1, c->height >> 1);
1166         }
1167     }
1168
1169     for (j = 0; j < s->max_b_frames + 1; j++) {
1170         int64_t rd = 0;
1171
1172         if (!s->input_picture[j])
1173             break;
1174
1175         c->error[0] = c->error[1] = c->error[2] = 0;
1176
1177         input[0].pict_type = AV_PICTURE_TYPE_I;
1178         input[0].quality   = 1 * FF_QP2LAMBDA;
1179         out_size           = avcodec_encode_video(c, outbuf,
1180                                                   outbuf_size, &input[0]);
1181         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1182
1183         for (i = 0; i < s->max_b_frames + 1; i++) {
1184             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1185
1186             input[i + 1].pict_type = is_p ?
1187                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1188             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1189             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1190                                             &input[i + 1]);
1191             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1192         }
1193
1194         /* get the delayed frames */
1195         while (out_size) {
1196             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1197             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1198         }
1199
1200         rd += c->error[0] + c->error[1] + c->error[2];
1201
1202         if (rd < best_rd) {
1203             best_rd = rd;
1204             best_b_count = j;
1205         }
1206     }
1207
1208     av_freep(&outbuf);
1209     avcodec_close(c);
1210     av_freep(&c);
1211
1212     for (i = 0; i < s->max_b_frames + 2; i++) {
1213         av_freep(&input[i].data[0]);
1214     }
1215
1216     return best_b_count;
1217 }
1218
1219 static int select_input_picture(MpegEncContext *s)
1220 {
1221     int i;
1222
1223     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1224         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1225     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1226
1227     /* set next picture type & ordering */
1228     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1229         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1230             s->next_picture_ptr == NULL || s->intra_only) {
1231             s->reordered_input_picture[0] = s->input_picture[0];
1232             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1233             s->reordered_input_picture[0]->f.coded_picture_number =
1234                 s->coded_picture_number++;
1235         } else {
1236             int b_frames;
1237
1238             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1239                 if (s->picture_in_gop_number < s->gop_size &&
1240                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1241                     // FIXME check that te gop check above is +-1 correct
1242                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1243                     //       s->input_picture[0]->f.data[0],
1244                     //       s->input_picture[0]->pts);
1245
1246                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1247                         for (i = 0; i < 4; i++)
1248                             s->input_picture[0]->f.data[i] = NULL;
1249                         s->input_picture[0]->f.type = 0;
1250                     } else {
1251                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1252                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1253
1254                         s->avctx->release_buffer(s->avctx,
1255                                                  &s->input_picture[0]->f);
1256                     }
1257
1258                     emms_c();
1259                     ff_vbv_update(s, 0);
1260
1261                     goto no_output_pic;
1262                 }
1263             }
1264
1265             if (s->flags & CODEC_FLAG_PASS2) {
1266                 for (i = 0; i < s->max_b_frames + 1; i++) {
1267                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1268
1269                     if (pict_num >= s->rc_context.num_entries)
1270                         break;
1271                     if (!s->input_picture[i]) {
1272                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1273                         break;
1274                     }
1275
1276                     s->input_picture[i]->f.pict_type =
1277                         s->rc_context.entry[pict_num].new_pict_type;
1278                 }
1279             }
1280
1281             if (s->avctx->b_frame_strategy == 0) {
1282                 b_frames = s->max_b_frames;
1283                 while (b_frames && !s->input_picture[b_frames])
1284                     b_frames--;
1285             } else if (s->avctx->b_frame_strategy == 1) {
1286                 for (i = 1; i < s->max_b_frames + 1; i++) {
1287                     if (s->input_picture[i] &&
1288                         s->input_picture[i]->b_frame_score == 0) {
1289                         s->input_picture[i]->b_frame_score =
1290                             get_intra_count(s,
1291                                             s->input_picture[i    ]->f.data[0],
1292                                             s->input_picture[i - 1]->f.data[0],
1293                                             s->linesize) + 1;
1294                     }
1295                 }
1296                 for (i = 0; i < s->max_b_frames + 1; i++) {
1297                     if (s->input_picture[i] == NULL ||
1298                         s->input_picture[i]->b_frame_score - 1 >
1299                             s->mb_num / s->avctx->b_sensitivity)
1300                         break;
1301                 }
1302
1303                 b_frames = FFMAX(0, i - 1);
1304
1305                 /* reset scores */
1306                 for (i = 0; i < b_frames + 1; i++) {
1307                     s->input_picture[i]->b_frame_score = 0;
1308                 }
1309             } else if (s->avctx->b_frame_strategy == 2) {
1310                 b_frames = estimate_best_b_count(s);
1311             } else {
1312                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1313                 b_frames = 0;
1314             }
1315
1316             emms_c();
1317             //static int b_count = 0;
1318             //b_count += b_frames;
1319             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1320
1321             for (i = b_frames - 1; i >= 0; i--) {
1322                 int type = s->input_picture[i]->f.pict_type;
1323                 if (type && type != AV_PICTURE_TYPE_B)
1324                     b_frames = i;
1325             }
1326             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1327                 b_frames == s->max_b_frames) {
1328                 av_log(s->avctx, AV_LOG_ERROR,
1329                        "warning, too many b frames in a row\n");
1330             }
1331
1332             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1333                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1334                     s->gop_size > s->picture_in_gop_number) {
1335                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1336                 } else {
1337                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1338                         b_frames = 0;
1339                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1340                 }
1341             }
1342
1343             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1344                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1345                 b_frames--;
1346
1347             s->reordered_input_picture[0] = s->input_picture[b_frames];
1348             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1349                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1350             s->reordered_input_picture[0]->f.coded_picture_number =
1351                 s->coded_picture_number++;
1352             for (i = 0; i < b_frames; i++) {
1353                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1354                 s->reordered_input_picture[i + 1]->f.pict_type =
1355                     AV_PICTURE_TYPE_B;
1356                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1357                     s->coded_picture_number++;
1358             }
1359         }
1360     }
1361 no_output_pic:
1362     if (s->reordered_input_picture[0]) {
1363         s->reordered_input_picture[0]->f.reference =
1364            s->reordered_input_picture[0]->f.pict_type !=
1365                AV_PICTURE_TYPE_B ? 3 : 0;
1366
1367         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1368
1369         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1370             s->avctx->rc_buffer_size) {
1371             // input is a shared pix, so we can't modifiy it -> alloc a new
1372             // one & ensure that the shared one is reuseable
1373
1374             Picture *pic;
1375             int i = ff_find_unused_picture(s, 0);
1376             if (i < 0)
1377                 return i;
1378             pic = &s->picture[i];
1379
1380             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1381             if (ff_alloc_picture(s, pic, 0) < 0) {
1382                 return -1;
1383             }
1384
1385             /* mark us unused / free shared pic */
1386             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1387                 s->avctx->release_buffer(s->avctx,
1388                                          &s->reordered_input_picture[0]->f);
1389             for (i = 0; i < 4; i++)
1390                 s->reordered_input_picture[0]->f.data[i] = NULL;
1391             s->reordered_input_picture[0]->f.type = 0;
1392
1393             copy_picture_attributes(s, &pic->f,
1394                                     &s->reordered_input_picture[0]->f);
1395
1396             s->current_picture_ptr = pic;
1397         } else {
1398             // input is not a shared pix -> reuse buffer for current_pix
1399
1400             assert(s->reordered_input_picture[0]->f.type ==
1401                        FF_BUFFER_TYPE_USER ||
1402                    s->reordered_input_picture[0]->f.type ==
1403                        FF_BUFFER_TYPE_INTERNAL);
1404
1405             s->current_picture_ptr = s->reordered_input_picture[0];
1406             for (i = 0; i < 4; i++) {
1407                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1408             }
1409         }
1410         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1411
1412         s->picture_number = s->new_picture.f.display_picture_number;
1413         //printf("dpn:%d\n", s->picture_number);
1414     } else {
1415         memset(&s->new_picture, 0, sizeof(Picture));
1416     }
1417     return 0;
1418 }
1419
1420 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1421                           AVFrame *pic_arg, int *got_packet)
1422 {
1423     MpegEncContext *s = avctx->priv_data;
1424     int i, stuffing_count, ret;
1425     int context_count = s->slice_context_count;
1426
1427     s->picture_in_gop_number++;
1428
1429     if (load_input_picture(s, pic_arg) < 0)
1430         return -1;
1431
1432     if (select_input_picture(s) < 0) {
1433         return -1;
1434     }
1435
1436     /* output? */
1437     if (s->new_picture.f.data[0]) {
1438         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1439             return ret;
1440         if (s->mb_info) {
1441             s->mb_info_ptr = av_packet_new_side_data(pkt,
1442                                  AV_PKT_DATA_H263_MB_INFO,
1443                                  s->mb_width*s->mb_height*12);
1444             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1445         }
1446
1447         for (i = 0; i < context_count; i++) {
1448             int start_y = s->thread_context[i]->start_mb_y;
1449             int   end_y = s->thread_context[i]->  end_mb_y;
1450             int h       = s->mb_height;
1451             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1452             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1453
1454             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1455         }
1456
1457         s->pict_type = s->new_picture.f.pict_type;
1458         //emms_c();
1459         //printf("qs:%f %f %d\n", s->new_picture.quality,
1460         //       s->current_picture.quality, s->qscale);
1461         ff_MPV_frame_start(s, avctx);
1462 vbv_retry:
1463         if (encode_picture(s, s->picture_number) < 0)
1464             return -1;
1465
1466         avctx->header_bits = s->header_bits;
1467         avctx->mv_bits     = s->mv_bits;
1468         avctx->misc_bits   = s->misc_bits;
1469         avctx->i_tex_bits  = s->i_tex_bits;
1470         avctx->p_tex_bits  = s->p_tex_bits;
1471         avctx->i_count     = s->i_count;
1472         // FIXME f/b_count in avctx
1473         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1474         avctx->skip_count  = s->skip_count;
1475
1476         ff_MPV_frame_end(s);
1477
1478         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1479             ff_mjpeg_encode_picture_trailer(s);
1480
1481         if (avctx->rc_buffer_size) {
1482             RateControlContext *rcc = &s->rc_context;
1483             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1484
1485             if (put_bits_count(&s->pb) > max_size &&
1486                 s->lambda < s->avctx->lmax) {
1487                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1488                                        (s->qscale + 1) / s->qscale);
1489                 if (s->adaptive_quant) {
1490                     int i;
1491                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1492                         s->lambda_table[i] =
1493                             FFMAX(s->lambda_table[i] + 1,
1494                                   s->lambda_table[i] * (s->qscale + 1) /
1495                                   s->qscale);
1496                 }
1497                 s->mb_skipped = 0;        // done in MPV_frame_start()
1498                 // done in encode_picture() so we must undo it
1499                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1500                     if (s->flipflop_rounding          ||
1501                         s->codec_id == CODEC_ID_H263P ||
1502                         s->codec_id == CODEC_ID_MPEG4)
1503                         s->no_rounding ^= 1;
1504                 }
1505                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1506                     s->time_base       = s->last_time_base;
1507                     s->last_non_b_time = s->time - s->pp_time;
1508                 }
1509                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1510                 for (i = 0; i < context_count; i++) {
1511                     PutBitContext *pb = &s->thread_context[i]->pb;
1512                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1513                 }
1514                 goto vbv_retry;
1515             }
1516
1517             assert(s->avctx->rc_max_rate);
1518         }
1519
1520         if (s->flags & CODEC_FLAG_PASS1)
1521             ff_write_pass1_stats(s);
1522
1523         for (i = 0; i < 4; i++) {
1524             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1525             avctx->error[i] += s->current_picture_ptr->f.error[i];
1526         }
1527
1528         if (s->flags & CODEC_FLAG_PASS1)
1529             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1530                    avctx->i_tex_bits + avctx->p_tex_bits ==
1531                        put_bits_count(&s->pb));
1532         flush_put_bits(&s->pb);
1533         s->frame_bits  = put_bits_count(&s->pb);
1534
1535         stuffing_count = ff_vbv_update(s, s->frame_bits);
1536         if (stuffing_count) {
1537             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1538                     stuffing_count + 50) {
1539                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1540                 return -1;
1541             }
1542
1543             switch (s->codec_id) {
1544             case CODEC_ID_MPEG1VIDEO:
1545             case CODEC_ID_MPEG2VIDEO:
1546                 while (stuffing_count--) {
1547                     put_bits(&s->pb, 8, 0);
1548                 }
1549             break;
1550             case CODEC_ID_MPEG4:
1551                 put_bits(&s->pb, 16, 0);
1552                 put_bits(&s->pb, 16, 0x1C3);
1553                 stuffing_count -= 4;
1554                 while (stuffing_count--) {
1555                     put_bits(&s->pb, 8, 0xFF);
1556                 }
1557             break;
1558             default:
1559                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1560             }
1561             flush_put_bits(&s->pb);
1562             s->frame_bits  = put_bits_count(&s->pb);
1563         }
1564
1565         /* update mpeg1/2 vbv_delay for CBR */
1566         if (s->avctx->rc_max_rate                          &&
1567             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1568             s->out_format == FMT_MPEG1                     &&
1569             90000LL * (avctx->rc_buffer_size - 1) <=
1570                 s->avctx->rc_max_rate * 0xFFFFLL) {
1571             int vbv_delay, min_delay;
1572             double inbits  = s->avctx->rc_max_rate *
1573                              av_q2d(s->avctx->time_base);
1574             int    minbits = s->frame_bits - 8 *
1575                              (s->vbv_delay_ptr - s->pb.buf - 1);
1576             double bits    = s->rc_context.buffer_index + minbits - inbits;
1577
1578             if (bits < 0)
1579                 av_log(s->avctx, AV_LOG_ERROR,
1580                        "Internal error, negative bits\n");
1581
1582             assert(s->repeat_first_field == 0);
1583
1584             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1585             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1586                         s->avctx->rc_max_rate;
1587
1588             vbv_delay = FFMAX(vbv_delay, min_delay);
1589
1590             assert(vbv_delay < 0xFFFF);
1591
1592             s->vbv_delay_ptr[0] &= 0xF8;
1593             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1594             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1595             s->vbv_delay_ptr[2] &= 0x07;
1596             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1597             avctx->vbv_delay     = vbv_delay * 300;
1598         }
1599         s->total_bits     += s->frame_bits;
1600         avctx->frame_bits  = s->frame_bits;
1601
1602         pkt->pts = s->current_picture.f.pts;
1603         if (!s->low_delay) {
1604             if (!s->current_picture.f.coded_picture_number)
1605                 pkt->dts = pkt->pts - s->dts_delta;
1606             else
1607                 pkt->dts = s->reordered_pts;
1608             s->reordered_pts = s->input_picture[0]->f.pts;
1609         } else
1610             pkt->dts = pkt->pts;
1611         if (s->current_picture.f.key_frame)
1612             pkt->flags |= AV_PKT_FLAG_KEY;
1613         if (s->mb_info)
1614             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1615     } else {
1616         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1617         s->frame_bits = 0;
1618     }
1619     assert((s->frame_bits & 7) == 0);
1620
1621     pkt->size = s->frame_bits / 8;
1622     *got_packet = !!pkt->size;
1623     return 0;
1624 }
1625
1626 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1627                                                 int n, int threshold)
1628 {
1629     static const char tab[64] = {
1630         3, 2, 2, 1, 1, 1, 1, 1,
1631         1, 1, 1, 1, 1, 1, 1, 1,
1632         1, 1, 1, 1, 1, 1, 1, 1,
1633         0, 0, 0, 0, 0, 0, 0, 0,
1634         0, 0, 0, 0, 0, 0, 0, 0,
1635         0, 0, 0, 0, 0, 0, 0, 0,
1636         0, 0, 0, 0, 0, 0, 0, 0,
1637         0, 0, 0, 0, 0, 0, 0, 0
1638     };
1639     int score = 0;
1640     int run = 0;
1641     int i;
1642     DCTELEM *block = s->block[n];
1643     const int last_index = s->block_last_index[n];
1644     int skip_dc;
1645
1646     if (threshold < 0) {
1647         skip_dc = 0;
1648         threshold = -threshold;
1649     } else
1650         skip_dc = 1;
1651
1652     /* Are all we could set to zero already zero? */
1653     if (last_index <= skip_dc - 1)
1654         return;
1655
1656     for (i = 0; i <= last_index; i++) {
1657         const int j = s->intra_scantable.permutated[i];
1658         const int level = FFABS(block[j]);
1659         if (level == 1) {
1660             if (skip_dc && i == 0)
1661                 continue;
1662             score += tab[run];
1663             run = 0;
1664         } else if (level > 1) {
1665             return;
1666         } else {
1667             run++;
1668         }
1669     }
1670     if (score >= threshold)
1671         return;
1672     for (i = skip_dc; i <= last_index; i++) {
1673         const int j = s->intra_scantable.permutated[i];
1674         block[j] = 0;
1675     }
1676     if (block[0])
1677         s->block_last_index[n] = 0;
1678     else
1679         s->block_last_index[n] = -1;
1680 }
1681
1682 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1683                                int last_index)
1684 {
1685     int i;
1686     const int maxlevel = s->max_qcoeff;
1687     const int minlevel = s->min_qcoeff;
1688     int overflow = 0;
1689
1690     if (s->mb_intra) {
1691         i = 1; // skip clipping of intra dc
1692     } else
1693         i = 0;
1694
1695     for (; i <= last_index; i++) {
1696         const int j = s->intra_scantable.permutated[i];
1697         int level = block[j];
1698
1699         if (level > maxlevel) {
1700             level = maxlevel;
1701             overflow++;
1702         } else if (level < minlevel) {
1703             level = minlevel;
1704             overflow++;
1705         }
1706
1707         block[j] = level;
1708     }
1709
1710     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1711         av_log(s->avctx, AV_LOG_INFO,
1712                "warning, clipping %d dct coefficients to %d..%d\n",
1713                overflow, minlevel, maxlevel);
1714 }
1715
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For each pixel, the sum and the sum of squares are gathered over its
 * neighbourhood (up to 3x3, cropped at the borders of the 8x8 block), and
 * the weight is 36 * sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output table of 64 entries, row-major
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int sq_total = 0;
            int samples  = 0;
            int r, c;

            for (r = top; r < bottom; r++) {
                for (c = left; c < right; c++) {
                    const int pix = ptr[c + r * stride];

                    total    += pix;
                    sq_total += pix * pix;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * sq_total - total * total)) / samples;
        }
    }
}
1739
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps performed, in order:
 *  - optional per-macroblock quantizer update (adaptive quantization / QP_RD),
 *  - fetch of the source pixels (with edge emulation when the macroblock
 *    crosses the picture border),
 *  - intra: copy pixels into s->block[]; inter: run motion compensation into
 *    s->dest[] and store the difference in s->block[],
 *  - optional choice of interlaced DCT,
 *  - DCT + quantization of every block that is not skipped, optional noise
 *    shaping refinement and single coefficient elimination,
 *  - dispatch to the codec specific macroblock bitstream writer.
 *
 * @param s               encoder context
 * @param motion_x        passed through to the codec specific writer
 * @param motion_y        passed through to the codec specific writer
 * @param mb_block_height chroma block height used for the chroma source
 *                        offset and for chroma edge emulation
 * @param mb_block_count  number of blocks in the macroblock that are
 *                        processed (indices 0..mb_block_count-1)
 */
1740 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1741                                                 int motion_x, int motion_y,
1742                                                 int mb_block_height,
1743                                                 int mb_block_count)
1744 {
1745     int16_t weight[8][64];
1746     DCTELEM orig[8][64];
1747     const int mb_x = s->mb_x;
1748     const int mb_y = s->mb_y;
1749     int i;
1750     int skip_dct[8];
1751     int dct_offset = s->linesize * 8; // default for progressive frames
1752     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1753     int wrap_y, wrap_c;
1754
         /* every block starts with the context-wide skipdct setting */
1755     for (i = 0; i < mb_block_count; i++)
1756         skip_dct[i] = s->skipdct;
1757
1758     if (s->adaptive_quant) {
1759         const int last_qp = s->qscale;
1760         const int mb_xy = mb_x + mb_y * s->mb_stride;
1761
             /* take the per-macroblock lambda and derive qscale from it */
1762         s->lambda = s->lambda_table[mb_xy];
1763         update_qscale(s);
1764
1765         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1766             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1767             s->dquant = s->qscale - last_qp;
1768
1769             if (s->out_format == FMT_H263) {
                     /* the quantizer step between macroblocks is limited
                      * to [-2, 2] here */
1770                 s->dquant = av_clip(s->dquant, -2, 2);
1771
1772                 if (s->codec_id == CODEC_ID_MPEG4) {
1773                     if (!s->mb_intra) {
                             /* B-frames: drop odd dquant values and any
                              * dquant on direct macroblocks */
1774                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1775                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1776                                 s->dquant = 0;
1777                         }
                             /* no quantizer change with 8x8 motion vectors */
1778                         if (s->mv_type == MV_TYPE_8X8)
1779                             s->dquant = 0;
1780                     }
1781                 }
1782             }
1783         }
1784         ff_set_qscale(s, last_qp + s->dquant);
1785     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1786         ff_set_qscale(s, s->qscale + s->dquant);
1787
         /* pointers to this macroblock inside the picture being encoded */
1788     wrap_y = s->linesize;
1789     wrap_c = s->uvlinesize;
1790     ptr_y  = s->new_picture.f.data[0] +
1791              (mb_y * 16 * wrap_y)              + mb_x * 16;
1792     ptr_cb = s->new_picture.f.data[1] +
1793              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1794     ptr_cr = s->new_picture.f.data[2] +
1795              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1796
         /* macroblock reaches past the picture width/height (and the codec
          * is not AMV): read from a copy built in edge_emu_buffer instead.
          * NOTE(review): the chroma calls use mb_y * 8 and s->height >> 1
          * even when mb_block_height is 16 -- confirm this is intended for
          * the non-4:2:0 case. */
1797     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != CODEC_ID_AMV){
1798         uint8_t *ebuf = s->edge_emu_buffer + 32;
1799         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1800                                 mb_y * 16, s->width, s->height);
1801         ptr_y = ebuf;
1802         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1803                                 mb_block_height, mb_x * 8, mb_y * 8,
1804                                 s->width >> 1, s->height >> 1);
1805         ptr_cb = ebuf + 18 * wrap_y;
1806         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1807                                 mb_block_height, mb_x * 8, mb_y * 8,
1808                                 s->width >> 1, s->height >> 1);
1809         ptr_cr = ebuf + 18 * wrap_y + 8;
1810     }
1811
1812     if (s->mb_intra) {
1813         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1814             int progressive_score, interlaced_score;
1815
                 /* compare the cost of the two 16x8 halves read with the
                  * normal stride against the two fields read with doubled
                  * stride; the progressive score carries a -400 bias */
1816             s->interlaced_dct = 0;
1817             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1818                                                     NULL, wrap_y, 8) +
1819                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1820                                                     NULL, wrap_y, 8) - 400;
1821
1822             if (progressive_score > 0) {
1823                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1824                                                        NULL, wrap_y * 2, 8) +
1825                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1826                                                        NULL, wrap_y * 2, 8);
1827                 if (progressive_score > interlaced_score) {
1828                     s->interlaced_dct = 1;
1829
                         /* field layout: second block row starts one line
                          * down and lines are two picture lines apart */
1830                     dct_offset = wrap_y;
1831                     wrap_y <<= 1;
1832                     if (s->chroma_format == CHROMA_422)
1833                         wrap_c <<= 1;
1834                 }
1835             }
1836         }
1837
             /* intra: the blocks hold the source pixels themselves */
1838         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1839         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1840         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1841         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1842
1843         if (s->flags & CODEC_FLAG_GRAY) {
1844             skip_dct[4] = 1;
1845             skip_dct[5] = 1;
1846         } else {
1847             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1848             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1849             if (!s->chroma_y_shift) { /* 422 */
1850                 s->dsp.get_pixels(s->block[6],
1851                                   ptr_cb + (dct_offset >> 1), wrap_c);
1852                 s->dsp.get_pixels(s->block[7],
1853                                   ptr_cr + (dct_offset >> 1), wrap_c);
1854             }
1855         }
1856     } else {
1857         op_pixels_func (*op_pix)[4];
1858         qpel_mc_func (*op_qpix)[16];
1859         uint8_t *dest_y, *dest_cb, *dest_cr;
1860
1861         dest_y  = s->dest[0];
1862         dest_cb = s->dest[1];
1863         dest_cr = s->dest[2];
1864
             /* pick the rounding or no-rounding put functions; B-frames
              * always take the rounding ones */
1865         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1866             op_pix  = s->dsp.put_pixels_tab;
1867             op_qpix = s->dsp.put_qpel_pixels_tab;
1868         } else {
1869             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1870             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1871         }
1872
             /* build the prediction in s->dest[]; after a forward
              * prediction the backward one is applied with the avg
              * functions */
1873         if (s->mv_dir & MV_DIR_FORWARD) {
1874             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1875                        op_pix, op_qpix);
1876             op_pix  = s->dsp.avg_pixels_tab;
1877             op_qpix = s->dsp.avg_qpel_pixels_tab;
1878         }
1879         if (s->mv_dir & MV_DIR_BACKWARD) {
1880             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1881                        op_pix, op_qpix);
1882         }
1883
1884         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1885             int progressive_score, interlaced_score;
1886
                 /* same decision as in the intra case, but measured between
                  * the prediction and the source */
1887             s->interlaced_dct = 0;
1888             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1889                                                     ptr_y,              wrap_y,
1890                                                     8) +
1891                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1892                                                     ptr_y + wrap_y * 8, wrap_y,
1893                                                     8) - 400;
1894
                 /* additional -400 bias when the VSSE comparison is used */
1895             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1896                 progressive_score -= 400;
1897
1898             if (progressive_score > 0) {
1899                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1900                                                        ptr_y,
1901                                                        wrap_y * 2, 8) +
1902                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1903                                                        ptr_y + wrap_y,
1904                                                        wrap_y * 2, 8);
1905
1906                 if (progressive_score > interlaced_score) {
1907                     s->interlaced_dct = 1;
1908
1909                     dct_offset = wrap_y;
1910                     wrap_y <<= 1;
1911                     if (s->chroma_format == CHROMA_422)
1912                         wrap_c <<= 1;
1913                 }
1914             }
1915         }
1916
             /* inter: the blocks hold source minus prediction */
1917         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1918         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1919         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1920                            dest_y + dct_offset, wrap_y);
1921         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1922                            dest_y + dct_offset + 8, wrap_y);
1923
1924         if (s->flags & CODEC_FLAG_GRAY) {
1925             skip_dct[4] = 1;
1926             skip_dct[5] = 1;
1927         } else {
1928             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1929             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1930             if (!s->chroma_y_shift) { /* 422 */
1931                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1932                                    dest_cb + (dct_offset >> 1), wrap_c);
1933                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1934                                    dest_cr + (dct_offset >> 1), wrap_c);
1935             }
1936         }
1937         /* pre quantization */
             /* when the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2, skip the DCT of every 8x8 block whose SAD
              * against the prediction is below 20 * qscale */
1938         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1939                 2 * s->qscale * s->qscale) {
1940             // FIXME optimize
1941             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1942                               wrap_y, 8) < 20 * s->qscale)
1943                 skip_dct[0] = 1;
1944             if (s->dsp.sad[1](NULL, ptr_y + 8,
1945                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1946                 skip_dct[1] = 1;
1947             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1948                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1949                 skip_dct[2] = 1;
1950             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1951                               dest_y + dct_offset + 8,
1952                               wrap_y, 8) < 20 * s->qscale)
1953                 skip_dct[3] = 1;
1954             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1955                               wrap_c, 8) < 20 * s->qscale)
1956                 skip_dct[4] = 1;
1957             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1958                               wrap_c, 8) < 20 * s->qscale)
1959                 skip_dct[5] = 1;
1960             if (!s->chroma_y_shift) { /* 422 */
1961                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1962                                   dest_cb + (dct_offset >> 1),
1963                                   wrap_c, 8) < 20 * s->qscale)
1964                     skip_dct[6] = 1;
1965                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1966                                   dest_cr + (dct_offset >> 1),
1967                                   wrap_c, 8) < 20 * s->qscale)
1968                     skip_dct[7] = 1;
1969             }
1970         }
1971     }
1972
         /* noise shaping: compute a weight table from the source pixels of
          * every block that is not skipped and keep a copy of the
          * unquantized blocks in orig[] for dct_quantize_refine() */
1973     if (s->quantizer_noise_shaping) {
1974         if (!skip_dct[0])
1975             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1976         if (!skip_dct[1])
1977             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1978         if (!skip_dct[2])
1979             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1980         if (!skip_dct[3])
1981             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1982         if (!skip_dct[4])
1983             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1984         if (!skip_dct[5])
1985             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1986         if (!s->chroma_y_shift) { /* 422 */
1987             if (!skip_dct[6])
1988                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1989                                   wrap_c);
1990             if (!skip_dct[7])
1991                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1992                                   wrap_c);
1993         }
             /* NOTE(review): one copy starting at s->block[0] covers all
              * blocks, so this presumably relies on the blocks being
              * contiguous in memory -- confirm against the allocation */
1994         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1995     }
1996
1997     /* DCT & quantize */
1998     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1999     {
2000         for (i = 0; i < mb_block_count; i++) {
2001             if (!skip_dct[i]) {
2002                 int overflow;
2003                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2004                 // FIXME we could decide to change to quantizer instead of
2005                 // clipping
2006                 // JS: I don't think that would be a good idea it could lower
2007                 //     quality instead of improve it. Just INTRADC clipping
2008                 //     deserves changes in quantizer
2009                 if (overflow)
2010                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2011             } else
                     /* skipped block: marked as having no coefficients */
2012                 s->block_last_index[i] = -1;
2013         }
2014         if (s->quantizer_noise_shaping) {
2015             for (i = 0; i < mb_block_count; i++) {
2016                 if (!skip_dct[i]) {
2017                     s->block_last_index[i] =
2018                         dct_quantize_refine(s, s->block[i], weight[i],
2019                                             orig[i], i, s->qscale);
2020                 }
2021             }
2022         }
2023
             /* single coefficient elimination, inter macroblocks only:
              * blocks 0..3 use the luma threshold, the rest the chroma one */
2024         if (s->luma_elim_threshold && !s->mb_intra)
2025             for (i = 0; i < 4; i++)
2026                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2027         if (s->chroma_elim_threshold && !s->mb_intra)
2028             for (i = 4; i < mb_block_count; i++)
2029                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2030
2031         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2032             for (i = 0; i < mb_block_count; i++) {
2033                 if (s->block_last_index[i] == -1)
2034                     s->coded_score[i] = INT_MAX / 256;
2035             }
2036         }
2037     }
2038
         /* gray intra macroblock: both chroma blocks become DC-only with
          * the value 1024 divided (with rounding) by c_dc_scale */
2039     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2040         s->block_last_index[4] =
2041         s->block_last_index[5] = 0;
2042         s->block[4][0] =
2043         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2044     }
2045
2046     // non c quantize code returns incorrect block_last_index FIXME
         /* so with alternate scan the last index is recomputed by searching
          * backwards for the last non-zero coefficient in scan order */
2047     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2048         for (i = 0; i < mb_block_count; i++) {
2049             int j;
2050             if (s->block_last_index[i] > 0) {
2051                 for (j = 63; j > 0; j--) {
2052                     if (s->block[i][s->intra_scantable.permutated[j]])
2053                         break;
2054                 }
2055                 s->block_last_index[i] = j;
2056             }
2057         }
2058     }
2059
2060     /* huffman encode */
         /* each call is guarded by its CONFIG_* constant */
2061     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2062     case CODEC_ID_MPEG1VIDEO:
2063     case CODEC_ID_MPEG2VIDEO:
2064         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2065             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2066         break;
2067     case CODEC_ID_MPEG4:
2068         if (CONFIG_MPEG4_ENCODER)
2069             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2070         break;
2071     case CODEC_ID_MSMPEG4V2:
2072     case CODEC_ID_MSMPEG4V3:
2073     case CODEC_ID_WMV1:
2074         if (CONFIG_MSMPEG4_ENCODER)
2075             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2076         break;
2077     case CODEC_ID_WMV2:
2078         if (CONFIG_WMV2_ENCODER)
2079             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2080         break;
2081     case CODEC_ID_H261:
2082         if (CONFIG_H261_ENCODER)
2083             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2084         break;
2085     case CODEC_ID_H263:
2086     case CODEC_ID_H263P:
2087     case CODEC_ID_FLV1:
2088     case CODEC_ID_RV10:
2089     case CODEC_ID_RV20:
2090         if (CONFIG_H263_ENCODER)
2091             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2092         break;
2093     case CODEC_ID_MJPEG:
2094     case CODEC_ID_AMV:
2095         if (CONFIG_MJPEG_ENCODER)
2096             ff_mjpeg_encode_mb(s, s->block);
2097         break;
2098     default:
2099         assert(0);
2100     }
2101 }
2102
2103 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2104 {
2105     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2106     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2107 }
2108
2109 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2110     int i;
2111
2112     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2113
2114     /* mpeg1 */
2115     d->mb_skip_run= s->mb_skip_run;
2116     for(i=0; i<3; i++)
2117         d->last_dc[i] = s->last_dc[i];
2118
2119     /* statistics */
2120     d->mv_bits= s->mv_bits;
2121     d->i_tex_bits= s->i_tex_bits;
2122     d->p_tex_bits= s->p_tex_bits;
2123     d->i_count= s->i_count;
2124     d->f_count= s->f_count;
2125     d->b_count= s->b_count;
2126     d->skip_count= s->skip_count;
2127     d->misc_bits= s->misc_bits;
2128     d->last_bits= 0;
2129
2130     d->mb_skipped= 0;
2131     d->qscale= s->qscale;
2132     d->dquant= s->dquant;
2133
2134     d->esc3_level_length= s->esc3_level_length;
2135 }
2136
2137 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2138     int i;
2139
2140     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2141     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2142
2143     /* mpeg1 */
2144     d->mb_skip_run= s->mb_skip_run;
2145     for(i=0; i<3; i++)
2146         d->last_dc[i] = s->last_dc[i];
2147
2148     /* statistics */
2149     d->mv_bits= s->mv_bits;
2150     d->i_tex_bits= s->i_tex_bits;
2151     d->p_tex_bits= s->p_tex_bits;
2152     d->i_count= s->i_count;
2153     d->f_count= s->f_count;
2154     d->b_count= s->b_count;
2155     d->skip_count= s->skip_count;
2156     d->misc_bits= s->misc_bits;
2157
2158     d->mb_intra= s->mb_intra;
2159     d->mb_skipped= s->mb_skipped;
2160     d->mv_type= s->mv_type;
2161     d->mv_dir= s->mv_dir;
2162     d->pb= s->pb;
2163     if(s->data_partitioning){
2164         d->pb2= s->pb2;
2165         d->tex_pb= s->tex_pb;
2166     }
2167     d->block= s->block;
2168     for(i=0; i<8; i++)
2169         d->block_last_index[i]= s->block_last_index[i];
2170     d->interlaced_dct= s->interlaced_dct;
2171     d->qscale= s->qscale;
2172
2173     d->esc3_level_length= s->esc3_level_length;
2174 }
2175
2176 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2177                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2178                            int *dmin, int *next_block, int motion_x, int motion_y)
2179 {
2180     int score;
2181     uint8_t *dest_backup[3];
2182
2183     copy_context_before_encode(s, backup, type);
2184
2185     s->block= s->blocks[*next_block];
2186     s->pb= pb[*next_block];
2187     if(s->data_partitioning){
2188         s->pb2   = pb2   [*next_block];
2189         s->tex_pb= tex_pb[*next_block];
2190     }
2191
2192     if(*next_block){
2193         memcpy(dest_backup, s->dest, sizeof(s->dest));
2194         s->dest[0] = s->rd_scratchpad;
2195         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2196         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2197         assert(s->linesize >= 32); //FIXME
2198     }
2199
2200     encode_mb(s, motion_x, motion_y);
2201
2202     score= put_bits_count(&s->pb);
2203     if(s->data_partitioning){
2204         score+= put_bits_count(&s->pb2);
2205         score+= put_bits_count(&s->tex_pb);
2206     }
2207
2208     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2209         ff_MPV_decode_mb(s, s->block);
2210
2211         score *= s->lambda2;
2212         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2213     }
2214
2215     if(*next_block){
2216         memcpy(s->dest, dest_backup, sizeof(s->dest));
2217     }
2218
2219     if(score<*dmin){
2220         *dmin= score;
2221         *next_block^=1;
2222
2223         copy_context_after_encode(best, s, type);
2224     }
2225 }
2226
2227 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2228     uint32_t *sq = ff_squareTbl + 256;
2229     int acc=0;
2230     int x,y;
2231
2232     if(w==16 && h==16)
2233         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2234     else if(w==8 && h==8)
2235         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2236
2237     for(y=0; y<h; y++){
2238         for(x=0; x<w; x++){
2239             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2240         }
2241     }
2242
2243     assert(acc>=0);
2244
2245     return acc;
2246 }
2247
2248 static int sse_mb(MpegEncContext *s){
2249     int w= 16;
2250     int h= 16;
2251
2252     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2253     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2254
2255     if(w==16 && h==16)
2256       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2257         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2258                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2259                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2260       }else{
2261         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2262                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2263                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2264       }
2265     else
2266         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2267                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2268                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2269 }
2270
2271 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2272     MpegEncContext *s= *(void**)arg;
2273
2274
2275     s->me.pre_pass=1;
2276     s->me.dia_size= s->avctx->pre_dia_size;
2277     s->first_slice_line=1;
2278     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2279         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2280             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2281         }
2282         s->first_slice_line=0;
2283     }
2284
2285     s->me.pre_pass=0;
2286
2287     return 0;
2288 }
2289
2290 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2291     MpegEncContext *s= *(void**)arg;
2292
2293     ff_check_alignment();
2294
2295     s->me.dia_size= s->avctx->dia_size;
2296     s->first_slice_line=1;
2297     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2298         s->mb_x=0; //for block init below
2299         ff_init_block_index(s);
2300         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2301             s->block_index[0]+=2;
2302             s->block_index[1]+=2;
2303             s->block_index[2]+=2;
2304             s->block_index[3]+=2;
2305
2306             /* compute motion vector & mb_type and store in context */
2307             if(s->pict_type==AV_PICTURE_TYPE_B)
2308                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2309             else
2310                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2311         }
2312         s->first_slice_line=0;
2313     }
2314     return 0;
2315 }
2316
2317 static int mb_var_thread(AVCodecContext *c, void *arg){
2318     MpegEncContext *s= *(void**)arg;
2319     int mb_x, mb_y;
2320
2321     ff_check_alignment();
2322
2323     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2324         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2325             int xx = mb_x * 16;
2326             int yy = mb_y * 16;
2327             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2328             int varc;
2329             int sum = s->dsp.pix_sum(pix, s->linesize);
2330
2331             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2332
2333             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2334             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2335             s->me.mb_var_sum_temp    += varc;
2336         }
2337     }
2338     return 0;
2339 }
2340
2341 static void write_slice_end(MpegEncContext *s){
2342     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2343         if(s->partitioned_frame){
2344             ff_mpeg4_merge_partitions(s);
2345         }
2346
2347         ff_mpeg4_stuffing(&s->pb);
2348     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2349         ff_mjpeg_encode_stuffing(&s->pb);
2350     }
2351
2352     avpriv_align_put_bits(&s->pb);
2353     flush_put_bits(&s->pb);
2354
2355     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2356         s->misc_bits+= get_bits_diff(s);
2357 }
2358
2359 static void write_mb_info(MpegEncContext *s)
2360 {
2361     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2362     int offset = put_bits_count(&s->pb);
2363     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2364     int gobn = s->mb_y / s->gob_index;
2365     int pred_x, pred_y;
2366     if (CONFIG_H263_ENCODER)
2367         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2368     bytestream_put_le32(&ptr, offset);
2369     bytestream_put_byte(&ptr, s->qscale);
2370     bytestream_put_byte(&ptr, gobn);
2371     bytestream_put_le16(&ptr, mba);
2372     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2373     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2374     /* 4MV not implemented */
2375     bytestream_put_byte(&ptr, 0); /* hmv2 */
2376     bytestream_put_byte(&ptr, 0); /* vmv2 */
2377 }
2378
2379 static void update_mb_info(MpegEncContext *s, int startcode)
2380 {
2381     if (!s->mb_info)
2382         return;
2383     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2384         s->mb_info_size += 12;
2385         s->prev_mb_info = s->last_mb_info;
2386     }
2387     if (startcode) {
2388         s->prev_mb_info = put_bits_count(&s->pb)/8;
2389         /* This might have incremented mb_info_size above, and we return without
2390          * actually writing any info into that slot yet. But in that case,
2391          * this will be called again at the start of the after writing the
2392          * start code, actually writing the mb info. */
2393         return;
2394     }
2395
2396     s->last_mb_info = put_bits_count(&s->pb)/8;
2397     if (!s->mb_info_size)
2398         s->mb_info_size += 12;
2399     write_mb_info(s);
2400 }
2401
2402 static int encode_thread(AVCodecContext *c, void *arg){
2403     MpegEncContext *s= *(void**)arg;
2404     int mb_x, mb_y, pdif = 0;
2405     int chr_h= 16>>s->chroma_y_shift;
2406     int i, j;
2407     MpegEncContext best_s, backup_s;
2408     uint8_t bit_buf[2][MAX_MB_BYTES];
2409     uint8_t bit_buf2[2][MAX_MB_BYTES];
2410     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2411     PutBitContext pb[2], pb2[2], tex_pb[2];
2412 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2413
2414     ff_check_alignment();
2415
2416     for(i=0; i<2; i++){
2417         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2418         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2419         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2420     }
2421
2422     s->last_bits= put_bits_count(&s->pb);
2423     s->mv_bits=0;
2424     s->misc_bits=0;
2425     s->i_tex_bits=0;
2426     s->p_tex_bits=0;
2427     s->i_count=0;
2428     s->f_count=0;
2429     s->b_count=0;
2430     s->skip_count=0;
2431
2432     for(i=0; i<3; i++){
2433         /* init last dc values */
2434         /* note: quant matrix value (8) is implied here */
2435         s->last_dc[i] = 128 << s->intra_dc_precision;
2436
2437         s->current_picture.f.error[i] = 0;
2438     }
2439     if(s->codec_id==CODEC_ID_AMV){
2440         s->last_dc[0] = 128*8/13;
2441         s->last_dc[1] = 128*8/14;
2442         s->last_dc[2] = 128*8/14;
2443     }
2444     s->mb_skip_run = 0;
2445     memset(s->last_mv, 0, sizeof(s->last_mv));
2446
2447     s->last_mv_dir = 0;
2448
2449     switch(s->codec_id){
2450     case CODEC_ID_H263:
2451     case CODEC_ID_H263P:
2452     case CODEC_ID_FLV1:
2453         if (CONFIG_H263_ENCODER)
2454             s->gob_index = ff_h263_get_gob_height(s);
2455         break;
2456     case CODEC_ID_MPEG4:
2457         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2458             ff_mpeg4_init_partitions(s);
2459         break;
2460     }
2461
2462     s->resync_mb_x=0;
2463     s->resync_mb_y=0;
2464     s->first_slice_line = 1;
2465     s->ptr_lastgob = s->pb.buf;
2466     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2467 //    printf("row %d at %X\n", s->mb_y, (int)s);
2468         s->mb_x=0;
2469         s->mb_y= mb_y;
2470
2471         ff_set_qscale(s, s->qscale);
2472         ff_init_block_index(s);
2473
2474         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2475             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2476             int mb_type= s->mb_type[xy];
2477 //            int d;
2478             int dmin= INT_MAX;
2479             int dir;
2480
2481             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2482                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2483                 return -1;
2484             }
2485             if(s->data_partitioning){
2486                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2487                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2488                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2489                     return -1;
2490                 }
2491             }
2492
2493             s->mb_x = mb_x;
2494             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2495             ff_update_block_index(s);
2496
2497             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2498                 ff_h261_reorder_mb_index(s);
2499                 xy= s->mb_y*s->mb_stride + s->mb_x;
2500                 mb_type= s->mb_type[xy];
2501             }
2502
2503             /* write gob / video packet header  */
2504             if(s->rtp_mode){
2505                 int current_packet_size, is_gob_start;
2506
2507                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2508
2509                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2510
2511                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2512
2513                 switch(s->codec_id){
2514                 case CODEC_ID_H263:
2515                 case CODEC_ID_H263P:
2516                     if(!s->h263_slice_structured)
2517                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2518                     break;
2519                 case CODEC_ID_MPEG2VIDEO:
2520                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2521                 case CODEC_ID_MPEG1VIDEO:
2522                     if(s->mb_skip_run) is_gob_start=0;
2523                     break;
2524                 }
2525
2526                 if(is_gob_start){
2527                     if(s->start_mb_y != mb_y || mb_x!=0){
2528                         write_slice_end(s);
2529
2530                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2531                             ff_mpeg4_init_partitions(s);
2532                         }
2533                     }
2534
2535                     assert((put_bits_count(&s->pb)&7) == 0);
2536                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2537
2538                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2539                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2540                         int d= 100 / s->avctx->error_rate;
2541                         if(r % d == 0){
2542                             current_packet_size=0;
2543                             s->pb.buf_ptr= s->ptr_lastgob;
2544                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2545                         }
2546                     }
2547
2548                     if (s->avctx->rtp_callback){
2549                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2550                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2551                     }
2552                     update_mb_info(s, 1);
2553
2554                     switch(s->codec_id){
2555                     case CODEC_ID_MPEG4:
2556                         if (CONFIG_MPEG4_ENCODER) {
2557                             ff_mpeg4_encode_video_packet_header(s);
2558                             ff_mpeg4_clean_buffers(s);
2559                         }
2560                     break;
2561                     case CODEC_ID_MPEG1VIDEO:
2562                     case CODEC_ID_MPEG2VIDEO:
2563                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2564                             ff_mpeg1_encode_slice_header(s);
2565                             ff_mpeg1_clean_buffers(s);
2566                         }
2567                     break;
2568                     case CODEC_ID_H263:
2569                     case CODEC_ID_H263P:
2570                         if (CONFIG_H263_ENCODER)
2571                             ff_h263_encode_gob_header(s, mb_y);
2572                     break;
2573                     }
2574
2575                     if(s->flags&CODEC_FLAG_PASS1){
2576                         int bits= put_bits_count(&s->pb);
2577                         s->misc_bits+= bits - s->last_bits;
2578                         s->last_bits= bits;
2579                     }
2580
2581                     s->ptr_lastgob += current_packet_size;
2582                     s->first_slice_line=1;
2583                     s->resync_mb_x=mb_x;
2584                     s->resync_mb_y=mb_y;
2585                 }
2586             }
2587
2588             if(  (s->resync_mb_x   == s->mb_x)
2589                && s->resync_mb_y+1 == s->mb_y){
2590                 s->first_slice_line=0;
2591             }
2592
2593             s->mb_skipped=0;
2594             s->dquant=0; //only for QP_RD
2595
2596             update_mb_info(s, 0);
2597
2598             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2599                 int next_block=0;
2600                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2601
2602                 copy_context_before_encode(&backup_s, s, -1);
2603                 backup_s.pb= s->pb;
2604                 best_s.data_partitioning= s->data_partitioning;
2605                 best_s.partitioned_frame= s->partitioned_frame;
2606                 if(s->data_partitioning){
2607                     backup_s.pb2= s->pb2;
2608                     backup_s.tex_pb= s->tex_pb;
2609                 }
2610
2611                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2612                     s->mv_dir = MV_DIR_FORWARD;
2613                     s->mv_type = MV_TYPE_16X16;
2614                     s->mb_intra= 0;
2615                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2616                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2617                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2618                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2619                 }
2620                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2621                     s->mv_dir = MV_DIR_FORWARD;
2622                     s->mv_type = MV_TYPE_FIELD;
2623                     s->mb_intra= 0;
2624                     for(i=0; i<2; i++){
2625                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2626                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2627                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2628                     }
2629                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2630                                  &dmin, &next_block, 0, 0);
2631                 }
2632                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2633                     s->mv_dir = MV_DIR_FORWARD;
2634                     s->mv_type = MV_TYPE_16X16;
2635                     s->mb_intra= 0;
2636                     s->mv[0][0][0] = 0;
2637                     s->mv[0][0][1] = 0;
2638                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2639                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2640                 }
2641                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2642                     s->mv_dir = MV_DIR_FORWARD;
2643                     s->mv_type = MV_TYPE_8X8;
2644                     s->mb_intra= 0;
2645                     for(i=0; i<4; i++){
2646                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2647                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2648                     }
2649                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2650                                  &dmin, &next_block, 0, 0);
2651                 }
2652                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2653                     s->mv_dir = MV_DIR_FORWARD;
2654                     s->mv_type = MV_TYPE_16X16;
2655                     s->mb_intra= 0;
2656                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2657                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2658                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2659                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2660                 }
2661                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2662                     s->mv_dir = MV_DIR_BACKWARD;
2663                     s->mv_type = MV_TYPE_16X16;
2664                     s->mb_intra= 0;
2665                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2666                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2667                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2668                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2669                 }
2670                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2671                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2672                     s->mv_type = MV_TYPE_16X16;
2673                     s->mb_intra= 0;
2674                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2675                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2676                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2677                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2678                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2679                                  &dmin, &next_block, 0, 0);
2680                 }
2681                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2682                     s->mv_dir = MV_DIR_FORWARD;
2683                     s->mv_type = MV_TYPE_FIELD;
2684                     s->mb_intra= 0;
2685                     for(i=0; i<2; i++){
2686                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2687                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2688                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2689                     }
2690                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2691                                  &dmin, &next_block, 0, 0);
2692                 }
2693                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2694                     s->mv_dir = MV_DIR_BACKWARD;
2695                     s->mv_type = MV_TYPE_FIELD;
2696                     s->mb_intra= 0;
2697                     for(i=0; i<2; i++){
2698                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2699                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2700                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2701                     }
2702                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2703                                  &dmin, &next_block, 0, 0);
2704                 }
2705                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2706                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2707                     s->mv_type = MV_TYPE_FIELD;
2708                     s->mb_intra= 0;
2709                     for(dir=0; dir<2; dir++){
2710                         for(i=0; i<2; i++){
2711                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2712                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2713                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2714                         }
2715                     }
2716                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2717                                  &dmin, &next_block, 0, 0);
2718                 }
2719                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2720                     s->mv_dir = 0;
2721                     s->mv_type = MV_TYPE_16X16;
2722                     s->mb_intra= 1;
2723                     s->mv[0][0][0] = 0;
2724                     s->mv[0][0][1] = 0;
2725                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2726                                  &dmin, &next_block, 0, 0);
2727                     if(s->h263_pred || s->h263_aic){
2728                         if(best_s.mb_intra)
2729                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2730                         else
2731                             ff_clean_intra_table_entries(s); //old mode?
2732                     }
2733                 }
2734
2735                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2736                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2737                         const int last_qp= backup_s.qscale;
2738                         int qpi, qp, dc[6];
2739                         DCTELEM ac[6][16];
2740                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2741                         static const int dquant_tab[4]={-1,1,-2,2};
2742
2743                         assert(backup_s.dquant == 0);
2744
2745                         //FIXME intra
2746                         s->mv_dir= best_s.mv_dir;
2747                         s->mv_type = MV_TYPE_16X16;
2748                         s->mb_intra= best_s.mb_intra;
2749                         s->mv[0][0][0] = best_s.mv[0][0][0];
2750                         s->mv[0][0][1] = best_s.mv[0][0][1];
2751                         s->mv[1][0][0] = best_s.mv[1][0][0];
2752                         s->mv[1][0][1] = best_s.mv[1][0][1];
2753
2754                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2755                         for(; qpi<4; qpi++){
2756                             int dquant= dquant_tab[qpi];
2757                             qp= last_qp + dquant;
2758                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2759                                 continue;
2760                             backup_s.dquant= dquant;
2761                             if(s->mb_intra && s->dc_val[0]){
2762                                 for(i=0; i<6; i++){
2763                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2764                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2765                                 }
2766                             }
2767
2768                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2769                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2770                             if(best_s.qscale != qp){
2771                                 if(s->mb_intra && s->dc_val[0]){
2772                                     for(i=0; i<6; i++){
2773                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2774                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2775                                     }
2776                                 }
2777                             }
2778                         }
2779                     }
2780                 }
2781                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2782                     int mx= s->b_direct_mv_table[xy][0];
2783                     int my= s->b_direct_mv_table[xy][1];
2784
2785                     backup_s.dquant = 0;
2786                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2787                     s->mb_intra= 0;
2788                     ff_mpeg4_set_direct_mv(s, mx, my);
2789                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2790                                  &dmin, &next_block, mx, my);
2791                 }
2792                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2793                     backup_s.dquant = 0;
2794                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2795                     s->mb_intra= 0;
2796                     ff_mpeg4_set_direct_mv(s, 0, 0);
2797                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2798                                  &dmin, &next_block, 0, 0);
2799                 }
2800                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2801                     int coded=0;
2802                     for(i=0; i<6; i++)
2803                         coded |= s->block_last_index[i];
2804                     if(coded){
2805                         int mx,my;
2806                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2807                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2808                             mx=my=0; //FIXME find the one we actually used
2809                             ff_mpeg4_set_direct_mv(s, mx, my);
2810                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2811                             mx= s->mv[1][0][0];
2812                             my= s->mv[1][0][1];
2813                         }else{
2814                             mx= s->mv[0][0][0];
2815                             my= s->mv[0][0][1];
2816                         }
2817
2818                         s->mv_dir= best_s.mv_dir;
2819                         s->mv_type = best_s.mv_type;
2820                         s->mb_intra= 0;
2821 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2822                         s->mv[0][0][1] = best_s.mv[0][0][1];
2823                         s->mv[1][0][0] = best_s.mv[1][0][0];
2824                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2825                         backup_s.dquant= 0;
2826                         s->skipdct=1;
2827                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2828                                         &dmin, &next_block, mx, my);
2829                         s->skipdct=0;
2830                     }
2831                 }
2832
2833                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2834
2835                 copy_context_after_encode(s, &best_s, -1);
2836
2837                 pb_bits_count= put_bits_count(&s->pb);
2838                 flush_put_bits(&s->pb);
2839                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2840                 s->pb= backup_s.pb;
2841
2842                 if(s->data_partitioning){
2843                     pb2_bits_count= put_bits_count(&s->pb2);
2844                     flush_put_bits(&s->pb2);
2845                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2846                     s->pb2= backup_s.pb2;
2847
2848                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2849                     flush_put_bits(&s->tex_pb);
2850                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2851                     s->tex_pb= backup_s.tex_pb;
2852                 }
2853                 s->last_bits= put_bits_count(&s->pb);
2854
2855                 if (CONFIG_H263_ENCODER &&
2856                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2857                     ff_h263_update_motion_val(s);
2858
2859                 if(next_block==0){ //FIXME 16 vs linesize16
2860                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2861                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2862                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2863                 }
2864
2865                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2866                     ff_MPV_decode_mb(s, s->block);
2867             } else {
2868                 int motion_x = 0, motion_y = 0;
2869                 s->mv_type=MV_TYPE_16X16;
2870                 // only one MB-Type possible
2871
2872                 switch(mb_type){
2873                 case CANDIDATE_MB_TYPE_INTRA:
2874                     s->mv_dir = 0;
2875                     s->mb_intra= 1;
2876                     motion_x= s->mv[0][0][0] = 0;
2877                     motion_y= s->mv[0][0][1] = 0;
2878                     break;
2879                 case CANDIDATE_MB_TYPE_INTER:
2880                     s->mv_dir = MV_DIR_FORWARD;
2881                     s->mb_intra= 0;
2882                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2883                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2884                     break;
2885                 case CANDIDATE_MB_TYPE_INTER_I:
2886                     s->mv_dir = MV_DIR_FORWARD;
2887                     s->mv_type = MV_TYPE_FIELD;
2888                     s->mb_intra= 0;
2889                     for(i=0; i<2; i++){
2890                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2891                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2892                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2893                     }
2894                     break;
2895                 case CANDIDATE_MB_TYPE_INTER4V:
2896                     s->mv_dir = MV_DIR_FORWARD;
2897                     s->mv_type = MV_TYPE_8X8;
2898                     s->mb_intra= 0;
2899                     for(i=0; i<4; i++){
2900                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2901                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2902                     }
2903                     break;
2904                 case CANDIDATE_MB_TYPE_DIRECT:
2905                     if (CONFIG_MPEG4_ENCODER) {
2906                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2907                         s->mb_intra= 0;
2908                         motion_x=s->b_direct_mv_table[xy][0];
2909                         motion_y=s->b_direct_mv_table[xy][1];
2910                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2911                     }
2912                     break;
2913                 case CANDIDATE_MB_TYPE_DIRECT0:
2914                     if (CONFIG_MPEG4_ENCODER) {
2915                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2916                         s->mb_intra= 0;
2917                         ff_mpeg4_set_direct_mv(s, 0, 0);
2918                     }
2919                     break;
2920                 case CANDIDATE_MB_TYPE_BIDIR:
2921                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2922                     s->mb_intra= 0;
2923                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2924                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2925                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2926                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2927                     break;
2928                 case CANDIDATE_MB_TYPE_BACKWARD:
2929                     s->mv_dir = MV_DIR_BACKWARD;
2930                     s->mb_intra= 0;
2931                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2932                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2933                     break;
2934                 case CANDIDATE_MB_TYPE_FORWARD:
2935                     s->mv_dir = MV_DIR_FORWARD;
2936                     s->mb_intra= 0;
2937                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2938                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2939 //                    printf(" %d %d ", motion_x, motion_y);
2940                     break;
2941                 case CANDIDATE_MB_TYPE_FORWARD_I:
2942                     s->mv_dir = MV_DIR_FORWARD;
2943                     s->mv_type = MV_TYPE_FIELD;
2944                     s->mb_intra= 0;
2945                     for(i=0; i<2; i++){
2946                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2947                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2948                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2949                     }
2950                     break;
2951                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2952                     s->mv_dir = MV_DIR_BACKWARD;
2953                     s->mv_type = MV_TYPE_FIELD;
2954                     s->mb_intra= 0;
2955                     for(i=0; i<2; i++){
2956                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2957                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2958                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2959                     }
2960                     break;
2961                 case CANDIDATE_MB_TYPE_BIDIR_I:
2962                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2963                     s->mv_type = MV_TYPE_FIELD;
2964                     s->mb_intra= 0;
2965                     for(dir=0; dir<2; dir++){
2966                         for(i=0; i<2; i++){
2967                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2968                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2969                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2970                         }
2971                     }
2972                     break;
2973                 default:
2974                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2975                 }
2976
2977                 encode_mb(s, motion_x, motion_y);
2978
2979                 // RAL: Update last macroblock type
2980                 s->last_mv_dir = s->mv_dir;
2981
2982                 if (CONFIG_H263_ENCODER &&
2983                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2984                     ff_h263_update_motion_val(s);
2985
2986                 ff_MPV_decode_mb(s, s->block);
2987             }
2988
2989             /* clean the MV table in IPS frames for direct mode in B frames */
2990             if(s->mb_intra /* && I,P,S_TYPE */){
2991                 s->p_mv_table[xy][0]=0;
2992                 s->p_mv_table[xy][1]=0;
2993             }
2994
2995             if(s->flags&CODEC_FLAG_PSNR){
2996                 int w= 16;
2997                 int h= 16;
2998
2999                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3000                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3001
3002                 s->current_picture.f.error[0] += sse(
3003                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3004                     s->dest[0], w, h, s->linesize);
3005                 s->current_picture.f.error[1] += sse(
3006                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3007                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3008                 s->current_picture.f.error[2] += sse(
3009                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3010                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3011             }
3012             if(s->loop_filter){
3013                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3014                     ff_h263_loop_filter(s);
3015             }
3016 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3017         }
3018     }
3019
3020     //not beautiful here but we must write it before flushing so it has to be here
3021     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3022         ff_msmpeg4_encode_ext_header(s);
3023
3024     write_slice_end(s);
3025
3026     /* Send the last GOB if RTP */
3027     if (s->avctx->rtp_callback) {
3028         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3029         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3030         /* Call the RTP callback to send the last GOB */
3031         emms_c();
3032         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3033     }
3034
3035     return 0;
3036 }
3037
/**
 * Add src->field to dst->field and reset src->field to 0.
 *
 * Pointers named dst and src must be in scope where the macro is used.
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays correct as the body of an unbraced
 * if/else/for; invoke it as MERGE(field);
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3039 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3040     MERGE(me.scene_change_score);
3041     MERGE(me.mc_mb_var_sum_temp);
3042     MERGE(me.mb_var_sum_temp);
3043 }
3044
3045 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3046     int i;
3047
3048     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3049     MERGE(dct_count[1]);
3050     MERGE(mv_bits);
3051     MERGE(i_tex_bits);
3052     MERGE(p_tex_bits);
3053     MERGE(i_count);
3054     MERGE(f_count);
3055     MERGE(b_count);
3056     MERGE(skip_count);
3057     MERGE(misc_bits);
3058     MERGE(error_count);
3059     MERGE(padding_bug_score);
3060     MERGE(current_picture.f.error[0]);
3061     MERGE(current_picture.f.error[1]);
3062     MERGE(current_picture.f.error[2]);
3063
3064     if(dst->avctx->noise_reduction){
3065         for(i=0; i<64; i++){
3066             MERGE(dct_error_sum[0][i]);
3067             MERGE(dct_error_sum[1][i]);
3068         }
3069     }
3070
3071     assert(put_bits_count(&src->pb) % 8 ==0);
3072     assert(put_bits_count(&dst->pb) % 8 ==0);
3073     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3074     flush_put_bits(&dst->pb);
3075 }
3076
3077 static int estimate_qp(MpegEncContext *s, int dry_run){
3078     if (s->next_lambda){
3079         s->current_picture_ptr->f.quality =
3080         s->current_picture.f.quality = s->next_lambda;
3081         if(!dry_run) s->next_lambda= 0;
3082     } else if (!s->fixed_qscale) {
3083         s->current_picture_ptr->f.quality =
3084         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3085         if (s->current_picture.f.quality < 0)
3086             return -1;
3087     }
3088
3089     if(s->adaptive_quant){
3090         switch(s->codec_id){
3091         case CODEC_ID_MPEG4:
3092             if (CONFIG_MPEG4_ENCODER)
3093                 ff_clean_mpeg4_qscales(s);
3094             break;
3095         case CODEC_ID_H263:
3096         case CODEC_ID_H263P:
3097         case CODEC_ID_FLV1:
3098             if (CONFIG_H263_ENCODER)
3099                 ff_clean_h263_qscales(s);
3100             break;
3101         default:
3102             ff_init_qscale_tab(s);
3103         }
3104
3105         s->lambda= s->lambda_table[0];
3106         //FIXME broken
3107     }else
3108         s->lambda = s->current_picture.f.quality;
3109 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3110     update_qscale(s);
3111     return 0;
3112 }
3113
3114 /* must be called before writing the header */
3115 static void set_frame_distances(MpegEncContext * s){
3116     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3117     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3118
3119     if(s->pict_type==AV_PICTURE_TYPE_B){
3120         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3121         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3122     }else{
3123         s->pp_time= s->time - s->last_non_b_time;
3124         s->last_non_b_time= s->time;
3125         assert(s->picture_number==0 || s->pp_time > 0);
3126     }
3127 }
3128
/**
 * Encode the current picture of an encoder context.
 *
 * The function runs, in this order:
 *  - per-picture setup (frame distances, rounding mode, initial lambda),
 *  - motion estimation on all slice contexts, or for I pictures marking every
 *    macroblock as intra candidate and, unless the qscale is fixed, measuring
 *    the macroblock variance,
 *  - scene change handling (a P picture may be turned into an I picture),
 *  - f_code/b_code selection and fixing of too long motion vectors,
 *  - the final quantizer decision via estimate_qp(),
 *  - writing of the format specific picture header,
 *  - encode_thread on all slice contexts, whose results are merged into s.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and handed to most of
 *                       the picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() failed
 */
static int encode_picture(MpegEncContext *s, int picture_number)
{
    int i;
    int bits;
    int context_count = s->slice_context_count;

    s->picture_number = picture_number;

    /* Reset the average MB variance */
    s->me.mb_var_sum_temp    =
    s->me.mc_mb_var_sum_temp = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    // RAL: Condition added for MPEG1VIDEO
    if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
        set_frame_distances(s);
    if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
        ff_set_mpeg4_time(s);

    s->me.scene_change_score=0;

//    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion

    /* rounding mode: set explicitly on I pictures, toggled on every other
     * non-B picture for the codecs / settings listed below */
    if(s->pict_type==AV_PICTURE_TYPE_I){
        if(s->msmpeg4_version >= 3) s->no_rounding=1;
        else                        s->no_rounding=0;
    }else if(s->pict_type!=AV_PICTURE_TYPE_B){
        if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
            s->no_rounding ^= 1;
    }

    /* initial lambda for motion estimation: in the second pass do a dry run
     * of the rate control, otherwise (unless the qscale is forced) reuse the
     * lambda remembered for this kind of picture */
    if(s->flags & CODEC_FLAG_PASS2){
        if (estimate_qp(s,1) < 0)
            return -1;
        ff_get_2pass_fcode(s);
    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
        if(s->pict_type==AV_PICTURE_TYPE_B)
            s->lambda= s->last_lambda_for[s->pict_type];
        else
            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
        update_qscale(s);
    }

    /* every codec except AMV uses the luma intra tables for chroma as well;
     * separately allocated chroma tables are freed before being aliased */
    if(s->codec_id != CODEC_ID_AMV){
        if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
        if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
        s->q_chroma_intra_matrix   = s->q_intra_matrix;
        s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
    }

    s->mb_intra=0; //for the rate distortion & bit compare functions
    /* bring the additional slice contexts (index 1 and up) in sync with s */
    for(i=1; i<context_count; i++){
        ff_update_duplicate_context(s->thread_context[i], s);
    }

    if(ff_init_me(s)<0)
        return -1;

    /* Estimate motion for every MB */
    if(s->pict_type != AV_PICTURE_TYPE_I){
        /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
        s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
        s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
        if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
            /* optional pre-pass: pre_me==2 always, any other non-zero pre_me
             * only when the last non-B picture was an I picture */
            if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
                s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
            }
        }

        s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
    }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
        /* I-Frame */
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
        }
    }
    /* collect the statistics gathered by the other slice contexts */
    for(i=1; i<context_count; i++){
        merge_context_after_me(s, s->thread_context[i]);
    }
    s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
    s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
    emms_c();

    /* scene change: code a P picture as I picture instead */
    if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
        s->pict_type= AV_PICTURE_TYPE_I;
        for(i=0; i<s->mb_stride*s->mb_height; i++)
            s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    /* choose f_code/b_code and fix vectors that are too long for them;
     * skipped entirely when umvplus is set */
    if(!s->umvplus){
        if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
            s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);

            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int a,b;
                a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
                b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                s->f_code= FFMAX3(s->f_code, a, b);
            }

            ff_fix_long_p_mvs(s);
            ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int j;
                for(i=0; i<2; i++){
                    for(j=0; j<2; j++)
                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                        s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                }
            }
        }

        if(s->pict_type==AV_PICTURE_TYPE_B){
            int a, b;

            /* f_code has to cover the forward vectors of both the forward
             * and the bidirectional candidates, b_code the backward ones */
            a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
            b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->f_code = FFMAX(a, b);

            a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
            b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
            s->b_code = FFMAX(a, b);

            ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
            if(s->flags & CODEC_FLAG_INTERLACED_ME){
                int dir, j;
                for(dir=0; dir<2; dir++){
                    for(i=0; i<2; i++){
                        for(j=0; j<2; j++){
                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                          : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                            s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                        }
                    }
                }
            }
        }
    }

    /* final quantizer decision for this picture */
    if (estimate_qp(s, 0) < 0)
        return -1;

    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
        s->qscale= 3; //reduce clipping problems

    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[i];

            s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
        }
        s->y_dc_scale_table=
        s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
        s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
        ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        /* the matrices were converted for qscale 8 only (qmin = qmax = 8) */
        s->qscale= 8;
    }
    if(s->codec_id == CODEC_ID_AMV){
        /* AMV: constant DC scale (13 luma, 14 chroma) and quantization
         * matrices taken from rows 10 and 11 of sp5x_quant_table */
        static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
        static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
        for(i=1;i<64;i++){
            int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];

            s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
            s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
        }
        s->y_dc_scale_table= y;
        s->c_dc_scale_table= c;
        s->intra_matrix[0] = 13;
        s->chroma_intra_matrix[0] = 14;
        ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                       s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
                       s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
        s->qscale= 8;
    }

    //FIXME var duplication
    s->current_picture_ptr->f.key_frame =
    s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
    s->current_picture_ptr->f.pict_type =
    s->current_picture.f.pict_type = s->pict_type;

    if (s->current_picture.f.key_frame)
        s->picture_in_gop_number=0;

    /* write the picture header; the bit count before and after is used to
     * compute s->header_bits */
    s->last_bits= put_bits_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        if (CONFIG_MJPEG_ENCODER)
            ff_mjpeg_encode_picture_header(s);
        break;
    case FMT_H261:
        if (CONFIG_H261_ENCODER)
            ff_h261_encode_picture_header(s, picture_number);
        break;
    case FMT_H263:
        /* several codecs share FMT_H263; the first matching writer wins */
        if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
            ff_wmv2_encode_picture_header(s, picture_number);
        else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
            ff_msmpeg4_encode_picture_header(s, picture_number);
        else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
            ff_mpeg4_encode_picture_header(s, picture_number);
        else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
            ff_rv10_encode_picture_header(s, picture_number);
        else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
            ff_rv20_encode_picture_header(s, picture_number);
        else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
            ff_flv_encode_picture_header(s, picture_number);
        else if (CONFIG_H263_ENCODER)
            ff_h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
            ff_mpeg1_encode_picture_header(s, picture_number);
        break;
    case FMT_H264:
        break;
    default:
        assert(0);
    }
    bits= put_bits_count(&s->pb);
    s->header_bits= bits - s->last_bits;

    /* propagate the post-ME state to the other slice contexts, encode all
     * of them and merge their results (including their bits) into s */
    for(i=1; i<context_count; i++){
        update_duplicate_context_after_me(s->thread_context[i], s);
    }
    s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
    for(i=1; i<context_count; i++){
        merge_context_after_encode(s, s->thread_context[i]);
    }
    emms_c();
    return 0;
}
3373
3374 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3375     const int intra= s->mb_intra;
3376     int i;
3377
3378     s->dct_count[intra]++;
3379
3380     for(i=0; i<64; i++){
3381         int level= block[i];
3382
3383         if(level){
3384             if(level>0){
3385                 s->dct_error_sum[intra][i] += level;
3386                 level -= s->dct_offset[intra][i];
3387                 if(level<0) level=0;
3388             }else{
3389                 s->dct_error_sum[intra][i] -= level;
3390                 level += s->dct_offset[intra][i];
3391                 if(level>0) level=0;
3392             }
3393             block[i]= level;
3394         }
3395     }
3396 }
3397
/**
 * Forward DCT followed by rate-distortion optimized ("trellis")
 * quantization of one block.
 *
 * For each coefficient up to the last one that does not quantize to zero,
 * one or two candidate levels are collected.  A dynamic programming pass
 * over the scan positions then chooses, per position, the (run, level)
 * pair that minimizes distortion + lambda * bits, where the bit cost comes
 * from the AC VLC length tables of the context (or ac_esc_length for
 * levels outside the table range).
 *
 * @param s        encoder context
 * @param block    transformed in place by s->dsp.fdct(); on return holds the
 *                 chosen quantized levels (AC levels are stored at the
 *                 permutated scan positions)
 * @param n        block index; n < 4 selects the luma DC scale and intra
 *                 matrix, other values the chroma ones
 * @param qscale   quantizer scale
 * @param overflow set to non-zero if a level above s->max_qcoeff might have
 *                 been produced
 * @return scan index of the last non-zero coefficient.  Intra blocks return
 *         0 when only the DC coefficient is left, inter blocks return -1
 *         when the whole block became zero.
 */
static int dct_quantize_trellis_c(MpegEncContext *s,
                                  DCTELEM *block, int n,
                                  int qscale, int *overflow){
    const int *qmat;
    const uint8_t *scantable= s->intra_scantable.scantable;
    const uint8_t *perm_scantable= s->intra_scantable.permutated;
    int max=0;
    unsigned int threshold1, threshold2;
    int bias=0;
    int run_tab[65];
    int level_tab[65];
    int score_tab[65];
    int survivor[65];
    int survivor_count;
    int last_run=0;
    int last_level=0;
    int last_score= 0;
    int last_i;
    int coeff[2][64];
    int coeff_count[64];
    int qmul, qadd, start_i, last_non_zero, i, dc;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
    const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);

    s->dsp.fdct (block);

    /* denoising is active whenever the error sum table exists */
    if(s->dct_error_sum)
        s->denoise_dct(s, block);
    /* multiplier / offset used below to estimate the H.263 style
     * reconstruction of a candidate level */
    qmul= qscale*16;
    qadd= ((qscale-1)|1)*8;

    if (s->mb_intra) {
        int q;
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else{
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;
            qadd=0;
        }

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        /* the DC coefficient is done; the trellis only covers the AC part */
        start_i = 1;
        last_non_zero = 0;
        qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
            bias= 1<<(QMAT_SHIFT-1);
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
    } else {
        start_i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
    last_i= start_i;

    /* a product block*qmat inside [-threshold1, threshold1] quantizes to 0;
     * the unsigned comparison used below checks both bounds at once */
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    /* scan backwards for the last coefficient that survives quantization */
    for(i=63; i>=start_i; i--) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

        if(((unsigned)(level+threshold1))>threshold2){
            last_non_zero = i;
            break;
        }
    }

    /* collect the candidate levels for every position up to last_non_zero:
     * the regularly quantized level and, if its magnitude is at least 2,
     * the level one step closer to zero */
    for(i=start_i; i<=last_non_zero; i++) {
        const int j = scantable[i];
        int level = block[j] * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                coeff[0][i]= level;
                coeff[1][i]= level-1;
//                coeff[2][k]= level-2;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                coeff[0][i]= -level;
                coeff[1][i]= -level+1;
//                coeff[2][k]= -level+2;
            }
            /* level holds the magnitude here; a magnitude of 1 gets only
             * one candidate since the second one would be 0 */
            coeff_count[i]= FFMIN(level, 2);
            assert(coeff_count[i]);
            max |=level;
        }else{
            /* would quantize to 0: the only candidate is +1 or -1 following
             * the sign of level (relies on an arithmetic right shift) */
            coeff[0][i]= (level>>31)|1;
            coeff_count[i]= 1;
        }
    }

    *overflow= s->max_qcoeff < max; //overflow might have happened

    if(last_non_zero < start_i){
        /* nothing but zeros after start_i */
        memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
        return last_non_zero;
    }

    /* score_tab[i] is the best score found for coding everything before scan
     * position i; survivor[] lists the positions that are still considered
     * as the end of the previous run */
    score_tab[start_i]= 0;
    survivor[0]= start_i;
    survivor_count= 1;

    for(i=start_i; i<=last_non_zero; i++){
        int level_index, j, zero_distortion;
        int dct_coeff= FFABS(block[ scantable[i] ]);
        int best_score=256*256*256*120;

        /* these fdct variants leave the AAN scale factors in their output,
         * undo them before measuring distortion */
        if (   s->dsp.fdct == ff_fdct_ifast
#ifndef FAAN_POSTSCALE
            || s->dsp.fdct == ff_faandct
#endif
           )
            dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
        /* distortions are stored relative to coding this coefficient as 0 */
        zero_distortion= dct_coeff*dct_coeff;

        for(level_index=0; level_index < coeff_count[i]; level_index++){
            int distortion;
            int level= coeff[level_index][i];
            const int alevel= FFABS(level);
            int unquant_coeff;

            assert(level);

            /* estimate what the decoder would reconstruct for this level */
            if(s->out_format == FMT_H263){
                unquant_coeff= alevel*qmul + qadd;
            }else{ //MPEG1
                j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
                if(s->mb_intra){
                        unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }else{
                        unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
                        unquant_coeff =   (unquant_coeff - 1) | 1;
                }
                unquant_coeff<<= 3;
            }

            distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
            /* bias the level by 64: biased values 0..127 (levels -64..63)
             * are looked up in the VLC length tables, everything else is
             * charged the escape length */
            level+=64;
            if((level&(~127)) == 0){
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                    score += score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                /* for H.263 style coding the last coefficient uses its own
                 * length table, so the best end point is tracked as well */
                if(s->out_format == FMT_H263){
                    for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                        score += score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }else{
                /* escape coded level: the cost does not depend on the run */
                distortion += esc_length*lambda;
                for(j=survivor_count-1; j>=0; j--){
                    int run= i - survivor[j];
                    int score= distortion + score_tab[i-run];

                    if(score < best_score){
                        best_score= score;
                        run_tab[i+1]= run;
                        level_tab[i+1]= level-64;
                    }
                }

                if(s->out_format == FMT_H263){
                  for(j=survivor_count-1; j>=0; j--){
                        int run= i - survivor[j];
                        int score= distortion + score_tab[i-run];
                        if(score < last_score){
                            last_score= score;
                            last_run= run;
                            last_level= level-64;
                            last_i= i+1;
                        }
                    }
                }
            }
        }

        score_tab[i+1]= best_score;

        /* drop survivors that can no longer lead to a better path */
        //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
        if(last_non_zero <= 27){
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score)
                    break;
            }
        }else{
            for(; survivor_count; survivor_count--){
                if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
                    break;
            }
        }

        survivor[ survivor_count++ ]= i+1;
    }

    /* without a separate "last" table the end point is chosen afterwards;
     * a fixed 2*lambda is added for every end point other than position 0 */
    if(s->out_format != FMT_H263){
        last_score= 256*256*256*120;
        for(i= survivor[0]; i<=last_non_zero + 1; i++){
            int score= score_tab[i];
            if(i) score += lambda*2; //FIXME exacter?

            if(score < last_score){
                last_score= score;
                last_i= i;
                last_level= level_tab[i];
                last_run= run_tab[i];
            }
        }
    }

    s->coded_score[n] = last_score;

    /* remember |block[0]| (needed by the special case below), then clear
     * the coefficients before writing back the chosen levels */
    dc= FFABS(block[0]);
    last_non_zero= last_i - 1;
    memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));

    if(last_non_zero < start_i)
        return last_non_zero;

    /* inter block with only the first coefficient left: choose its level
     * again, this time with 0 as an additional option */
    if(last_non_zero == 0 && start_i == 0){
        int best_level= 0;
        int best_score= dc * dc;

        for(i=0; i<coeff_count[0]; i++){
            int level= coeff[i][0];
            int alevel= FFABS(level);
            int unquant_coeff, score, distortion;

            if(s->out_format == FMT_H263){
                    unquant_coeff= (alevel*qmul + qadd)>>3;
            }else{ //MPEG1
                    unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
                    unquant_coeff =   (unquant_coeff - 1) | 1;
            }
            unquant_coeff = (unquant_coeff + 4) >> 3;
            unquant_coeff<<= 3 + 3;

            distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
            level+=64;
            if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
            else                    score= distortion + esc_length*lambda;

            if(score < best_score){
                best_score= score;
                best_level= level - 64;
            }
        }
        block[0]= best_level;
        s->coded_score[n] = best_score - dc*dc;
        if(best_level == 0) return -1;
        else                return last_non_zero;
    }

    /* write back the chosen path: the last coefficient first, then follow
     * run_tab[] backwards towards start_i */
    i= last_i;
    assert(last_level);

    block[ perm_scantable[last_non_zero] ]= last_level;
    i -= last_run + 1;

    for(; i>start_i; i -= run_tab[i] + 1){
        block[ perm_scantable[i-1] ]= level_tab[i];
    }

    return last_non_zero;
}
3693
3694 //#define REFINE_STATS 1
3695 static int16_t basis[64][64];
3696
3697 static void build_basis(uint8_t *perm){
3698     int i, j, x, y;
3699     emms_c();
3700     for(i=0; i<8; i++){
3701         for(j=0; j<8; j++){
3702             for(y=0; y<8; y++){
3703                 for(x=0; x<8; x++){
3704                     double s= 0.25*(1<<BASIS_SHIFT);
3705                     int index= 8*i + j;
3706                     int perm_index= perm[index];
3707                     if(i==0) s*= sqrt(0.5);
3708                     if(j==0) s*= sqrt(0.5);
3709                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3710                 }
3711             }
3712         }
3713     }
3714 }
3715
3716 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3717                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3718                         int n, int qscale){
3719     int16_t rem[64];
3720     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3721     const uint8_t *scantable= s->intra_scantable.scantable;
3722     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3723 //    unsigned int threshold1, threshold2;
3724 //    int bias=0;
3725     int run_tab[65];
3726     int prev_run=0;
3727     int prev_level=0;
3728     int qmul, qadd, start_i, last_non_zero, i, dc;
3729     uint8_t * length;
3730     uint8_t * last_length;
3731     int lambda;
3732     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3733 #ifdef REFINE_STATS
3734 static int count=0;
3735 static int after_last=0;
3736 static int to_zero=0;
3737 static int from_zero=0;
3738 static int raise=0;
3739 static int lower=0;
3740 static int messed_sign=0;
3741 #endif
3742
3743     if(basis[0][0] == 0)
3744         build_basis(s->dsp.idct_permutation);
3745
3746     qmul= qscale*2;
3747     qadd= (qscale-1)|1;
3748     if (s->mb_intra) {
3749         if (!s->h263_aic) {
3750             if (n < 4)
3751                 q = s->y_dc_scale;
3752             else
3753                 q = s->c_dc_scale;
3754         } else{
3755             /* For AIC we skip quant/dequant of INTRADC */
3756             q = 1;
3757             qadd=0;
3758         }
3759         q <<= RECON_SHIFT-3;
3760         /* note: block[0] is assumed to be positive */
3761         dc= block[0]*q;
3762 //        block[0] = (block[0] + (q >> 1)) / q;
3763         start_i = 1;
3764 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3765 //            bias= 1<<(QMAT_SHIFT-1);
3766         length     = s->intra_ac_vlc_length;
3767         last_length= s->intra_ac_vlc_last_length;
3768     } else {
3769         dc= 0;
3770         start_i = 0;
3771         length     = s->inter_ac_vlc_length;
3772         last_length= s->inter_ac_vlc_last_length;
3773     }
3774     last_non_zero = s->block_last_index[n];
3775
3776 #ifdef REFINE_STATS
3777 {START_TIMER
3778 #endif
3779     dc += (1<<(RECON_SHIFT-1));
3780     for(i=0; i<64; i++){
3781         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3782     }
3783 #ifdef REFINE_STATS
3784 STOP_TIMER("memset rem[]")}
3785 #endif
3786     sum=0;
3787     for(i=0; i<64; i++){
3788         int one= 36;
3789         int qns=4;
3790         int w;
3791
3792         w= FFABS(weight[i]) + qns*one;
3793         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3794
3795         weight[i] = w;
3796 //        w=weight[i] = (63*qns + (w/2)) / w;
3797
3798         assert(w>0);
3799         assert(w<(1<<6));
3800         sum += w*w;
3801     }
3802     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3803 #ifdef REFINE_STATS
3804 {START_TIMER
3805 #endif
3806     run=0;
3807     rle_index=0;
3808     for(i=start_i; i<=last_non_zero; i++){
3809         int j= perm_scantable[i];
3810         const int level= block[j];
3811         int coeff;
3812
3813         if(level){
3814             if(level<0) coeff= qmul*level - qadd;
3815             else        coeff= qmul*level + qadd;
3816             run_tab[rle_index++]=run;
3817             run=0;
3818
3819             s->dsp.add_8x8basis(rem, basis[j], coeff);
3820         }else{
3821             run++;
3822         }
3823     }
3824 #ifdef REFINE_STATS
3825 if(last_non_zero>0){
3826 STOP_TIMER("init rem[]")
3827 }
3828 }
3829
3830 {START_TIMER
3831 #endif
3832     for(;;){
3833         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3834         int best_coeff=0;
3835         int best_change=0;
3836         int run2, best_unquant_change=0, analyze_gradient;
3837 #ifdef REFINE_STATS
3838 {START_TIMER
3839 #endif
3840         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3841
3842         if(analyze_gradient){
3843 #ifdef REFINE_STATS
3844 {START_TIMER
3845 #endif
3846             for(i=0; i<64; i++){
3847                 int w= weight[i];
3848
3849                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3850             }
3851 #ifdef REFINE_STATS
3852 STOP_TIMER("rem*w*w")}
3853 {START_TIMER
3854 #endif
3855             s->dsp.fdct(d1);
3856 #ifdef REFINE_STATS
3857 STOP_TIMER("dct")}
3858 #endif
3859         }
3860
3861         if(start_i){
3862             const int level= block[0];
3863             int change, old_coeff;
3864
3865             assert(s->mb_intra);
3866
3867             old_coeff= q*level;
3868
3869             for(change=-1; change<=1; change+=2){
3870                 int new_level= level + change;
3871                 int score, new_coeff;
3872
3873                 new_coeff= q*new_level;
3874                 if(new_coeff >= 2048 || new_coeff < 0)
3875                     continue;
3876
3877                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3878                 if(score<best_score){
3879                     best_score= score;
3880                     best_coeff= 0;
3881                     best_change= change;
3882                     best_unquant_change= new_coeff - old_coeff;
3883                 }
3884             }
3885         }
3886
3887         run=0;
3888         rle_index=0;
3889         run2= run_tab[rle_index++];
3890         prev_level=0;
3891         prev_run=0;
3892
3893         for(i=start_i; i<64; i++){
3894             int j= perm_scantable[i];
3895             const int level= block[j];
3896             int change, old_coeff;
3897
3898             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3899                 break;
3900
3901             if(level){
3902                 if(level<0) old_coeff= qmul*level - qadd;
3903                 else        old_coeff= qmul*level + qadd;
3904                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3905             }else{
3906                 old_coeff=0;
3907                 run2--;
3908                 assert(run2>=0 || i >= last_non_zero );
3909             }
3910
3911             for(change=-1; change<=1; change+=2){
3912                 int new_level= level + change;
3913                 int score, new_coeff, unquant_change;
3914
3915                 score=0;
3916                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3917                    continue;
3918
3919                 if(new_level){
3920                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3921                     else            new_coeff= qmul*new_level + qadd;
3922                     if(new_coeff >= 2048 || new_coeff <= -2048)
3923                         continue;
3924                     //FIXME check for overflow
3925
3926                     if(level){
3927                         if(level < 63 && level > -63){
3928                             if(i < last_non_zero)
3929                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3930                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3931                             else
3932                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3933                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3934                         }
3935                     }else{
3936                         assert(FFABS(new_level)==1);
3937
3938                         if(analyze_gradient){
3939                             int g= d1[ scantable[i] ];
3940                             if(g && (g^new_level) >= 0)
3941                                 continue;
3942                         }
3943
3944                         if(i < last_non_zero){
3945                             int next_i= i + run2 + 1;
3946                             int next_level= block[ perm_scantable[next_i] ] + 64;
3947
3948                             if(next_level&(~127))
3949                                 next_level= 0;
3950
3951                             if(next_i < last_non_zero)
3952                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3953                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3954                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3955                             else
3956                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3957                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3958                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3959                         }else{
3960                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3961                             if(prev_level){
3962                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3963                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3964                             }
3965                         }
3966                     }
3967                 }else{
3968                     new_coeff=0;
3969                     assert(FFABS(level)==1);
3970
3971                     if(i < last_non_zero){
3972                         int next_i= i + run2 + 1;
3973                         int next_level= block[ perm_scantable[next_i] ] + 64;
3974
3975                         if(next_level&(~127))
3976                             next_level= 0;
3977
3978                         if(next_i < last_non_zero)
3979                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3980                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3981                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3982                         else
3983                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3984                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3985                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3986                     }else{
3987                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3988                         if(prev_level){
3989                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3990                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3991                         }
3992                     }
3993                 }
3994
3995                 score *= lambda;
3996
3997                 unquant_change= new_coeff - old_coeff;
3998                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3999
4000                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4001                 if(score<best_score){
4002                     best_score= score;
4003                     best_coeff= i;
4004                     best_change= change;
4005                     best_unquant_change= unquant_change;
4006                 }
4007             }
4008             if(level){
4009                 prev_level= level + 64;
4010                 if(prev_level&(~127))
4011                     prev_level= 0;
4012                 prev_run= run;
4013                 run=0;
4014             }else{
4015                 run++;
4016             }
4017         }
4018 #ifdef REFINE_STATS
4019 STOP_TIMER("iterative step")}
4020 #endif
4021
4022         if(best_change){
4023             int j= perm_scantable[ best_coeff ];
4024
4025             block[j] += best_change;
4026
4027             if(best_coeff > last_non_zero){
4028                 last_non_zero= best_coeff;
4029                 assert(block[j]);
4030 #ifdef REFINE_STATS
4031 after_last++;
4032 #endif
4033             }else{
4034 #ifdef REFINE_STATS
4035 if(block[j]){
4036     if(block[j] - best_change){
4037         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4038             raise++;
4039         }else{
4040             lower++;
4041         }
4042     }else{
4043         from_zero++;
4044     }
4045 }else{
4046     to_zero++;
4047 }
4048 #endif
4049                 for(; last_non_zero>=start_i; last_non_zero--){
4050                     if(block[perm_scantable[last_non_zero]])
4051                         break;
4052                 }
4053             }
4054 #ifdef REFINE_STATS
4055 count++;
4056 if(256*256*256*64 % count == 0){
4057     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4058 }
4059 #endif
4060             run=0;
4061             rle_index=0;
4062             for(i=start_i; i<=last_non_zero; i++){
4063                 int j= perm_scantable[i];
4064                 const int level= block[j];
4065
4066                  if(level){
4067                      run_tab[rle_index++]=run;
4068                      run=0;
4069                  }else{
4070                      run++;
4071                  }
4072             }
4073
4074             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4075         }else{
4076             break;
4077         }
4078     }
4079 #ifdef REFINE_STATS
4080 if(last_non_zero>0){
4081 STOP_TIMER("iterative search")
4082 }
4083 }
4084 #endif
4085
4086     return last_non_zero;
4087 }
4088
4089 int ff_dct_quantize_c(MpegEncContext *s,
4090                         DCTELEM *block, int n,
4091                         int qscale, int *overflow)
4092 {
4093     int i, j, level, last_non_zero, q, start_i;
4094     const int *qmat;
4095     const uint8_t *scantable= s->intra_scantable.scantable;
4096     int bias;
4097     int max=0;
4098     unsigned int threshold1, threshold2;
4099
4100     s->dsp.fdct (block);
4101
4102     if(s->dct_error_sum)
4103         s->denoise_dct(s, block);
4104
4105     if (s->mb_intra) {
4106         if (!s->h263_aic) {
4107             if (n < 4)
4108                 q = s->y_dc_scale;
4109             else
4110                 q = s->c_dc_scale;
4111             q = q << 3;
4112         } else
4113             /* For AIC we skip quant/dequant of INTRADC */
4114             q = 1 << 3;
4115
4116         /* note: block[0] is assumed to be positive */
4117         block[0] = (block[0] + (q >> 1)) / q;
4118         start_i = 1;
4119         last_non_zero = 0;
4120         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4121         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4122     } else {
4123         start_i = 0;
4124         last_non_zero = -1;
4125         qmat = s->q_inter_matrix[qscale];
4126         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4127     }
4128     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4129     threshold2= (threshold1<<1);
4130     for(i=63;i>=start_i;i--) {
4131         j = scantable[i];
4132         level = block[j] * qmat[j];
4133
4134         if(((unsigned)(level+threshold1))>threshold2){
4135             last_non_zero = i;
4136             break;
4137         }else{
4138             block[j]=0;
4139         }
4140     }
4141     for(i=start_i; i<=last_non_zero; i++) {
4142         j = scantable[i];
4143         level = block[j] * qmat[j];
4144
4145 //        if(   bias+level >= (1<<QMAT_SHIFT)
4146 //           || bias-level >= (1<<QMAT_SHIFT)){
4147         if(((unsigned)(level+threshold1))>threshold2){
4148             if(level>0){
4149                 level= (bias + level)>>QMAT_SHIFT;
4150                 block[j]= level;
4151             }else{
4152                 level= (bias - level)>>QMAT_SHIFT;
4153                 block[j]= -level;
4154             }
4155             max |=level;
4156         }else{
4157             block[j]=0;
4158         }
4159     }
4160     *overflow= s->max_qcoeff < max; //overflow might have happened
4161
4162     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4163     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4164         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4165
4166     return last_non_zero;
4167 }
4168
/* Byte offset of member x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags used by the tables in this file: video + encoding parameter.
 * The expansion is parenthesized so that VE behaves as a single operand
 * wherever it is used. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4171 static const AVOption h263_options[] = {
4172     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4173     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4174     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
4175     FF_MPV_COMMON_OPTS
4176     { NULL },
4177 };
4178
4179 static const AVClass h263_class = {
4180     .class_name = "H.263 encoder",
4181     .item_name  = av_default_item_name,
4182     .option     = h263_options,
4183     .version    = LIBAVUTIL_VERSION_INT,
4184 };
4185
4186 AVCodec ff_h263_encoder = {
4187     .name           = "h263",
4188     .type           = AVMEDIA_TYPE_VIDEO,
4189     .id             = CODEC_ID_H263,
4190     .priv_data_size = sizeof(MpegEncContext),
4191     .init           = ff_MPV_encode_init,
4192     .encode2        = ff_MPV_encode_picture,
4193     .close          = ff_MPV_encode_end,
4194     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4195     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4196     .priv_class     = &h263_class,
4197 };
4198
4199 static const AVOption h263p_options[] = {
4200     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4201     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4202     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4203     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4204     FF_MPV_COMMON_OPTS
4205     { NULL },
4206 };
4207 static const AVClass h263p_class = {
4208     .class_name = "H.263p encoder",
4209     .item_name  = av_default_item_name,
4210     .option     = h263p_options,
4211     .version    = LIBAVUTIL_VERSION_INT,
4212 };
4213
4214 AVCodec ff_h263p_encoder = {
4215     .name           = "h263p",
4216     .type           = AVMEDIA_TYPE_VIDEO,
4217     .id             = CODEC_ID_H263P,
4218     .priv_data_size = sizeof(MpegEncContext),
4219     .init           = ff_MPV_encode_init,
4220     .encode2        = ff_MPV_encode_picture,
4221     .close          = ff_MPV_encode_end,
4222     .capabilities = CODEC_CAP_SLICE_THREADS,
4223     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4224     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4225     .priv_class     = &h263p_class,
4226 };
4227
4228 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4229
4230 AVCodec ff_msmpeg4v2_encoder = {
4231     .name           = "msmpeg4v2",
4232     .type           = AVMEDIA_TYPE_VIDEO,
4233     .id             = CODEC_ID_MSMPEG4V2,
4234     .priv_data_size = sizeof(MpegEncContext),
4235     .init           = ff_MPV_encode_init,
4236     .encode2        = ff_MPV_encode_picture,
4237     .close          = ff_MPV_encode_end,
4238     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4239     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4240     .priv_class     = &msmpeg4v2_class,
4241 };
4242
4243 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4244
4245 AVCodec ff_msmpeg4v3_encoder = {
4246     .name           = "msmpeg4",
4247     .type           = AVMEDIA_TYPE_VIDEO,
4248     .id             = CODEC_ID_MSMPEG4V3,
4249     .priv_data_size = sizeof(MpegEncContext),
4250     .init           = ff_MPV_encode_init,
4251     .encode2        = ff_MPV_encode_picture,
4252     .close          = ff_MPV_encode_end,
4253     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4254     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4255     .priv_class     = &msmpeg4v3_class,
4256 };
4257
4258 FF_MPV_GENERIC_CLASS(wmv1)
4259
4260 AVCodec ff_wmv1_encoder = {
4261     .name           = "wmv1",
4262     .type           = AVMEDIA_TYPE_VIDEO,
4263     .id             = CODEC_ID_WMV1,
4264     .priv_data_size = sizeof(MpegEncContext),
4265     .init           = ff_MPV_encode_init,
4266     .encode2        = ff_MPV_encode_picture,
4267     .close          = ff_MPV_encode_end,
4268     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4269     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4270     .priv_class     = &wmv1_class,
4271 };