]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
fbb5cf15c7da94dd7f825283c4899a82f597dfeb
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include "bytestream.h"
47 #include <limits.h>
48
49 //#undef NDEBUG
50 //#include <assert.h>
51
52 static int encode_picture(MpegEncContext *s, int picture_number);
53 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
54 static int sse_mb(MpegEncContext *s);
55 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
56 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
57
58 /* enable all paranoid tests for rounding, overflows, etc... */
59 //#define PARANOID
60
61 //#define DEBUG
62
/* Default motion-vector penalty table. MPV_encode_defaults() installs it as
 * s->me.mv_penalty. It is indexed by [f_code][mv + MAX_MV] going by its
 * dimensions -- NOTE(review): confirm the indexing against the users. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
/* Default f_code lookup table. MPV_encode_defaults() sets the entries for
 * offsets -16..15 around MAX_MV to 1 and installs it as s->fcode_tab. */
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
/* Generic option table: the entries expanded from FF_MPV_COMMON_OPTS followed
 * by a { NULL } terminator entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
70
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10
83 #ifdef FAAN_POSTSCALE
84             || dsp->fdct == ff_faandct
85 #endif
86             ) {
87             for (i = 0; i < 64; i++) {
88                 const int j = dsp->idct_permutation[i];
89                 /* 16 <= qscale * quant_matrix[i] <= 7905
90                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
91                  *             19952 <=              x  <= 249205026
92                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
93                  *           3444240 >= (1 << 36) / (x) >= 275 */
94
95                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
96                                         (qscale * quant_matrix[j]));
97             }
98         } else if (dsp->fdct == ff_fdct_ifast
99 #ifndef FAAN_POSTSCALE
100                    || dsp->fdct == ff_faandct
101 #endif
102                    ) {
103             for (i = 0; i < 64; i++) {
104                 const int j = dsp->idct_permutation[i];
105                 /* 16 <= qscale * quant_matrix[i] <= 7905
106                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
107                  *             19952 <=              x  <= 249205026
108                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
109                  *           3444240 >= (1 << 36) / (x) >= 275 */
110
111                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
112                                         (ff_aanscales[i] * qscale *
113                                          quant_matrix[j]));
114             }
115         } else {
116             for (i = 0; i < 64; i++) {
117                 const int j = dsp->idct_permutation[i];
118                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
119                  * Assume x = qscale * quant_matrix[i]
120                  * So             16 <=              x  <= 7905
121                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
122                  * so          32768 >= (1 << 19) / (x) >= 67 */
123                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
124                                         (qscale * quant_matrix[j]));
125                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
126                 //                    (qscale * quant_matrix[i]);
127                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
128                                        (qscale * quant_matrix[j]);
129
130                 if (qmat16[qscale][0][i] == 0 ||
131                     qmat16[qscale][0][i] == 128 * 256)
132                     qmat16[qscale][0][i] = 128 * 256 - 1;
133                 qmat16[qscale][1][i] =
134                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
135                                 qmat16[qscale][0][i]);
136             }
137         }
138
139         for (i = intra; i < 64; i++) {
140             int64_t max = 8191;
141             if (dsp->fdct == ff_fdct_ifast
142 #ifndef FAAN_POSTSCALE
143                 || dsp->fdct == ff_faandct
144 #endif
145                ) {
146                 max = (8191LL * ff_aanscales[i]) >> 14;
147             }
148             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
149                 shift++;
150             }
151         }
152     }
153     if (shift) {
154         av_log(NULL, AV_LOG_INFO,
155                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
156                QMAT_SHIFT - shift);
157     }
158 }
159
160 static inline void update_qscale(MpegEncContext *s)
161 {
162     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
163                 (FF_LAMBDA_SHIFT + 7);
164     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
165
166     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
167                  FF_LAMBDA_SHIFT;
168 }
169
170 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
171 {
172     int i;
173
174     if (matrix) {
175         put_bits(pb, 1, 1);
176         for (i = 0; i < 64; i++) {
177             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
178         }
179     } else
180         put_bits(pb, 1, 0);
181 }
182
183 /**
184  * init s->current_picture.qscale_table from s->lambda_table
185  */
186 void ff_init_qscale_tab(MpegEncContext *s)
187 {
188     int8_t * const qscale_table = s->current_picture.f.qscale_table;
189     int i;
190
191     for (i = 0; i < s->mb_num; i++) {
192         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
193         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
194         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
195                                                   s->avctx->qmax);
196     }
197 }
198
199 static void copy_picture_attributes(MpegEncContext *s,
200                                     AVFrame *dst,
201                                     AVFrame *src)
202 {
203     int i;
204
205     dst->pict_type              = src->pict_type;
206     dst->quality                = src->quality;
207     dst->coded_picture_number   = src->coded_picture_number;
208     dst->display_picture_number = src->display_picture_number;
209     //dst->reference              = src->reference;
210     dst->pts                    = src->pts;
211     dst->interlaced_frame       = src->interlaced_frame;
212     dst->top_field_first        = src->top_field_first;
213
214     if (s->avctx->me_threshold) {
215         if (!src->motion_val[0])
216             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
217         if (!src->mb_type)
218             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
219         if (!src->ref_index[0])
220             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
221         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
222             av_log(s->avctx, AV_LOG_ERROR,
223                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
224                    src->motion_subsample_log2, dst->motion_subsample_log2);
225
226         memcpy(dst->mb_type, src->mb_type,
227                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
228
229         for (i = 0; i < 2; i++) {
230             int stride = ((16 * s->mb_width ) >>
231                           src->motion_subsample_log2) + 1;
232             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
233
234             if (src->motion_val[i] &&
235                 src->motion_val[i] != dst->motion_val[i]) {
236                 memcpy(dst->motion_val[i], src->motion_val[i],
237                        2 * stride * height * sizeof(int16_t));
238             }
239             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
240                 memcpy(dst->ref_index[i], src->ref_index[i],
241                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
242             }
243         }
244     }
245 }
246
247 static void update_duplicate_context_after_me(MpegEncContext *dst,
248                                               MpegEncContext *src)
249 {
250 #define COPY(a) dst->a= src->a
251     COPY(pict_type);
252     COPY(current_picture);
253     COPY(f_code);
254     COPY(b_code);
255     COPY(qscale);
256     COPY(lambda);
257     COPY(lambda2);
258     COPY(picture_in_gop_number);
259     COPY(gop_picture_number);
260     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
261     COPY(progressive_frame);    // FIXME don't set in encode_header
262     COPY(partitioned_frame);    // FIXME don't set in encode_header
263 #undef COPY
264 }
265
266 /**
267  * Set the given MpegEncContext to defaults for encoding.
268  * the changed fields will not depend upon the prior state of the MpegEncContext.
269  */
270 static void MPV_encode_defaults(MpegEncContext *s)
271 {
272     int i;
273     ff_MPV_common_defaults(s);
274
275     for (i = -16; i < 16; i++) {
276         default_fcode_tab[i + MAX_MV] = 1;
277     }
278     s->me.mv_penalty = default_mv_penalty;
279     s->fcode_tab     = default_fcode_tab;
280 }
281
282 /* init video encoder */
283 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
284 {
285     MpegEncContext *s = avctx->priv_data;
286     int i;
287     int chroma_h_shift, chroma_v_shift;
288
289     MPV_encode_defaults(s);
290
291     switch (avctx->codec_id) {
292     case CODEC_ID_MPEG2VIDEO:
293         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
294             avctx->pix_fmt != PIX_FMT_YUV422P) {
295             av_log(avctx, AV_LOG_ERROR,
296                    "only YUV420 and YUV422 are supported\n");
297             return -1;
298         }
299         break;
300     case CODEC_ID_LJPEG:
301         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
302             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
303             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
304             avctx->pix_fmt != PIX_FMT_BGRA     &&
305             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
306               avctx->pix_fmt != PIX_FMT_YUV422P &&
307               avctx->pix_fmt != PIX_FMT_YUV444P) ||
308              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
309             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
310             return -1;
311         }
312         break;
313     case CODEC_ID_MJPEG:
314         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
315             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
316             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
317               avctx->pix_fmt != PIX_FMT_YUV422P) ||
318              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
319             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
320             return -1;
321         }
322         break;
323     default:
324         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
325             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
326             return -1;
327         }
328     }
329
330     switch (avctx->pix_fmt) {
331     case PIX_FMT_YUVJ422P:
332     case PIX_FMT_YUV422P:
333         s->chroma_format = CHROMA_422;
334         break;
335     case PIX_FMT_YUVJ420P:
336     case PIX_FMT_YUV420P:
337     default:
338         s->chroma_format = CHROMA_420;
339         break;
340     }
341
342     s->bit_rate = avctx->bit_rate;
343     s->width    = avctx->width;
344     s->height   = avctx->height;
345     if (avctx->gop_size > 600 &&
346         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
347         av_log(avctx, AV_LOG_ERROR,
348                "Warning keyframe interval too large! reducing it ...\n");
349         avctx->gop_size = 600;
350     }
351     s->gop_size     = avctx->gop_size;
352     s->avctx        = avctx;
353     s->flags        = avctx->flags;
354     s->flags2       = avctx->flags2;
355     s->max_b_frames = avctx->max_b_frames;
356     s->codec_id     = avctx->codec->id;
357 #if FF_API_MPV_GLOBAL_OPTS
358     if (avctx->luma_elim_threshold)
359         s->luma_elim_threshold   = avctx->luma_elim_threshold;
360     if (avctx->chroma_elim_threshold)
361         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
362 #endif
363     s->strict_std_compliance = avctx->strict_std_compliance;
364     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
365     s->mpeg_quant         = avctx->mpeg_quant;
366     s->rtp_mode           = !!avctx->rtp_payload_size;
367     s->intra_dc_precision = avctx->intra_dc_precision;
368     s->user_specified_pts = AV_NOPTS_VALUE;
369
370     if (s->gop_size <= 1) {
371         s->intra_only = 1;
372         s->gop_size   = 12;
373     } else {
374         s->intra_only = 0;
375     }
376
377     s->me_method = avctx->me_method;
378
379     /* Fixed QSCALE */
380     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
381
382 #if FF_API_MPV_GLOBAL_OPTS
383     if (s->flags & CODEC_FLAG_QP_RD)
384         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
385 #endif
386
387     s->adaptive_quant = (s->avctx->lumi_masking ||
388                          s->avctx->dark_masking ||
389                          s->avctx->temporal_cplx_masking ||
390                          s->avctx->spatial_cplx_masking  ||
391                          s->avctx->p_masking      ||
392                          s->avctx->border_masking ||
393                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
394                         !s->fixed_qscale;
395
396     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
397
398     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
399         av_log(avctx, AV_LOG_ERROR,
400                "a vbv buffer size is needed, "
401                "for encoding with a maximum bitrate\n");
402         return -1;
403     }
404
405     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
406         av_log(avctx, AV_LOG_INFO,
407                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
408     }
409
410     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
411         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
412         return -1;
413     }
414
415     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
416         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
417         return -1;
418     }
419
420     if (avctx->rc_max_rate &&
421         avctx->rc_max_rate == avctx->bit_rate &&
422         avctx->rc_max_rate != avctx->rc_min_rate) {
423         av_log(avctx, AV_LOG_INFO,
424                "impossible bitrate constraints, this will fail\n");
425     }
426
427     if (avctx->rc_buffer_size &&
428         avctx->bit_rate * (int64_t)avctx->time_base.num >
429             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
430         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
431         return -1;
432     }
433
434     if (!s->fixed_qscale &&
435         avctx->bit_rate * av_q2d(avctx->time_base) >
436             avctx->bit_rate_tolerance) {
437         av_log(avctx, AV_LOG_ERROR,
438                "bitrate tolerance too small for bitrate\n");
439         return -1;
440     }
441
442     if (s->avctx->rc_max_rate &&
443         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
444         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
445          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
446         90000LL * (avctx->rc_buffer_size - 1) >
447             s->avctx->rc_max_rate * 0xFFFFLL) {
448         av_log(avctx, AV_LOG_INFO,
449                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
450                "specified vbv buffer is too large for the given bitrate!\n");
451     }
452
453     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
454         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
455         s->codec_id != CODEC_ID_FLV1) {
456         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
457         return -1;
458     }
459
460     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
461         av_log(avctx, AV_LOG_ERROR,
462                "OBMC is only supported with simple mb decision\n");
463         return -1;
464     }
465
466     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
467         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
468         return -1;
469     }
470
471     if (s->max_b_frames                    &&
472         s->codec_id != CODEC_ID_MPEG4      &&
473         s->codec_id != CODEC_ID_MPEG1VIDEO &&
474         s->codec_id != CODEC_ID_MPEG2VIDEO) {
475         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
476         return -1;
477     }
478
479     if ((s->codec_id == CODEC_ID_MPEG4 ||
480          s->codec_id == CODEC_ID_H263  ||
481          s->codec_id == CODEC_ID_H263P) &&
482         (avctx->sample_aspect_ratio.num > 255 ||
483          avctx->sample_aspect_ratio.den > 255)) {
484         av_log(avctx, AV_LOG_ERROR,
485                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
486                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
487         return -1;
488     }
489
490     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
491         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
492         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
493         return -1;
494     }
495
496     // FIXME mpeg2 uses that too
497     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
498         av_log(avctx, AV_LOG_ERROR,
499                "mpeg2 style quantization not supported by codec\n");
500         return -1;
501     }
502
503 #if FF_API_MPV_GLOBAL_OPTS
504     if (s->flags & CODEC_FLAG_CBP_RD)
505         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
506 #endif
507
508     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
509         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
510         return -1;
511     }
512
513     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
514         s->avctx->mb_decision != FF_MB_DECISION_RD) {
515         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
516         return -1;
517     }
518
519     if (s->avctx->scenechange_threshold < 1000000000 &&
520         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
521         av_log(avctx, AV_LOG_ERROR,
522                "closed gop with scene change detection are not supported yet, "
523                "set threshold to 1000000000\n");
524         return -1;
525     }
526
527     if (s->flags & CODEC_FLAG_LOW_DELAY) {
528         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
529             av_log(avctx, AV_LOG_ERROR,
530                   "low delay forcing is only available for mpeg2\n");
531             return -1;
532         }
533         if (s->max_b_frames != 0) {
534             av_log(avctx, AV_LOG_ERROR,
535                    "b frames cannot be used with low delay\n");
536             return -1;
537         }
538     }
539
540     if (s->q_scale_type == 1) {
541         if (avctx->qmax > 12) {
542             av_log(avctx, AV_LOG_ERROR,
543                    "non linear quant only supports qmax <= 12 currently\n");
544             return -1;
545         }
546     }
547
548     if (s->avctx->thread_count > 1         &&
549         s->codec_id != CODEC_ID_MPEG4      &&
550         s->codec_id != CODEC_ID_MPEG1VIDEO &&
551         s->codec_id != CODEC_ID_MPEG2VIDEO &&
552         (s->codec_id != CODEC_ID_H263P)) {
553         av_log(avctx, AV_LOG_ERROR,
554                "multi threaded encoding not supported by codec\n");
555         return -1;
556     }
557
558     if (s->avctx->thread_count < 1) {
559         av_log(avctx, AV_LOG_ERROR,
560                "automatic thread number detection not supported by codec,"
561                "patch welcome\n");
562         return -1;
563     }
564
565     if (s->avctx->thread_count > 1)
566         s->rtp_mode = 1;
567
568     if (!avctx->time_base.den || !avctx->time_base.num) {
569         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
570         return -1;
571     }
572
573     i = (INT_MAX / 2 + 128) >> 8;
574     if (avctx->me_threshold >= i) {
575         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
576                i - 1);
577         return -1;
578     }
579     if (avctx->mb_threshold >= i) {
580         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
581                i - 1);
582         return -1;
583     }
584
585     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
586         av_log(avctx, AV_LOG_INFO,
587                "notice: b_frame_strategy only affects the first pass\n");
588         avctx->b_frame_strategy = 0;
589     }
590
591     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
592     if (i > 1) {
593         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
594         avctx->time_base.den /= i;
595         avctx->time_base.num /= i;
596         //return -1;
597     }
598
599     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO ||
600         s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG) {
601         // (a + x * 3 / 8) / x
602         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
603         s->inter_quant_bias = 0;
604     } else {
605         s->intra_quant_bias = 0;
606         // (a - x / 4) / x
607         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
608     }
609
610     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
611         s->intra_quant_bias = avctx->intra_quant_bias;
612     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
613         s->inter_quant_bias = avctx->inter_quant_bias;
614
615     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
616                                   &chroma_v_shift);
617
618     if (avctx->codec_id == CODEC_ID_MPEG4 &&
619         s->avctx->time_base.den > (1 << 16) - 1) {
620         av_log(avctx, AV_LOG_ERROR,
621                "timebase %d/%d not supported by MPEG 4 standard, "
622                "the maximum admitted value for the timebase denominator "
623                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
624                (1 << 16) - 1);
625         return -1;
626     }
627     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
628
629 #if FF_API_MPV_GLOBAL_OPTS
630     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
631         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
632     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
633         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
634     if (avctx->quantizer_noise_shaping)
635         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
636 #endif
637
638     switch (avctx->codec->id) {
639     case CODEC_ID_MPEG1VIDEO:
640         s->out_format = FMT_MPEG1;
641         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
642         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
643         break;
644     case CODEC_ID_MPEG2VIDEO:
645         s->out_format = FMT_MPEG1;
646         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
647         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
648         s->rtp_mode   = 1;
649         break;
650     case CODEC_ID_LJPEG:
651     case CODEC_ID_MJPEG:
652         s->out_format = FMT_MJPEG;
653         s->intra_only = 1; /* force intra only for jpeg */
654         if (avctx->codec->id == CODEC_ID_LJPEG &&
655             avctx->pix_fmt   == PIX_FMT_BGRA) {
656             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
657             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
658             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
659         } else {
660             s->mjpeg_vsample[0] = 2;
661             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
662             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
663             s->mjpeg_hsample[0] = 2;
664             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
665             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
666         }
667         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
668             ff_mjpeg_encode_init(s) < 0)
669             return -1;
670         avctx->delay = 0;
671         s->low_delay = 1;
672         break;
673     case CODEC_ID_H261:
674         if (!CONFIG_H261_ENCODER)
675             return -1;
676         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
677             av_log(avctx, AV_LOG_ERROR,
678                    "The specified picture size of %dx%d is not valid for the "
679                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
680                     s->width, s->height);
681             return -1;
682         }
683         s->out_format = FMT_H261;
684         avctx->delay  = 0;
685         s->low_delay  = 1;
686         break;
687     case CODEC_ID_H263:
688         if (!CONFIG_H263_ENCODER)
689         return -1;
690         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
691                              s->width, s->height) == 8) {
692             av_log(avctx, AV_LOG_INFO,
693                    "The specified picture size of %dx%d is not valid for "
694                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
695                    "352x288, 704x576, and 1408x1152."
696                    "Try H.263+.\n", s->width, s->height);
697             return -1;
698         }
699         s->out_format = FMT_H263;
700         avctx->delay  = 0;
701         s->low_delay  = 1;
702         break;
703     case CODEC_ID_H263P:
704         s->out_format = FMT_H263;
705         s->h263_plus  = 1;
706         /* Fx */
707         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
708         s->modified_quant  = s->h263_aic;
709         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
710         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
711
712         /* /Fx */
713         /* These are just to be sure */
714         avctx->delay = 0;
715         s->low_delay = 1;
716         break;
717     case CODEC_ID_FLV1:
718         s->out_format      = FMT_H263;
719         s->h263_flv        = 2; /* format = 1; 11-bit codes */
720         s->unrestricted_mv = 1;
721         s->rtp_mode  = 0; /* don't allow GOB */
722         avctx->delay = 0;
723         s->low_delay = 1;
724         break;
725     case CODEC_ID_RV10:
726         s->out_format = FMT_H263;
727         avctx->delay  = 0;
728         s->low_delay  = 1;
729         break;
730     case CODEC_ID_RV20:
731         s->out_format      = FMT_H263;
732         avctx->delay       = 0;
733         s->low_delay       = 1;
734         s->modified_quant  = 1;
735         s->h263_aic        = 1;
736         s->h263_plus       = 1;
737         s->loop_filter     = 1;
738         s->unrestricted_mv = 0;
739         break;
740     case CODEC_ID_MPEG4:
741         s->out_format      = FMT_H263;
742         s->h263_pred       = 1;
743         s->unrestricted_mv = 1;
744         s->low_delay       = s->max_b_frames ? 0 : 1;
745         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
746         break;
747     case CODEC_ID_MSMPEG4V2:
748         s->out_format      = FMT_H263;
749         s->h263_pred       = 1;
750         s->unrestricted_mv = 1;
751         s->msmpeg4_version = 2;
752         avctx->delay       = 0;
753         s->low_delay       = 1;
754         break;
755     case CODEC_ID_MSMPEG4V3:
756         s->out_format        = FMT_H263;
757         s->h263_pred         = 1;
758         s->unrestricted_mv   = 1;
759         s->msmpeg4_version   = 3;
760         s->flipflop_rounding = 1;
761         avctx->delay         = 0;
762         s->low_delay         = 1;
763         break;
764     case CODEC_ID_WMV1:
765         s->out_format        = FMT_H263;
766         s->h263_pred         = 1;
767         s->unrestricted_mv   = 1;
768         s->msmpeg4_version   = 4;
769         s->flipflop_rounding = 1;
770         avctx->delay         = 0;
771         s->low_delay         = 1;
772         break;
773     case CODEC_ID_WMV2:
774         s->out_format        = FMT_H263;
775         s->h263_pred         = 1;
776         s->unrestricted_mv   = 1;
777         s->msmpeg4_version   = 5;
778         s->flipflop_rounding = 1;
779         avctx->delay         = 0;
780         s->low_delay         = 1;
781         break;
782     default:
783         return -1;
784     }
785
786     avctx->has_b_frames = !s->low_delay;
787
788     s->encoding = 1;
789
790     s->progressive_frame    =
791     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
792                                                 CODEC_FLAG_INTERLACED_ME) ||
793                                 s->alternate_scan);
794
795     /* init */
796     if (ff_MPV_common_init(s) < 0)
797         return -1;
798
799     if (!s->dct_quantize)
800         s->dct_quantize = ff_dct_quantize_c;
801     if (!s->denoise_dct)
802         s->denoise_dct  = denoise_dct_c;
803     s->fast_dct_quantize = s->dct_quantize;
804     if (avctx->trellis)
805         s->dct_quantize  = dct_quantize_trellis_c;
806
807     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
808         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
809
810     s->quant_precision = 5;
811
812     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
813     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
814
815     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
816         ff_h261_encode_init(s);
817     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
818         ff_h263_encode_init(s);
819     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
820         ff_msmpeg4_encode_init(s);
821     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
822         && s->out_format == FMT_MPEG1)
823         ff_mpeg1_encode_init(s);
824
825     /* init q matrix */
826     for (i = 0; i < 64; i++) {
827         int j = s->dsp.idct_permutation[i];
828         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
829             s->mpeg_quant) {
830             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
831             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
832         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
833             s->intra_matrix[j] =
834             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
835         } else {
836             /* mpeg1/2 */
837             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
838             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
839         }
840         if (s->avctx->intra_matrix)
841             s->intra_matrix[j] = s->avctx->intra_matrix[i];
842         if (s->avctx->inter_matrix)
843             s->inter_matrix[j] = s->avctx->inter_matrix[i];
844     }
845
846     /* precompute matrix */
847     /* for mjpeg, we do include qscale in the matrix */
848     if (s->out_format != FMT_MJPEG) {
849         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
850                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
851                           31, 1);
852         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
853                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
854                           31, 0);
855     }
856
857     if (ff_rate_control_init(s) < 0)
858         return -1;
859
860     return 0;
861 }
862
863 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
864 {
865     MpegEncContext *s = avctx->priv_data;
866
867     ff_rate_control_uninit(s);
868
869     ff_MPV_common_end(s);
870     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
871         s->out_format == FMT_MJPEG)
872         ff_mjpeg_encode_close(s);
873
874     av_freep(&avctx->extradata);
875
876     return 0;
877 }
878
/**
 * Sum of absolute differences between a 16x16 pixel block and a constant.
 *
 * The block is only read, so the pixel pointer is const-qualified.
 *
 * @param src    top-left pixel of the 16x16 block
 * @param ref    value every pixel of the block is compared against
 * @param stride offset in bytes between the starts of consecutive rows
 * @return sum over all 256 pixels of |pixel - ref|
 */
static int get_sae(const uint8_t *src, int ref, int stride)
{
    int x, y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        const uint8_t *row = src + y * stride;

        for (x = 0; x < 16; x++) {
            const int diff = row[x] - ref;

            acc += diff < 0 ? -diff : diff;
        }
    }

    return acc;
}
892
893 static int get_intra_count(MpegEncContext *s, uint8_t *src,
894                            uint8_t *ref, int stride)
895 {
896     int x, y, w, h;
897     int acc = 0;
898
899     w = s->width  & ~15;
900     h = s->height & ~15;
901
902     for (y = 0; y < h; y += 16) {
903         for (x = 0; x < w; x += 16) {
904             int offset = x + y * stride;
905             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
906                                      16);
907             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
908             int sae  = get_sae(src + offset, mean, stride);
909
910             acc += sae + 500 < sad;
911         }
912     }
913     return acc;
914 }
915
916
917 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
918 {
919     AVFrame *pic = NULL;
920     int64_t pts;
921     int i;
922     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
923                                                  (s->low_delay ? 0 : 1);
924     int direct = 1;
925
926     if (pic_arg) {
927         pts = pic_arg->pts;
928         pic_arg->display_picture_number = s->input_picture_number++;
929
930         if (pts != AV_NOPTS_VALUE) {
931             if (s->user_specified_pts != AV_NOPTS_VALUE) {
932                 int64_t time = pts;
933                 int64_t last = s->user_specified_pts;
934
935                 if (time <= last) {
936                     av_log(s->avctx, AV_LOG_ERROR,
937                            "Error, Invalid timestamp=%"PRId64", "
938                            "last=%"PRId64"\n", pts, s->user_specified_pts);
939                     return -1;
940                 }
941
942                 if (!s->low_delay && pic_arg->display_picture_number == 1)
943                     s->dts_delta = time - last;
944             }
945             s->user_specified_pts = pts;
946         } else {
947             if (s->user_specified_pts != AV_NOPTS_VALUE) {
948                 s->user_specified_pts =
949                 pts = s->user_specified_pts + 1;
950                 av_log(s->avctx, AV_LOG_INFO,
951                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
952                        pts);
953             } else {
954                 pts = pic_arg->display_picture_number;
955             }
956         }
957     }
958
959   if (pic_arg) {
960     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
961         direct = 0;
962     if (pic_arg->linesize[0] != s->linesize)
963         direct = 0;
964     if (pic_arg->linesize[1] != s->uvlinesize)
965         direct = 0;
966     if (pic_arg->linesize[2] != s->uvlinesize)
967         direct = 0;
968
969     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
970     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
971
972     if (direct) {
973         i = ff_find_unused_picture(s, 1);
974         if (i < 0)
975             return i;
976
977         pic = (AVFrame *) &s->picture[i];
978         pic->reference = 3;
979
980         for (i = 0; i < 4; i++) {
981             pic->data[i]     = pic_arg->data[i];
982             pic->linesize[i] = pic_arg->linesize[i];
983         }
984         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
985             return -1;
986         }
987     } else {
988         i = ff_find_unused_picture(s, 0);
989         if (i < 0)
990             return i;
991
992         pic = (AVFrame *) &s->picture[i];
993         pic->reference = 3;
994
995         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
996             return -1;
997         }
998
999         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1000             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1001             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1002             // empty
1003         } else {
1004             int h_chroma_shift, v_chroma_shift;
1005             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1006                                           &v_chroma_shift);
1007
1008             for (i = 0; i < 3; i++) {
1009                 int src_stride = pic_arg->linesize[i];
1010                 int dst_stride = i ? s->uvlinesize : s->linesize;
1011                 int h_shift = i ? h_chroma_shift : 0;
1012                 int v_shift = i ? v_chroma_shift : 0;
1013                 int w = s->width  >> h_shift;
1014                 int h = s->height >> v_shift;
1015                 uint8_t *src = pic_arg->data[i];
1016                 uint8_t *dst = pic->data[i];
1017
1018                 if (!s->avctx->rc_buffer_size)
1019                     dst += INPLACE_OFFSET;
1020
1021                 if (src_stride == dst_stride)
1022                     memcpy(dst, src, src_stride * h);
1023                 else {
1024                     while (h--) {
1025                         memcpy(dst, src, w);
1026                         dst += dst_stride;
1027                         src += src_stride;
1028                     }
1029                 }
1030             }
1031         }
1032     }
1033     copy_picture_attributes(s, pic, pic_arg);
1034     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1035   }
1036
1037     /* shift buffer entries */
1038     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1039         s->input_picture[i - 1] = s->input_picture[i];
1040
1041     s->input_picture[encoding_delay] = (Picture*) pic;
1042
1043     return 0;
1044 }
1045
1046 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1047 {
1048     int x, y, plane;
1049     int score = 0;
1050     int64_t score64 = 0;
1051
1052     for (plane = 0; plane < 3; plane++) {
1053         const int stride = p->f.linesize[plane];
1054         const int bw = plane ? 1 : 2;
1055         for (y = 0; y < s->mb_height * bw; y++) {
1056             for (x = 0; x < s->mb_width * bw; x++) {
1057                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1058                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1059                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1060                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1061
1062                 switch (s->avctx->frame_skip_exp) {
1063                 case 0: score    =  FFMAX(score, v);          break;
1064                 case 1: score   += FFABS(v);                  break;
1065                 case 2: score   += v * v;                     break;
1066                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1067                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1068                 }
1069             }
1070         }
1071     }
1072
1073     if (score)
1074         score64 = score;
1075
1076     if (score64 < s->avctx->frame_skip_threshold)
1077         return 1;
1078     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1079         return 1;
1080     return 0;
1081 }
1082
1083 static int estimate_best_b_count(MpegEncContext *s)
1084 {
1085     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1086     AVCodecContext *c = avcodec_alloc_context3(NULL);
1087     AVFrame input[FF_MAX_B_FRAMES + 2];
1088     const int scale = s->avctx->brd_scale;
1089     int i, j, out_size, p_lambda, b_lambda, lambda2;
1090     int outbuf_size  = s->width * s->height; // FIXME
1091     uint8_t *outbuf  = av_malloc(outbuf_size);
1092     int64_t best_rd  = INT64_MAX;
1093     int best_b_count = -1;
1094
1095     assert(scale >= 0 && scale <= 3);
1096
1097     //emms_c();
1098     //s->next_picture_ptr->quality;
1099     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1100     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1101     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1102     if (!b_lambda) // FIXME we should do this somewhere else
1103         b_lambda = p_lambda;
1104     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1105                FF_LAMBDA_SHIFT;
1106
1107     c->width        = s->width  >> scale;
1108     c->height       = s->height >> scale;
1109     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1110                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1111     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1112     c->mb_decision  = s->avctx->mb_decision;
1113     c->me_cmp       = s->avctx->me_cmp;
1114     c->mb_cmp       = s->avctx->mb_cmp;
1115     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1116     c->pix_fmt      = PIX_FMT_YUV420P;
1117     c->time_base    = s->avctx->time_base;
1118     c->max_b_frames = s->max_b_frames;
1119
1120     if (avcodec_open2(c, codec, NULL) < 0)
1121         return -1;
1122
1123     for (i = 0; i < s->max_b_frames + 2; i++) {
1124         int ysize = c->width * c->height;
1125         int csize = (c->width / 2) * (c->height / 2);
1126         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1127                                                 s->next_picture_ptr;
1128
1129         avcodec_get_frame_defaults(&input[i]);
1130         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1131         input[i].data[1]     = input[i].data[0] + ysize;
1132         input[i].data[2]     = input[i].data[1] + csize;
1133         input[i].linesize[0] = c->width;
1134         input[i].linesize[1] =
1135         input[i].linesize[2] = c->width / 2;
1136
1137         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1138             pre_input = *pre_input_ptr;
1139
1140             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1141                 pre_input.f.data[0] += INPLACE_OFFSET;
1142                 pre_input.f.data[1] += INPLACE_OFFSET;
1143                 pre_input.f.data[2] += INPLACE_OFFSET;
1144             }
1145
1146             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1147                                  pre_input.f.data[0], pre_input.f.linesize[0],
1148                                  c->width,      c->height);
1149             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1150                                  pre_input.f.data[1], pre_input.f.linesize[1],
1151                                  c->width >> 1, c->height >> 1);
1152             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1153                                  pre_input.f.data[2], pre_input.f.linesize[2],
1154                                  c->width >> 1, c->height >> 1);
1155         }
1156     }
1157
1158     for (j = 0; j < s->max_b_frames + 1; j++) {
1159         int64_t rd = 0;
1160
1161         if (!s->input_picture[j])
1162             break;
1163
1164         c->error[0] = c->error[1] = c->error[2] = 0;
1165
1166         input[0].pict_type = AV_PICTURE_TYPE_I;
1167         input[0].quality   = 1 * FF_QP2LAMBDA;
1168         out_size           = avcodec_encode_video(c, outbuf,
1169                                                   outbuf_size, &input[0]);
1170         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1171
1172         for (i = 0; i < s->max_b_frames + 1; i++) {
1173             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1174
1175             input[i + 1].pict_type = is_p ?
1176                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1177             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1178             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1179                                             &input[i + 1]);
1180             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1181         }
1182
1183         /* get the delayed frames */
1184         while (out_size) {
1185             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1186             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1187         }
1188
1189         rd += c->error[0] + c->error[1] + c->error[2];
1190
1191         if (rd < best_rd) {
1192             best_rd = rd;
1193             best_b_count = j;
1194         }
1195     }
1196
1197     av_freep(&outbuf);
1198     avcodec_close(c);
1199     av_freep(&c);
1200
1201     for (i = 0; i < s->max_b_frames + 2; i++) {
1202         av_freep(&input[i].data[0]);
1203     }
1204
1205     return best_b_count;
1206 }
1207
1208 static int select_input_picture(MpegEncContext *s)
1209 {
1210     int i;
1211
1212     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1213         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1214     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1215
1216     /* set next picture type & ordering */
1217     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1218         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1219             s->next_picture_ptr == NULL || s->intra_only) {
1220             s->reordered_input_picture[0] = s->input_picture[0];
1221             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1222             s->reordered_input_picture[0]->f.coded_picture_number =
1223                 s->coded_picture_number++;
1224         } else {
1225             int b_frames;
1226
1227             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1228                 if (s->picture_in_gop_number < s->gop_size &&
1229                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1230                     // FIXME check that te gop check above is +-1 correct
1231                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1232                     //       s->input_picture[0]->f.data[0],
1233                     //       s->input_picture[0]->pts);
1234
1235                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1236                         for (i = 0; i < 4; i++)
1237                             s->input_picture[0]->f.data[i] = NULL;
1238                         s->input_picture[0]->f.type = 0;
1239                     } else {
1240                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1241                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1242
1243                         s->avctx->release_buffer(s->avctx,
1244                                                  (AVFrame *) s->input_picture[0]);
1245                     }
1246
1247                     emms_c();
1248                     ff_vbv_update(s, 0);
1249
1250                     goto no_output_pic;
1251                 }
1252             }
1253
1254             if (s->flags & CODEC_FLAG_PASS2) {
1255                 for (i = 0; i < s->max_b_frames + 1; i++) {
1256                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1257
1258                     if (pict_num >= s->rc_context.num_entries)
1259                         break;
1260                     if (!s->input_picture[i]) {
1261                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1262                         break;
1263                     }
1264
1265                     s->input_picture[i]->f.pict_type =
1266                         s->rc_context.entry[pict_num].new_pict_type;
1267                 }
1268             }
1269
1270             if (s->avctx->b_frame_strategy == 0) {
1271                 b_frames = s->max_b_frames;
1272                 while (b_frames && !s->input_picture[b_frames])
1273                     b_frames--;
1274             } else if (s->avctx->b_frame_strategy == 1) {
1275                 for (i = 1; i < s->max_b_frames + 1; i++) {
1276                     if (s->input_picture[i] &&
1277                         s->input_picture[i]->b_frame_score == 0) {
1278                         s->input_picture[i]->b_frame_score =
1279                             get_intra_count(s,
1280                                             s->input_picture[i    ]->f.data[0],
1281                                             s->input_picture[i - 1]->f.data[0],
1282                                             s->linesize) + 1;
1283                     }
1284                 }
1285                 for (i = 0; i < s->max_b_frames + 1; i++) {
1286                     if (s->input_picture[i] == NULL ||
1287                         s->input_picture[i]->b_frame_score - 1 >
1288                             s->mb_num / s->avctx->b_sensitivity)
1289                         break;
1290                 }
1291
1292                 b_frames = FFMAX(0, i - 1);
1293
1294                 /* reset scores */
1295                 for (i = 0; i < b_frames + 1; i++) {
1296                     s->input_picture[i]->b_frame_score = 0;
1297                 }
1298             } else if (s->avctx->b_frame_strategy == 2) {
1299                 b_frames = estimate_best_b_count(s);
1300             } else {
1301                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1302                 b_frames = 0;
1303             }
1304
1305             emms_c();
1306             //static int b_count = 0;
1307             //b_count += b_frames;
1308             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1309
1310             for (i = b_frames - 1; i >= 0; i--) {
1311                 int type = s->input_picture[i]->f.pict_type;
1312                 if (type && type != AV_PICTURE_TYPE_B)
1313                     b_frames = i;
1314             }
1315             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1316                 b_frames == s->max_b_frames) {
1317                 av_log(s->avctx, AV_LOG_ERROR,
1318                        "warning, too many b frames in a row\n");
1319             }
1320
1321             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1322                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1323                     s->gop_size > s->picture_in_gop_number) {
1324                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1325                 } else {
1326                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1327                         b_frames = 0;
1328                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1329                 }
1330             }
1331
1332             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1333                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1334                 b_frames--;
1335
1336             s->reordered_input_picture[0] = s->input_picture[b_frames];
1337             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1338                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1339             s->reordered_input_picture[0]->f.coded_picture_number =
1340                 s->coded_picture_number++;
1341             for (i = 0; i < b_frames; i++) {
1342                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1343                 s->reordered_input_picture[i + 1]->f.pict_type =
1344                     AV_PICTURE_TYPE_B;
1345                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1346                     s->coded_picture_number++;
1347             }
1348         }
1349     }
1350 no_output_pic:
1351     if (s->reordered_input_picture[0]) {
1352         s->reordered_input_picture[0]->f.reference =
1353            s->reordered_input_picture[0]->f.pict_type !=
1354                AV_PICTURE_TYPE_B ? 3 : 0;
1355
1356         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1357
1358         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1359             s->avctx->rc_buffer_size) {
1360             // input is a shared pix, so we can't modifiy it -> alloc a new
1361             // one & ensure that the shared one is reuseable
1362
1363             Picture *pic;
1364             int i = ff_find_unused_picture(s, 0);
1365             if (i < 0)
1366                 return i;
1367             pic = &s->picture[i];
1368
1369             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1370             if (ff_alloc_picture(s, pic, 0) < 0) {
1371                 return -1;
1372             }
1373
1374             /* mark us unused / free shared pic */
1375             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1376                 s->avctx->release_buffer(s->avctx,
1377                                          (AVFrame *) s->reordered_input_picture[0]);
1378             for (i = 0; i < 4; i++)
1379                 s->reordered_input_picture[0]->f.data[i] = NULL;
1380             s->reordered_input_picture[0]->f.type = 0;
1381
1382             copy_picture_attributes(s, (AVFrame *) pic,
1383                                     (AVFrame *) s->reordered_input_picture[0]);
1384
1385             s->current_picture_ptr = pic;
1386         } else {
1387             // input is not a shared pix -> reuse buffer for current_pix
1388
1389             assert(s->reordered_input_picture[0]->f.type ==
1390                        FF_BUFFER_TYPE_USER ||
1391                    s->reordered_input_picture[0]->f.type ==
1392                        FF_BUFFER_TYPE_INTERNAL);
1393
1394             s->current_picture_ptr = s->reordered_input_picture[0];
1395             for (i = 0; i < 4; i++) {
1396                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1397             }
1398         }
1399         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1400
1401         s->picture_number = s->new_picture.f.display_picture_number;
1402         //printf("dpn:%d\n", s->picture_number);
1403     } else {
1404         memset(&s->new_picture, 0, sizeof(Picture));
1405     }
1406     return 0;
1407 }
1408
1409 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1410                           const AVFrame *pic_arg, int *got_packet)
1411 {
1412     MpegEncContext *s = avctx->priv_data;
1413     int i, stuffing_count, ret;
1414     int context_count = s->slice_context_count;
1415
1416     s->picture_in_gop_number++;
1417
1418     if (load_input_picture(s, pic_arg) < 0)
1419         return -1;
1420
1421     if (select_input_picture(s) < 0) {
1422         return -1;
1423     }
1424
1425     /* output? */
1426     if (s->new_picture.f.data[0]) {
1427         if (!pkt->data &&
1428             (ret = ff_alloc_packet(pkt, s->mb_width*s->mb_height*MAX_MB_BYTES)) < 0)
1429             return ret;
1430         if (s->mb_info) {
1431             s->mb_info_ptr = av_packet_new_side_data(pkt,
1432                                  AV_PKT_DATA_H263_MB_INFO,
1433                                  s->mb_width*s->mb_height*12);
1434             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1435         }
1436
1437         for (i = 0; i < context_count; i++) {
1438             int start_y = s->thread_context[i]->start_mb_y;
1439             int   end_y = s->thread_context[i]->  end_mb_y;
1440             int h       = s->mb_height;
1441             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1442             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1443
1444             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1445         }
1446
1447         s->pict_type = s->new_picture.f.pict_type;
1448         //emms_c();
1449         //printf("qs:%f %f %d\n", s->new_picture.quality,
1450         //       s->current_picture.quality, s->qscale);
1451         ff_MPV_frame_start(s, avctx);
1452 vbv_retry:
1453         if (encode_picture(s, s->picture_number) < 0)
1454             return -1;
1455
1456         avctx->header_bits = s->header_bits;
1457         avctx->mv_bits     = s->mv_bits;
1458         avctx->misc_bits   = s->misc_bits;
1459         avctx->i_tex_bits  = s->i_tex_bits;
1460         avctx->p_tex_bits  = s->p_tex_bits;
1461         avctx->i_count     = s->i_count;
1462         // FIXME f/b_count in avctx
1463         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1464         avctx->skip_count  = s->skip_count;
1465
1466         ff_MPV_frame_end(s);
1467
1468         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1469             ff_mjpeg_encode_picture_trailer(s);
1470
1471         if (avctx->rc_buffer_size) {
1472             RateControlContext *rcc = &s->rc_context;
1473             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1474
1475             if (put_bits_count(&s->pb) > max_size &&
1476                 s->lambda < s->avctx->lmax) {
1477                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1478                                        (s->qscale + 1) / s->qscale);
1479                 if (s->adaptive_quant) {
1480                     int i;
1481                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1482                         s->lambda_table[i] =
1483                             FFMAX(s->lambda_table[i] + 1,
1484                                   s->lambda_table[i] * (s->qscale + 1) /
1485                                   s->qscale);
1486                 }
1487                 s->mb_skipped = 0;        // done in MPV_frame_start()
1488                 // done in encode_picture() so we must undo it
1489                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1490                     if (s->flipflop_rounding          ||
1491                         s->codec_id == CODEC_ID_H263P ||
1492                         s->codec_id == CODEC_ID_MPEG4)
1493                         s->no_rounding ^= 1;
1494                 }
1495                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1496                     s->time_base       = s->last_time_base;
1497                     s->last_non_b_time = s->time - s->pp_time;
1498                 }
1499                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1500                 for (i = 0; i < context_count; i++) {
1501                     PutBitContext *pb = &s->thread_context[i]->pb;
1502                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1503                 }
1504                 goto vbv_retry;
1505             }
1506
1507             assert(s->avctx->rc_max_rate);
1508         }
1509
1510         if (s->flags & CODEC_FLAG_PASS1)
1511             ff_write_pass1_stats(s);
1512
1513         for (i = 0; i < 4; i++) {
1514             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1515             avctx->error[i] += s->current_picture_ptr->f.error[i];
1516         }
1517
1518         if (s->flags & CODEC_FLAG_PASS1)
1519             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1520                    avctx->i_tex_bits + avctx->p_tex_bits ==
1521                        put_bits_count(&s->pb));
1522         flush_put_bits(&s->pb);
1523         s->frame_bits  = put_bits_count(&s->pb);
1524
1525         stuffing_count = ff_vbv_update(s, s->frame_bits);
1526         if (stuffing_count) {
1527             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1528                     stuffing_count + 50) {
1529                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1530                 return -1;
1531             }
1532
1533             switch (s->codec_id) {
1534             case CODEC_ID_MPEG1VIDEO:
1535             case CODEC_ID_MPEG2VIDEO:
1536                 while (stuffing_count--) {
1537                     put_bits(&s->pb, 8, 0);
1538                 }
1539             break;
1540             case CODEC_ID_MPEG4:
1541                 put_bits(&s->pb, 16, 0);
1542                 put_bits(&s->pb, 16, 0x1C3);
1543                 stuffing_count -= 4;
1544                 while (stuffing_count--) {
1545                     put_bits(&s->pb, 8, 0xFF);
1546                 }
1547             break;
1548             default:
1549                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1550             }
1551             flush_put_bits(&s->pb);
1552             s->frame_bits  = put_bits_count(&s->pb);
1553         }
1554
1555         /* update mpeg1/2 vbv_delay for CBR */
1556         if (s->avctx->rc_max_rate                          &&
1557             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1558             s->out_format == FMT_MPEG1                     &&
1559             90000LL * (avctx->rc_buffer_size - 1) <=
1560                 s->avctx->rc_max_rate * 0xFFFFLL) {
1561             int vbv_delay, min_delay;
1562             double inbits  = s->avctx->rc_max_rate *
1563                              av_q2d(s->avctx->time_base);
1564             int    minbits = s->frame_bits - 8 *
1565                              (s->vbv_delay_ptr - s->pb.buf - 1);
1566             double bits    = s->rc_context.buffer_index + minbits - inbits;
1567
1568             if (bits < 0)
1569                 av_log(s->avctx, AV_LOG_ERROR,
1570                        "Internal error, negative bits\n");
1571
1572             assert(s->repeat_first_field == 0);
1573
1574             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1575             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1576                         s->avctx->rc_max_rate;
1577
1578             vbv_delay = FFMAX(vbv_delay, min_delay);
1579
1580             assert(vbv_delay < 0xFFFF);
1581
1582             s->vbv_delay_ptr[0] &= 0xF8;
1583             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1584             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1585             s->vbv_delay_ptr[2] &= 0x07;
1586             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1587             avctx->vbv_delay     = vbv_delay * 300;
1588         }
1589         s->total_bits     += s->frame_bits;
1590         avctx->frame_bits  = s->frame_bits;
1591
1592         pkt->pts = s->current_picture.f.pts;
1593         if (!s->low_delay) {
1594             if (!s->current_picture.f.coded_picture_number)
1595                 pkt->dts = pkt->pts - s->dts_delta;
1596             else
1597                 pkt->dts = s->reordered_pts;
1598             s->reordered_pts = s->input_picture[0]->f.pts;
1599         } else
1600             pkt->dts = pkt->pts;
1601         if (s->current_picture.f.key_frame)
1602             pkt->flags |= AV_PKT_FLAG_KEY;
1603         if (s->mb_info)
1604             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1605     } else {
1606         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1607         s->frame_bits = 0;
1608     }
1609     assert((s->frame_bits & 7) == 0);
1610
1611     pkt->size = s->frame_bits / 8;
1612     *got_packet = !!pkt->size;
1613     return 0;
1614 }
1615
1616 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1617                                                 int n, int threshold)
1618 {
1619     static const char tab[64] = {
1620         3, 2, 2, 1, 1, 1, 1, 1,
1621         1, 1, 1, 1, 1, 1, 1, 1,
1622         1, 1, 1, 1, 1, 1, 1, 1,
1623         0, 0, 0, 0, 0, 0, 0, 0,
1624         0, 0, 0, 0, 0, 0, 0, 0,
1625         0, 0, 0, 0, 0, 0, 0, 0,
1626         0, 0, 0, 0, 0, 0, 0, 0,
1627         0, 0, 0, 0, 0, 0, 0, 0
1628     };
1629     int score = 0;
1630     int run = 0;
1631     int i;
1632     DCTELEM *block = s->block[n];
1633     const int last_index = s->block_last_index[n];
1634     int skip_dc;
1635
1636     if (threshold < 0) {
1637         skip_dc = 0;
1638         threshold = -threshold;
1639     } else
1640         skip_dc = 1;
1641
1642     /* Are all we could set to zero already zero? */
1643     if (last_index <= skip_dc - 1)
1644         return;
1645
1646     for (i = 0; i <= last_index; i++) {
1647         const int j = s->intra_scantable.permutated[i];
1648         const int level = FFABS(block[j]);
1649         if (level == 1) {
1650             if (skip_dc && i == 0)
1651                 continue;
1652             score += tab[run];
1653             run = 0;
1654         } else if (level > 1) {
1655             return;
1656         } else {
1657             run++;
1658         }
1659     }
1660     if (score >= threshold)
1661         return;
1662     for (i = skip_dc; i <= last_index; i++) {
1663         const int j = s->intra_scantable.permutated[i];
1664         block[j] = 0;
1665     }
1666     if (block[0])
1667         s->block_last_index[n] = 0;
1668     else
1669         s->block_last_index[n] = -1;
1670 }
1671
1672 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1673                                int last_index)
1674 {
1675     int i;
1676     const int maxlevel = s->max_qcoeff;
1677     const int minlevel = s->min_qcoeff;
1678     int overflow = 0;
1679
1680     if (s->mb_intra) {
1681         i = 1; // skip clipping of intra dc
1682     } else
1683         i = 0;
1684
1685     for (; i <= last_index; i++) {
1686         const int j = s->intra_scantable.permutated[i];
1687         int level = block[j];
1688
1689         if (level > maxlevel) {
1690             level = maxlevel;
1691             overflow++;
1692         } else if (level < minlevel) {
1693             level = minlevel;
1694             overflow++;
1695         }
1696
1697         block[j] = level;
1698     }
1699
1700     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1701         av_log(s->avctx, AV_LOG_INFO,
1702                "warning, clipping %d dct coefficients to %d..%d\n",
1703                overflow, minlevel, maxlevel);
1704 }
1705
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the neighbourhood of up to 3x3 samples (cropped at the
 * block border) is gathered and the weight is set to
 * 36 * sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    total    += v;
                    total_sq += v * v;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1729
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: pick the quantizer, locate (or edge-pad) the source
 * pixels, build either raw pixel blocks (intra) or motion-compensated
 * residual blocks (inter), quantize every block that is not skipped,
 * optionally refine/eliminate coefficients, and finally hand the blocks to
 * the codec-specific macroblock writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        forwarded unchanged to the codec-specific writer
 * @param motion_y        forwarded unchanged to the codec-specific writer
 * @param mb_block_height number of chroma lines per macroblock used for
 *                        chroma addressing (encode_mb() passes 8 or 16)
 * @param mb_block_count  number of blocks per macroblock (encode_mb() passes
 *                        6 or 8)
 */
1730 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1731                                                 int motion_x, int motion_y,
1732                                                 int mb_block_height,
1733                                                 int mb_block_count)
1734 {
1735     int16_t weight[8][64];
1736     DCTELEM orig[8][64];
1737     const int mb_x = s->mb_x;
1738     const int mb_y = s->mb_y;
1739     int i;
1740     int skip_dct[8];
1741     int dct_offset = s->linesize * 8; // default for progressive frames
1742     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1743     int wrap_y, wrap_c;
1744
         /* every block starts with the global skip setting */
1745     for (i = 0; i < mb_block_count; i++)
1746         skip_dct[i] = s->skipdct;
1747
         /* per-macroblock quantizer selection */
1748     if (s->adaptive_quant) {
1749         const int last_qp = s->qscale;
1750         const int mb_xy = mb_x + mb_y * s->mb_stride;
1751
1752         s->lambda = s->lambda_table[mb_xy];
1753         update_qscale(s);
1754
1755         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1756             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1757             s->dquant = s->qscale - last_qp;
1758
1759             if (s->out_format == FMT_H263) {
                     /* quantizer change is limited to -2..2 here */
1760                 s->dquant = av_clip(s->dquant, -2, 2);
1761
1762                 if (s->codec_id == CODEC_ID_MPEG4) {
1763                     if (!s->mb_intra) {
                             /* B-frames: odd changes and direct mode force
                              * dquant to 0 */
1764                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1765                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1766                                 s->dquant = 0;
1767                         }
                             /* 8x8 motion vectors force dquant to 0 too */
1768                         if (s->mv_type == MV_TYPE_8X8)
1769                             s->dquant = 0;
1770                     }
1771                 }
1772             }
1773         }
1774         ff_set_qscale(s, last_qp + s->dquant);
1775     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1776         ff_set_qscale(s, s->qscale + s->dquant);
1777
         /* source pixel addresses of this macroblock in the input picture */
1778     wrap_y = s->linesize;
1779     wrap_c = s->uvlinesize;
1780     ptr_y  = s->new_picture.f.data[0] +
1781              (mb_y * 16 * wrap_y)              + mb_x * 16;
1782     ptr_cb = s->new_picture.f.data[1] +
1783              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1784     ptr_cr = s->new_picture.f.data[2] +
1785              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1786
         /* macroblock reaches past the right or bottom picture border: read
          * the source through s->edge_emu_buffer instead.
          * NOTE(review): the chroma calls pass mb_y * 8 and s->height >> 1
          * even when mb_block_height is 16 -- confirm this is intended for
          * the non-4:2:0 case. */
1787     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1788         uint8_t *ebuf = s->edge_emu_buffer + 32;
1789         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1790                                 mb_y * 16, s->width, s->height);
1791         ptr_y = ebuf;
1792         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1793                                 mb_block_height, mb_x * 8, mb_y * 8,
1794                                 s->width >> 1, s->height >> 1);
1795         ptr_cb = ebuf + 18 * wrap_y;
1796         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1797                                 mb_block_height, mb_x * 8, mb_y * 8,
1798                                 s->width >> 1, s->height >> 1);
1799         ptr_cr = ebuf + 18 * wrap_y + 8;
1800     }
1801
1802     if (s->mb_intra) {
             /* choose between progressive and interlaced DCT; the -400 bias
              * makes the progressive layout win unless interlaced is clearly
              * cheaper */
1803         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1804             int progressive_score, interlaced_score;
1805
1806             s->interlaced_dct = 0;
1807             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1808                                                     NULL, wrap_y, 8) +
1809                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1810                                                     NULL, wrap_y, 8) - 400;
1811
1812             if (progressive_score > 0) {
1813                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1814                                                        NULL, wrap_y * 2, 8) +
1815                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1816                                                        NULL, wrap_y * 2, 8);
1817                 if (progressive_score > interlaced_score) {
1818                     s->interlaced_dct = 1;
1819
                         /* the lower block pair now starts one line down and
                          * every block uses every second line */
1820                     dct_offset = wrap_y;
1821                     wrap_y <<= 1;
1822                     if (s->chroma_format == CHROMA_422)
1823                         wrap_c <<= 1;
1824                 }
1825             }
1826         }
1827
             /* intra: the blocks hold the source pixels themselves */
1828         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1829         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1830         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1831         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1832
1833         if (s->flags & CODEC_FLAG_GRAY) {
                 /* gray mode: chroma blocks are not transformed */
1834             skip_dct[4] = 1;
1835             skip_dct[5] = 1;
1836         } else {
1837             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1838             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1839             if (!s->chroma_y_shift) { /* 422 */
1840                 s->dsp.get_pixels(s->block[6],
1841                                   ptr_cb + (dct_offset >> 1), wrap_c);
1842                 s->dsp.get_pixels(s->block[7],
1843                                   ptr_cr + (dct_offset >> 1), wrap_c);
1844             }
1845         }
1846     } else {
             /* inter: build the prediction in s->dest[], then code the
              * difference between source and prediction */
1847         op_pixels_func (*op_pix)[4];
1848         qpel_mc_func (*op_qpix)[16];
1849         uint8_t *dest_y, *dest_cb, *dest_cr;
1850
1851         dest_y  = s->dest[0];
1852         dest_cb = s->dest[1];
1853         dest_cr = s->dest[2];
1854
1855         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1856             op_pix  = s->dsp.put_pixels_tab;
1857             op_qpix = s->dsp.put_qpel_pixels_tab;
1858         } else {
1859             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1860             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1861         }
1862
1863         if (s->mv_dir & MV_DIR_FORWARD) {
1864             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1865                        op_pix, op_qpix);
                 /* a following backward prediction is averaged into the
                  * forward one instead of overwriting it */
1866             op_pix  = s->dsp.avg_pixels_tab;
1867             op_qpix = s->dsp.avg_qpel_pixels_tab;
1868         }
1869         if (s->mv_dir & MV_DIR_BACKWARD) {
1870             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1871                        op_pix, op_qpix);
1872         }
1873
             /* same progressive/interlaced DCT decision as in the intra path,
              * but measured on prediction versus source */
1874         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1875             int progressive_score, interlaced_score;
1876
1877             s->interlaced_dct = 0;
1878             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1879                                                     ptr_y,              wrap_y,
1880                                                     8) +
1881                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1882                                                     ptr_y + wrap_y * 8, wrap_y,
1883                                                     8) - 400;
1884
1885             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1886                 progressive_score -= 400;
1887
1888             if (progressive_score > 0) {
1889                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1890                                                        ptr_y,
1891                                                        wrap_y * 2, 8) +
1892                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1893                                                        ptr_y + wrap_y,
1894                                                        wrap_y * 2, 8);
1895
1896                 if (progressive_score > interlaced_score) {
1897                     s->interlaced_dct = 1;
1898
1899                     dct_offset = wrap_y;
1900                     wrap_y <<= 1;
1901                     if (s->chroma_format == CHROMA_422)
1902                         wrap_c <<= 1;
1903                 }
1904             }
1905         }
1906
             /* residual = source - prediction, one call per block */
1907         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1908         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1909         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1910                            dest_y + dct_offset, wrap_y);
1911         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1912                            dest_y + dct_offset + 8, wrap_y);
1913
1914         if (s->flags & CODEC_FLAG_GRAY) {
1915             skip_dct[4] = 1;
1916             skip_dct[5] = 1;
1917         } else {
1918             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1919             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1920             if (!s->chroma_y_shift) { /* 422 */
1921                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1922                                    dest_cb + (dct_offset >> 1), wrap_c);
1923                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1924                                    dest_cr + (dct_offset >> 1), wrap_c);
1925             }
1926         }
1927         /* pre quantization */
             /* when the stored motion-compensated variance of this macroblock
              * is below 2 * qscale^2, skip each block whose SAD against the
              * prediction is below 20 * qscale */
1928         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1929                 2 * s->qscale * s->qscale) {
1930             // FIXME optimize
1931             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1932                               wrap_y, 8) < 20 * s->qscale)
1933                 skip_dct[0] = 1;
1934             if (s->dsp.sad[1](NULL, ptr_y + 8,
1935                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1936                 skip_dct[1] = 1;
1937             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1938                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1939                 skip_dct[2] = 1;
1940             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1941                               dest_y + dct_offset + 8,
1942                               wrap_y, 8) < 20 * s->qscale)
1943                 skip_dct[3] = 1;
1944             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1945                               wrap_c, 8) < 20 * s->qscale)
1946                 skip_dct[4] = 1;
1947             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1948                               wrap_c, 8) < 20 * s->qscale)
1949                 skip_dct[5] = 1;
1950             if (!s->chroma_y_shift) { /* 422 */
1951                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1952                                   dest_cb + (dct_offset >> 1),
1953                                   wrap_c, 8) < 20 * s->qscale)
1954                     skip_dct[6] = 1;
1955                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1956                                   dest_cr + (dct_offset >> 1),
1957                                   wrap_c, 8) < 20 * s->qscale)
1958                     skip_dct[7] = 1;
1959             }
1960         }
1961     }
1962
         /* noise shaping needs a weight table per coded block and a copy of
          * the blocks as they are before quantization */
1963     if (s->quantizer_noise_shaping) {
1964         if (!skip_dct[0])
1965             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1966         if (!skip_dct[1])
1967             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1968         if (!skip_dct[2])
1969             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1970         if (!skip_dct[3])
1971             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1972         if (!skip_dct[4])
1973             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1974         if (!skip_dct[5])
1975             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1976         if (!s->chroma_y_shift) { /* 422 */
1977             if (!skip_dct[6])
1978                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1979                                   wrap_c);
1980             if (!skip_dct[7])
1981                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1982                                   wrap_c);
1983         }
1984         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1985     }
1986
1987     /* DCT & quantize */
1988     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1989     {
1990         for (i = 0; i < mb_block_count; i++) {
1991             if (!skip_dct[i]) {
1992                 int overflow;
1993                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1994                 // FIXME we could decide to change to quantizer instead of
1995                 // clipping
1996                 // JS: I don't think that would be a good idea it could lower
1997                 //     quality instead of improve it. Just INTRADC clipping
1998                 //     deserves changes in quantizer
1999                 if (overflow)
2000                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2001             } else
                     /* -1 marks a block without any coded coefficient */
2002                 s->block_last_index[i] = -1;
2003         }
2004         if (s->quantizer_noise_shaping) {
2005             for (i = 0; i < mb_block_count; i++) {
2006                 if (!skip_dct[i]) {
2007                     s->block_last_index[i] =
2008                         dct_quantize_refine(s, s->block[i], weight[i],
2009                                             orig[i], i, s->qscale);
2010                 }
2011             }
2012         }
2013
             /* coefficient elimination is only applied to inter macroblocks */
2014         if (s->luma_elim_threshold && !s->mb_intra)
2015             for (i = 0; i < 4; i++)
2016                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2017         if (s->chroma_elim_threshold && !s->mb_intra)
2018             for (i = 4; i < mb_block_count; i++)
2019                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2020
2021         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2022             for (i = 0; i < mb_block_count; i++) {
2023                 if (s->block_last_index[i] == -1)
2024                     s->coded_score[i] = INT_MAX / 256;
2025             }
2026         }
2027     }
2028
         /* gray intra macroblocks still carry chroma blocks: give each a
          * single coefficient of 1024 / c_dc_scale, rounded to nearest */
2029     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2030         s->block_last_index[4] =
2031         s->block_last_index[5] = 0;
2032         s->block[4][0] =
2033         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2034     }
2035
2036     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute it by scanning backwards for the last nonzero value */
2037     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2038         for (i = 0; i < mb_block_count; i++) {
2039             int j;
2040             if (s->block_last_index[i] > 0) {
2041                 for (j = 63; j > 0; j--) {
2042                     if (s->block[i][s->intra_scantable.permutated[j]])
2043                         break;
2044                 }
2045                 s->block_last_index[i] = j;
2046             }
2047         }
2048     }
2049
2050     /* huffman encode */
         /* the CONFIG_* tests are written inline in each condition */
2051     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2052     case CODEC_ID_MPEG1VIDEO:
2053     case CODEC_ID_MPEG2VIDEO:
2054         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2055             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2056         break;
2057     case CODEC_ID_MPEG4:
2058         if (CONFIG_MPEG4_ENCODER)
2059             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2060         break;
2061     case CODEC_ID_MSMPEG4V2:
2062     case CODEC_ID_MSMPEG4V3:
2063     case CODEC_ID_WMV1:
2064         if (CONFIG_MSMPEG4_ENCODER)
2065             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2066         break;
2067     case CODEC_ID_WMV2:
2068         if (CONFIG_WMV2_ENCODER)
2069             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2070         break;
2071     case CODEC_ID_H261:
2072         if (CONFIG_H261_ENCODER)
2073             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2074         break;
2075     case CODEC_ID_H263:
2076     case CODEC_ID_H263P:
2077     case CODEC_ID_FLV1:
2078     case CODEC_ID_RV10:
2079     case CODEC_ID_RV20:
2080         if (CONFIG_H263_ENCODER)
2081             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2082         break;
2083     case CODEC_ID_MJPEG:
2084         if (CONFIG_MJPEG_ENCODER)
2085             ff_mjpeg_encode_mb(s, s->block);
2086         break;
2087     default:
2088         assert(0);
2089     }
2090 }
2091
2092 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2093 {
2094     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2095     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2096 }
2097
2098 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2099     int i;
2100
2101     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2102
2103     /* mpeg1 */
2104     d->mb_skip_run= s->mb_skip_run;
2105     for(i=0; i<3; i++)
2106         d->last_dc[i] = s->last_dc[i];
2107
2108     /* statistics */
2109     d->mv_bits= s->mv_bits;
2110     d->i_tex_bits= s->i_tex_bits;
2111     d->p_tex_bits= s->p_tex_bits;
2112     d->i_count= s->i_count;
2113     d->f_count= s->f_count;
2114     d->b_count= s->b_count;
2115     d->skip_count= s->skip_count;
2116     d->misc_bits= s->misc_bits;
2117     d->last_bits= 0;
2118
2119     d->mb_skipped= 0;
2120     d->qscale= s->qscale;
2121     d->dquant= s->dquant;
2122
2123     d->esc3_level_length= s->esc3_level_length;
2124 }
2125
2126 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2127     int i;
2128
2129     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2130     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2131
2132     /* mpeg1 */
2133     d->mb_skip_run= s->mb_skip_run;
2134     for(i=0; i<3; i++)
2135         d->last_dc[i] = s->last_dc[i];
2136
2137     /* statistics */
2138     d->mv_bits= s->mv_bits;
2139     d->i_tex_bits= s->i_tex_bits;
2140     d->p_tex_bits= s->p_tex_bits;
2141     d->i_count= s->i_count;
2142     d->f_count= s->f_count;
2143     d->b_count= s->b_count;
2144     d->skip_count= s->skip_count;
2145     d->misc_bits= s->misc_bits;
2146
2147     d->mb_intra= s->mb_intra;
2148     d->mb_skipped= s->mb_skipped;
2149     d->mv_type= s->mv_type;
2150     d->mv_dir= s->mv_dir;
2151     d->pb= s->pb;
2152     if(s->data_partitioning){
2153         d->pb2= s->pb2;
2154         d->tex_pb= s->tex_pb;
2155     }
2156     d->block= s->block;
2157     for(i=0; i<8; i++)
2158         d->block_last_index[i]= s->block_last_index[i];
2159     d->interlaced_dct= s->interlaced_dct;
2160     d->qscale= s->qscale;
2161
2162     d->esc3_level_length= s->esc3_level_length;
2163 }
2164
2165 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2166                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2167                            int *dmin, int *next_block, int motion_x, int motion_y)
2168 {
2169     int score;
2170     uint8_t *dest_backup[3];
2171
2172     copy_context_before_encode(s, backup, type);
2173
2174     s->block= s->blocks[*next_block];
2175     s->pb= pb[*next_block];
2176     if(s->data_partitioning){
2177         s->pb2   = pb2   [*next_block];
2178         s->tex_pb= tex_pb[*next_block];
2179     }
2180
2181     if(*next_block){
2182         memcpy(dest_backup, s->dest, sizeof(s->dest));
2183         s->dest[0] = s->rd_scratchpad;
2184         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2185         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2186         assert(s->linesize >= 32); //FIXME
2187     }
2188
2189     encode_mb(s, motion_x, motion_y);
2190
2191     score= put_bits_count(&s->pb);
2192     if(s->data_partitioning){
2193         score+= put_bits_count(&s->pb2);
2194         score+= put_bits_count(&s->tex_pb);
2195     }
2196
2197     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2198         ff_MPV_decode_mb(s, s->block);
2199
2200         score *= s->lambda2;
2201         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2202     }
2203
2204     if(*next_block){
2205         memcpy(s->dest, dest_backup, sizeof(s->dest));
2206     }
2207
2208     if(score<*dmin){
2209         *dmin= score;
2210         *next_block^=1;
2211
2212         copy_context_after_encode(best, s, type);
2213     }
2214 }
2215
2216 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2217     uint32_t *sq = ff_squareTbl + 256;
2218     int acc=0;
2219     int x,y;
2220
2221     if(w==16 && h==16)
2222         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2223     else if(w==8 && h==8)
2224         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2225
2226     for(y=0; y<h; y++){
2227         for(x=0; x<w; x++){
2228             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2229         }
2230     }
2231
2232     assert(acc>=0);
2233
2234     return acc;
2235 }
2236
2237 static int sse_mb(MpegEncContext *s){
2238     int w= 16;
2239     int h= 16;
2240
2241     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2242     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2243
2244     if(w==16 && h==16)
2245       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2246         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2247                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2248                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2249       }else{
2250         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2251                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2252                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2253       }
2254     else
2255         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2256                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2257                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2258 }
2259
2260 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2261     MpegEncContext *s= *(void**)arg;
2262
2263
2264     s->me.pre_pass=1;
2265     s->me.dia_size= s->avctx->pre_dia_size;
2266     s->first_slice_line=1;
2267     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2268         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2269             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2270         }
2271         s->first_slice_line=0;
2272     }
2273
2274     s->me.pre_pass=0;
2275
2276     return 0;
2277 }
2278
2279 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2280     MpegEncContext *s= *(void**)arg;
2281
2282     ff_check_alignment();
2283
2284     s->me.dia_size= s->avctx->dia_size;
2285     s->first_slice_line=1;
2286     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2287         s->mb_x=0; //for block init below
2288         ff_init_block_index(s);
2289         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2290             s->block_index[0]+=2;
2291             s->block_index[1]+=2;
2292             s->block_index[2]+=2;
2293             s->block_index[3]+=2;
2294
2295             /* compute motion vector & mb_type and store in context */
2296             if(s->pict_type==AV_PICTURE_TYPE_B)
2297                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2298             else
2299                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2300         }
2301         s->first_slice_line=0;
2302     }
2303     return 0;
2304 }
2305
2306 static int mb_var_thread(AVCodecContext *c, void *arg){
2307     MpegEncContext *s= *(void**)arg;
2308     int mb_x, mb_y;
2309
2310     ff_check_alignment();
2311
2312     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2313         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2314             int xx = mb_x * 16;
2315             int yy = mb_y * 16;
2316             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2317             int varc;
2318             int sum = s->dsp.pix_sum(pix, s->linesize);
2319
2320             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2321
2322             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2323             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2324             s->me.mb_var_sum_temp    += varc;
2325         }
2326     }
2327     return 0;
2328 }
2329
2330 static void write_slice_end(MpegEncContext *s){
2331     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2332         if(s->partitioned_frame){
2333             ff_mpeg4_merge_partitions(s);
2334         }
2335
2336         ff_mpeg4_stuffing(&s->pb);
2337     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2338         ff_mjpeg_encode_stuffing(&s->pb);
2339     }
2340
2341     avpriv_align_put_bits(&s->pb);
2342     flush_put_bits(&s->pb);
2343
2344     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2345         s->misc_bits+= get_bits_diff(s);
2346 }
2347
2348 static void write_mb_info(MpegEncContext *s)
2349 {
2350     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2351     int offset = put_bits_count(&s->pb);
2352     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2353     int gobn = s->mb_y / s->gob_index;
2354     int pred_x, pred_y;
2355     if (CONFIG_H263_ENCODER)
2356         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2357     bytestream_put_le32(&ptr, offset);
2358     bytestream_put_byte(&ptr, s->qscale);
2359     bytestream_put_byte(&ptr, gobn);
2360     bytestream_put_le16(&ptr, mba);
2361     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2362     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2363     /* 4MV not implemented */
2364     bytestream_put_byte(&ptr, 0); /* hmv2 */
2365     bytestream_put_byte(&ptr, 0); /* vmv2 */
2366 }
2367
2368 static void update_mb_info(MpegEncContext *s, int startcode)
2369 {
2370     if (!s->mb_info)
2371         return;
2372     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2373         s->mb_info_size += 12;
2374         s->prev_mb_info = s->last_mb_info;
2375     }
2376     if (startcode) {
2377         s->prev_mb_info = put_bits_count(&s->pb)/8;
2378         /* This might have incremented mb_info_size above, and we return without
2379          * actually writing any info into that slot yet. But in that case,
2380          * this will be called again at the start of the after writing the
2381          * start code, actually writing the mb info. */
2382         return;
2383     }
2384
2385     s->last_mb_info = put_bits_count(&s->pb)/8;
2386     if (!s->mb_info_size)
2387         s->mb_info_size += 12;
2388     write_mb_info(s);
2389 }
2390
2391 static int encode_thread(AVCodecContext *c, void *arg){
2392     MpegEncContext *s= *(void**)arg;
2393     int mb_x, mb_y, pdif = 0;
2394     int chr_h= 16>>s->chroma_y_shift;
2395     int i, j;
2396     MpegEncContext best_s, backup_s;
2397     uint8_t bit_buf[2][MAX_MB_BYTES];
2398     uint8_t bit_buf2[2][MAX_MB_BYTES];
2399     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2400     PutBitContext pb[2], pb2[2], tex_pb[2];
2401 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2402
2403     ff_check_alignment();
2404
2405     for(i=0; i<2; i++){
2406         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2407         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2408         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2409     }
2410
2411     s->last_bits= put_bits_count(&s->pb);
2412     s->mv_bits=0;
2413     s->misc_bits=0;
2414     s->i_tex_bits=0;
2415     s->p_tex_bits=0;
2416     s->i_count=0;
2417     s->f_count=0;
2418     s->b_count=0;
2419     s->skip_count=0;
2420
2421     for(i=0; i<3; i++){
2422         /* init last dc values */
2423         /* note: quant matrix value (8) is implied here */
2424         s->last_dc[i] = 128 << s->intra_dc_precision;
2425
2426         s->current_picture.f.error[i] = 0;
2427     }
2428     s->mb_skip_run = 0;
2429     memset(s->last_mv, 0, sizeof(s->last_mv));
2430
2431     s->last_mv_dir = 0;
2432
2433     switch(s->codec_id){
2434     case CODEC_ID_H263:
2435     case CODEC_ID_H263P:
2436     case CODEC_ID_FLV1:
2437         if (CONFIG_H263_ENCODER)
2438             s->gob_index = ff_h263_get_gob_height(s);
2439         break;
2440     case CODEC_ID_MPEG4:
2441         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2442             ff_mpeg4_init_partitions(s);
2443         break;
2444     }
2445
2446     s->resync_mb_x=0;
2447     s->resync_mb_y=0;
2448     s->first_slice_line = 1;
2449     s->ptr_lastgob = s->pb.buf;
2450     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2451 //    printf("row %d at %X\n", s->mb_y, (int)s);
2452         s->mb_x=0;
2453         s->mb_y= mb_y;
2454
2455         ff_set_qscale(s, s->qscale);
2456         ff_init_block_index(s);
2457
2458         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2459             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2460             int mb_type= s->mb_type[xy];
2461 //            int d;
2462             int dmin= INT_MAX;
2463             int dir;
2464
2465             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2466                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2467                 return -1;
2468             }
2469             if(s->data_partitioning){
2470                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2471                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2472                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2473                     return -1;
2474                 }
2475             }
2476
2477             s->mb_x = mb_x;
2478             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2479             ff_update_block_index(s);
2480
2481             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2482                 ff_h261_reorder_mb_index(s);
2483                 xy= s->mb_y*s->mb_stride + s->mb_x;
2484                 mb_type= s->mb_type[xy];
2485             }
2486
2487             /* write gob / video packet header  */
2488             if(s->rtp_mode){
2489                 int current_packet_size, is_gob_start;
2490
2491                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2492
2493                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2494
2495                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2496
2497                 switch(s->codec_id){
2498                 case CODEC_ID_H263:
2499                 case CODEC_ID_H263P:
2500                     if(!s->h263_slice_structured)
2501                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2502                     break;
2503                 case CODEC_ID_MPEG2VIDEO:
2504                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2505                 case CODEC_ID_MPEG1VIDEO:
2506                     if(s->mb_skip_run) is_gob_start=0;
2507                     break;
2508                 }
2509
2510                 if(is_gob_start){
2511                     if(s->start_mb_y != mb_y || mb_x!=0){
2512                         write_slice_end(s);
2513
2514                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2515                             ff_mpeg4_init_partitions(s);
2516                         }
2517                     }
2518
2519                     assert((put_bits_count(&s->pb)&7) == 0);
2520                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2521
2522                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2523                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2524                         int d= 100 / s->avctx->error_rate;
2525                         if(r % d == 0){
2526                             current_packet_size=0;
2527                             s->pb.buf_ptr= s->ptr_lastgob;
2528                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2529                         }
2530                     }
2531
2532                     if (s->avctx->rtp_callback){
2533                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2534                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2535                     }
2536                     update_mb_info(s, 1);
2537
2538                     switch(s->codec_id){
2539                     case CODEC_ID_MPEG4:
2540                         if (CONFIG_MPEG4_ENCODER) {
2541                             ff_mpeg4_encode_video_packet_header(s);
2542                             ff_mpeg4_clean_buffers(s);
2543                         }
2544                     break;
2545                     case CODEC_ID_MPEG1VIDEO:
2546                     case CODEC_ID_MPEG2VIDEO:
2547                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2548                             ff_mpeg1_encode_slice_header(s);
2549                             ff_mpeg1_clean_buffers(s);
2550                         }
2551                     break;
2552                     case CODEC_ID_H263:
2553                     case CODEC_ID_H263P:
2554                         if (CONFIG_H263_ENCODER)
2555                             ff_h263_encode_gob_header(s, mb_y);
2556                     break;
2557                     }
2558
2559                     if(s->flags&CODEC_FLAG_PASS1){
2560                         int bits= put_bits_count(&s->pb);
2561                         s->misc_bits+= bits - s->last_bits;
2562                         s->last_bits= bits;
2563                     }
2564
2565                     s->ptr_lastgob += current_packet_size;
2566                     s->first_slice_line=1;
2567                     s->resync_mb_x=mb_x;
2568                     s->resync_mb_y=mb_y;
2569                 }
2570             }
2571
2572             if(  (s->resync_mb_x   == s->mb_x)
2573                && s->resync_mb_y+1 == s->mb_y){
2574                 s->first_slice_line=0;
2575             }
2576
2577             s->mb_skipped=0;
2578             s->dquant=0; //only for QP_RD
2579
2580             update_mb_info(s, 0);
2581
2582             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2583                 int next_block=0;
2584                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2585
2586                 copy_context_before_encode(&backup_s, s, -1);
2587                 backup_s.pb= s->pb;
2588                 best_s.data_partitioning= s->data_partitioning;
2589                 best_s.partitioned_frame= s->partitioned_frame;
2590                 if(s->data_partitioning){
2591                     backup_s.pb2= s->pb2;
2592                     backup_s.tex_pb= s->tex_pb;
2593                 }
2594
2595                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2596                     s->mv_dir = MV_DIR_FORWARD;
2597                     s->mv_type = MV_TYPE_16X16;
2598                     s->mb_intra= 0;
2599                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2600                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2601                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2602                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2603                 }
2604                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2605                     s->mv_dir = MV_DIR_FORWARD;
2606                     s->mv_type = MV_TYPE_FIELD;
2607                     s->mb_intra= 0;
2608                     for(i=0; i<2; i++){
2609                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2610                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2611                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2612                     }
2613                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2614                                  &dmin, &next_block, 0, 0);
2615                 }
2616                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2617                     s->mv_dir = MV_DIR_FORWARD;
2618                     s->mv_type = MV_TYPE_16X16;
2619                     s->mb_intra= 0;
2620                     s->mv[0][0][0] = 0;
2621                     s->mv[0][0][1] = 0;
2622                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2623                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2624                 }
2625                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2626                     s->mv_dir = MV_DIR_FORWARD;
2627                     s->mv_type = MV_TYPE_8X8;
2628                     s->mb_intra= 0;
2629                     for(i=0; i<4; i++){
2630                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2631                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2632                     }
2633                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2634                                  &dmin, &next_block, 0, 0);
2635                 }
2636                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2637                     s->mv_dir = MV_DIR_FORWARD;
2638                     s->mv_type = MV_TYPE_16X16;
2639                     s->mb_intra= 0;
2640                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2641                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2642                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2643                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2644                 }
2645                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2646                     s->mv_dir = MV_DIR_BACKWARD;
2647                     s->mv_type = MV_TYPE_16X16;
2648                     s->mb_intra= 0;
2649                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2650                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2651                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2652                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2653                 }
2654                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2655                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2656                     s->mv_type = MV_TYPE_16X16;
2657                     s->mb_intra= 0;
2658                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2659                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2660                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2661                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2662                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2663                                  &dmin, &next_block, 0, 0);
2664                 }
2665                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2666                     s->mv_dir = MV_DIR_FORWARD;
2667                     s->mv_type = MV_TYPE_FIELD;
2668                     s->mb_intra= 0;
2669                     for(i=0; i<2; i++){
2670                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2671                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2672                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2673                     }
2674                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2675                                  &dmin, &next_block, 0, 0);
2676                 }
2677                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2678                     s->mv_dir = MV_DIR_BACKWARD;
2679                     s->mv_type = MV_TYPE_FIELD;
2680                     s->mb_intra= 0;
2681                     for(i=0; i<2; i++){
2682                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2683                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2684                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2685                     }
2686                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2687                                  &dmin, &next_block, 0, 0);
2688                 }
2689                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2690                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2691                     s->mv_type = MV_TYPE_FIELD;
2692                     s->mb_intra= 0;
2693                     for(dir=0; dir<2; dir++){
2694                         for(i=0; i<2; i++){
2695                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2696                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2697                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2698                         }
2699                     }
2700                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2701                                  &dmin, &next_block, 0, 0);
2702                 }
2703                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2704                     s->mv_dir = 0;
2705                     s->mv_type = MV_TYPE_16X16;
2706                     s->mb_intra= 1;
2707                     s->mv[0][0][0] = 0;
2708                     s->mv[0][0][1] = 0;
2709                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2710                                  &dmin, &next_block, 0, 0);
2711                     if(s->h263_pred || s->h263_aic){
2712                         if(best_s.mb_intra)
2713                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2714                         else
2715                             ff_clean_intra_table_entries(s); //old mode?
2716                     }
2717                 }
2718
2719                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2720                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2721                         const int last_qp= backup_s.qscale;
2722                         int qpi, qp, dc[6];
2723                         DCTELEM ac[6][16];
2724                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2725                         static const int dquant_tab[4]={-1,1,-2,2};
2726
2727                         assert(backup_s.dquant == 0);
2728
2729                         //FIXME intra
2730                         s->mv_dir= best_s.mv_dir;
2731                         s->mv_type = MV_TYPE_16X16;
2732                         s->mb_intra= best_s.mb_intra;
2733                         s->mv[0][0][0] = best_s.mv[0][0][0];
2734                         s->mv[0][0][1] = best_s.mv[0][0][1];
2735                         s->mv[1][0][0] = best_s.mv[1][0][0];
2736                         s->mv[1][0][1] = best_s.mv[1][0][1];
2737
2738                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2739                         for(; qpi<4; qpi++){
2740                             int dquant= dquant_tab[qpi];
2741                             qp= last_qp + dquant;
2742                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2743                                 continue;
2744                             backup_s.dquant= dquant;
2745                             if(s->mb_intra && s->dc_val[0]){
2746                                 for(i=0; i<6; i++){
2747                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2748                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2749                                 }
2750                             }
2751
2752                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2753                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2754                             if(best_s.qscale != qp){
2755                                 if(s->mb_intra && s->dc_val[0]){
2756                                     for(i=0; i<6; i++){
2757                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2758                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2759                                     }
2760                                 }
2761                             }
2762                         }
2763                     }
2764                 }
2765                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2766                     int mx= s->b_direct_mv_table[xy][0];
2767                     int my= s->b_direct_mv_table[xy][1];
2768
2769                     backup_s.dquant = 0;
2770                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2771                     s->mb_intra= 0;
2772                     ff_mpeg4_set_direct_mv(s, mx, my);
2773                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2774                                  &dmin, &next_block, mx, my);
2775                 }
2776                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2777                     backup_s.dquant = 0;
2778                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2779                     s->mb_intra= 0;
2780                     ff_mpeg4_set_direct_mv(s, 0, 0);
2781                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2782                                  &dmin, &next_block, 0, 0);
2783                 }
2784                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2785                     int coded=0;
2786                     for(i=0; i<6; i++)
2787                         coded |= s->block_last_index[i];
2788                     if(coded){
2789                         int mx,my;
2790                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2791                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2792                             mx=my=0; //FIXME find the one we actually used
2793                             ff_mpeg4_set_direct_mv(s, mx, my);
2794                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2795                             mx= s->mv[1][0][0];
2796                             my= s->mv[1][0][1];
2797                         }else{
2798                             mx= s->mv[0][0][0];
2799                             my= s->mv[0][0][1];
2800                         }
2801
2802                         s->mv_dir= best_s.mv_dir;
2803                         s->mv_type = best_s.mv_type;
2804                         s->mb_intra= 0;
2805 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2806                         s->mv[0][0][1] = best_s.mv[0][0][1];
2807                         s->mv[1][0][0] = best_s.mv[1][0][0];
2808                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2809                         backup_s.dquant= 0;
2810                         s->skipdct=1;
2811                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2812                                         &dmin, &next_block, mx, my);
2813                         s->skipdct=0;
2814                     }
2815                 }
2816
2817                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2818
2819                 copy_context_after_encode(s, &best_s, -1);
2820
2821                 pb_bits_count= put_bits_count(&s->pb);
2822                 flush_put_bits(&s->pb);
2823                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2824                 s->pb= backup_s.pb;
2825
2826                 if(s->data_partitioning){
2827                     pb2_bits_count= put_bits_count(&s->pb2);
2828                     flush_put_bits(&s->pb2);
2829                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2830                     s->pb2= backup_s.pb2;
2831
2832                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2833                     flush_put_bits(&s->tex_pb);
2834                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2835                     s->tex_pb= backup_s.tex_pb;
2836                 }
2837                 s->last_bits= put_bits_count(&s->pb);
2838
2839                 if (CONFIG_H263_ENCODER &&
2840                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2841                     ff_h263_update_motion_val(s);
2842
2843                 if(next_block==0){ //FIXME 16 vs linesize16
2844                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2845                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2846                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2847                 }
2848
2849                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2850                     ff_MPV_decode_mb(s, s->block);
2851             } else {
2852                 int motion_x = 0, motion_y = 0;
2853                 s->mv_type=MV_TYPE_16X16;
2854                 // only one MB-Type possible
2855
2856                 switch(mb_type){
2857                 case CANDIDATE_MB_TYPE_INTRA:
2858                     s->mv_dir = 0;
2859                     s->mb_intra= 1;
2860                     motion_x= s->mv[0][0][0] = 0;
2861                     motion_y= s->mv[0][0][1] = 0;
2862                     break;
2863                 case CANDIDATE_MB_TYPE_INTER:
2864                     s->mv_dir = MV_DIR_FORWARD;
2865                     s->mb_intra= 0;
2866                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2867                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2868                     break;
2869                 case CANDIDATE_MB_TYPE_INTER_I:
2870                     s->mv_dir = MV_DIR_FORWARD;
2871                     s->mv_type = MV_TYPE_FIELD;
2872                     s->mb_intra= 0;
2873                     for(i=0; i<2; i++){
2874                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2875                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2876                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2877                     }
2878                     break;
2879                 case CANDIDATE_MB_TYPE_INTER4V:
2880                     s->mv_dir = MV_DIR_FORWARD;
2881                     s->mv_type = MV_TYPE_8X8;
2882                     s->mb_intra= 0;
2883                     for(i=0; i<4; i++){
2884                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2885                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2886                     }
2887                     break;
2888                 case CANDIDATE_MB_TYPE_DIRECT:
2889                     if (CONFIG_MPEG4_ENCODER) {
2890                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2891                         s->mb_intra= 0;
2892                         motion_x=s->b_direct_mv_table[xy][0];
2893                         motion_y=s->b_direct_mv_table[xy][1];
2894                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2895                     }
2896                     break;
2897                 case CANDIDATE_MB_TYPE_DIRECT0:
2898                     if (CONFIG_MPEG4_ENCODER) {
2899                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2900                         s->mb_intra= 0;
2901                         ff_mpeg4_set_direct_mv(s, 0, 0);
2902                     }
2903                     break;
2904                 case CANDIDATE_MB_TYPE_BIDIR:
2905                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2906                     s->mb_intra= 0;
2907                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2908                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2909                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2910                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2911                     break;
2912                 case CANDIDATE_MB_TYPE_BACKWARD:
2913                     s->mv_dir = MV_DIR_BACKWARD;
2914                     s->mb_intra= 0;
2915                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2916                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2917                     break;
2918                 case CANDIDATE_MB_TYPE_FORWARD:
2919                     s->mv_dir = MV_DIR_FORWARD;
2920                     s->mb_intra= 0;
2921                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2922                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2923 //                    printf(" %d %d ", motion_x, motion_y);
2924                     break;
2925                 case CANDIDATE_MB_TYPE_FORWARD_I:
2926                     s->mv_dir = MV_DIR_FORWARD;
2927                     s->mv_type = MV_TYPE_FIELD;
2928                     s->mb_intra= 0;
2929                     for(i=0; i<2; i++){
2930                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2931                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2932                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2933                     }
2934                     break;
2935                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2936                     s->mv_dir = MV_DIR_BACKWARD;
2937                     s->mv_type = MV_TYPE_FIELD;
2938                     s->mb_intra= 0;
2939                     for(i=0; i<2; i++){
2940                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2941                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2942                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2943                     }
2944                     break;
2945                 case CANDIDATE_MB_TYPE_BIDIR_I:
2946                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2947                     s->mv_type = MV_TYPE_FIELD;
2948                     s->mb_intra= 0;
2949                     for(dir=0; dir<2; dir++){
2950                         for(i=0; i<2; i++){
2951                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2952                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2953                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2954                         }
2955                     }
2956                     break;
2957                 default:
2958                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2959                 }
2960
2961                 encode_mb(s, motion_x, motion_y);
2962
2963                 // RAL: Update last macroblock type
2964                 s->last_mv_dir = s->mv_dir;
2965
2966                 if (CONFIG_H263_ENCODER &&
2967                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2968                     ff_h263_update_motion_val(s);
2969
2970                 ff_MPV_decode_mb(s, s->block);
2971             }
2972
2973             /* clean the MV table in IPS frames for direct mode in B frames */
2974             if(s->mb_intra /* && I,P,S_TYPE */){
2975                 s->p_mv_table[xy][0]=0;
2976                 s->p_mv_table[xy][1]=0;
2977             }
2978
2979             if(s->flags&CODEC_FLAG_PSNR){
2980                 int w= 16;
2981                 int h= 16;
2982
2983                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2984                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2985
2986                 s->current_picture.f.error[0] += sse(
2987                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2988                     s->dest[0], w, h, s->linesize);
2989                 s->current_picture.f.error[1] += sse(
2990                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2991                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2992                 s->current_picture.f.error[2] += sse(
2993                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2994                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2995             }
2996             if(s->loop_filter){
2997                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2998                     ff_h263_loop_filter(s);
2999             }
3000 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
3001         }
3002     }
3003
3004     //not beautiful here but we must write it before flushing so it has to be here
3005     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3006         ff_msmpeg4_encode_ext_header(s);
3007
3008     write_slice_end(s);
3009
3010     /* Send the last GOB if RTP */
3011     if (s->avctx->rtp_callback) {
3012         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3013         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3014         /* Call the RTP callback to send the last GOB */
3015         emms_c();
3016         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3017     }
3018
3019     return 0;
3020 }
3021
3022 #define MERGE(field) dst->field += src->field; src->field=0
3023 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3024     MERGE(me.scene_change_score);
3025     MERGE(me.mc_mb_var_sum_temp);
3026     MERGE(me.mb_var_sum_temp);
3027 }
3028
3029 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3030     int i;
3031
3032     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3033     MERGE(dct_count[1]);
3034     MERGE(mv_bits);
3035     MERGE(i_tex_bits);
3036     MERGE(p_tex_bits);
3037     MERGE(i_count);
3038     MERGE(f_count);
3039     MERGE(b_count);
3040     MERGE(skip_count);
3041     MERGE(misc_bits);
3042     MERGE(error_count);
3043     MERGE(padding_bug_score);
3044     MERGE(current_picture.f.error[0]);
3045     MERGE(current_picture.f.error[1]);
3046     MERGE(current_picture.f.error[2]);
3047
3048     if(dst->avctx->noise_reduction){
3049         for(i=0; i<64; i++){
3050             MERGE(dct_error_sum[0][i]);
3051             MERGE(dct_error_sum[1][i]);
3052         }
3053     }
3054
3055     assert(put_bits_count(&src->pb) % 8 ==0);
3056     assert(put_bits_count(&dst->pb) % 8 ==0);
3057     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3058     flush_put_bits(&dst->pb);
3059 }
3060
3061 static int estimate_qp(MpegEncContext *s, int dry_run){
3062     if (s->next_lambda){
3063         s->current_picture_ptr->f.quality =
3064         s->current_picture.f.quality = s->next_lambda;
3065         if(!dry_run) s->next_lambda= 0;
3066     } else if (!s->fixed_qscale) {
3067         s->current_picture_ptr->f.quality =
3068         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3069         if (s->current_picture.f.quality < 0)
3070             return -1;
3071     }
3072
3073     if(s->adaptive_quant){
3074         switch(s->codec_id){
3075         case CODEC_ID_MPEG4:
3076             if (CONFIG_MPEG4_ENCODER)
3077                 ff_clean_mpeg4_qscales(s);
3078             break;
3079         case CODEC_ID_H263:
3080         case CODEC_ID_H263P:
3081         case CODEC_ID_FLV1:
3082             if (CONFIG_H263_ENCODER)
3083                 ff_clean_h263_qscales(s);
3084             break;
3085         default:
3086             ff_init_qscale_tab(s);
3087         }
3088
3089         s->lambda= s->lambda_table[0];
3090         //FIXME broken
3091     }else
3092         s->lambda = s->current_picture.f.quality;
3093 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3094     update_qscale(s);
3095     return 0;
3096 }
3097
3098 /* must be called before writing the header */
3099 static void set_frame_distances(MpegEncContext * s){
3100     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3101     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3102
3103     if(s->pict_type==AV_PICTURE_TYPE_B){
3104         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3105         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3106     }else{
3107         s->pp_time= s->time - s->last_non_b_time;
3108         s->last_non_b_time= s->time;
3109         assert(s->picture_number==0 || s->pp_time > 0);
3110     }
3111 }
3112
/**
 * Encode the current picture.
 *
 * Steps, in order: set up timing and rounding state, seed lambda, run
 * motion estimation (or, for I pictures, mark all macroblocks intra and
 * optionally run mb_var_thread), merge the per-context ME statistics,
 * possibly turn a P picture into an I picture on a scene change, choose
 * f_code/b_code and fix over-long vectors, pick the final quantizer, write
 * the format-specific picture header and finally run encode_thread over
 * all slice contexts and merge their results.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3113 static int encode_picture(MpegEncContext *s, int picture_number)
3114 {
3115     int i;
3116     int bits;
3117     int context_count = s->slice_context_count;
3118
3119     s->picture_number = picture_number;
3120
3121     /* Reset the average MB variance */
3122     s->me.mb_var_sum_temp    =
3123     s->me.mc_mb_var_sum_temp = 0;
3124
3125     /* we need to initialize some time vars before we can encode b-frames */
3126     // RAL: Condition added for MPEG1VIDEO
3127     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3128         set_frame_distances(s);
3129     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3130         ff_set_mpeg4_time(s);
3131
3132     s->me.scene_change_score=0;
3133
3134 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3135
         /* Rounding mode: I pictures reset it (1 for msmpeg4 version >= 3,
          * otherwise 0); other non-B pictures toggle it when flipflop
          * rounding is enabled or the codec is H263P or MPEG4. */
3136     if(s->pict_type==AV_PICTURE_TYPE_I){
3137         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3138         else                        s->no_rounding=0;
3139     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3140         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3141             s->no_rounding ^= 1;
3142     }
3143
         /* Preliminary lambda for motion estimation: in pass 2 do a dry-run
          * quantizer estimate and fetch the f_code via ff_get_2pass_fcode();
          * otherwise, unless CODEC_FLAG_QSCALE is set, take the value stored
          * in s->last_lambda_for[] for the matching picture type. */
3144     if(s->flags & CODEC_FLAG_PASS2){
3145         if (estimate_qp(s,1) < 0)
3146             return -1;
3147         ff_get_2pass_fcode(s);
3148     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3149         if(s->pict_type==AV_PICTURE_TYPE_B)
3150             s->lambda= s->last_lambda_for[s->pict_type];
3151         else
3152             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3153         update_qscale(s);
3154     }
3155
3156     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* bring the additional contexts (index 1 and up) in line with s */
3157     for(i=1; i<context_count; i++){
3158         ff_update_duplicate_context(s->thread_context[i], s);
3159     }
3160
3161     if(ff_init_me(s)<0)
3162         return -1;
3163
3164     /* Estimate motion for every MB */
3165     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3166         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3167         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* optional pre-pass: only for non-B pictures with me_threshold == 0,
              * when pre_me is 2, or pre_me is non-zero and the last non-B
              * picture was an I picture */
3168         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3169             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3170                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3171             }
3172         }
3173
3174         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3175     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3176         /* I-Frame */
3177         for(i=0; i<s->mb_stride*s->mb_height; i++)
3178             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3179
3180         if(!s->fixed_qscale){
3181             /* finding spatial complexity for I-frame rate control */
3182             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3183         }
3184     }
         /* collect the ME accumulators of the additional contexts into s */
3185     for(i=1; i<context_count; i++){
3186         merge_context_after_me(s, s->thread_context[i]);
3187     }
3188     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3189     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3190     emms_c();
3191
         /* scene change: re-type a P picture as I and mark every MB intra */
3192     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3193         s->pict_type= AV_PICTURE_TYPE_I;
3194         for(i=0; i<s->mb_stride*s->mb_height; i++)
3195             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3196 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3197     }
3198
         /* Choose f_code/b_code from the estimated vectors and let
          * ff_fix_long_mvs() deal with vectors that do not fit; the whole
          * step is skipped when umvplus is set. */
3199     if(!s->umvplus){
3200         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3201             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3202
3203             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3204                 int a,b;
3205                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3206                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3207                 s->f_code= FFMAX3(s->f_code, a, b);
3208             }
3209
3210             ff_fix_long_p_mvs(s);
3211             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3212             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3213                 int j;
3214                 for(i=0; i<2; i++){
3215                     for(j=0; j<2; j++)
3216                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3217                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3218                 }
3219             }
3220         }
3221
3222         if(s->pict_type==AV_PICTURE_TYPE_B){
3223             int a, b;
3224
                 /* f_code covers the forward tables, b_code the backward ones */
3225             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3226             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3227             s->f_code = FFMAX(a, b);
3228
3229             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3230             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3231             s->b_code = FFMAX(a, b);
3232
3233             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3234             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3235             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3236             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3237             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3238                 int dir, j;
3239                 for(dir=0; dir<2; dir++){
3240                     for(i=0; i<2; i++){
3241                         for(j=0; j<2; j++){
3242                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3243                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3244                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3245                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3246                         }
3247                     }
3248                 }
3249             }
3250         }
3251     }
3252
         /* final (non dry-run) quantizer decision */
3253     if (estimate_qp(s, 0) < 0)
3254         return -1;
3255
3256     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3257         s->qscale= 3; //reduce clipping problems
3258
3259     if (s->out_format == FMT_MJPEG) {
3260         /* for mjpeg, we do include qscale in the matrix */
3261         for(i=1;i<64;i++){
3262             int j= s->dsp.idct_permutation[i];
3263
3264             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3265         }
3266         s->y_dc_scale_table=
3267         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3268         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3269         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3270                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix above, so it is pinned to 8 */
3271         s->qscale= 8;
3272     }
3273
3274     //FIXME var duplication
3275     s->current_picture_ptr->f.key_frame =
3276     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3277     s->current_picture_ptr->f.pict_type =
3278     s->current_picture.f.pict_type = s->pict_type;
3279
3280     if (s->current_picture.f.key_frame)
3281         s->picture_in_gop_number=0;
3282
         /* write the picture header and record how many bits it took */
3283     s->last_bits= put_bits_count(&s->pb);
3284     switch(s->out_format) {
3285     case FMT_MJPEG:
3286         if (CONFIG_MJPEG_ENCODER)
3287             ff_mjpeg_encode_picture_header(s);
3288         break;
3289     case FMT_H261:
3290         if (CONFIG_H261_ENCODER)
3291             ff_h261_encode_picture_header(s, picture_number);
3292         break;
3293     case FMT_H263:
3294         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3295             ff_wmv2_encode_picture_header(s, picture_number);
3296         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3297             ff_msmpeg4_encode_picture_header(s, picture_number);
3298         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3299             ff_mpeg4_encode_picture_header(s, picture_number);
3300         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3301             ff_rv10_encode_picture_header(s, picture_number);
3302         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3303             ff_rv20_encode_picture_header(s, picture_number);
3304         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3305             ff_flv_encode_picture_header(s, picture_number);
3306         else if (CONFIG_H263_ENCODER)
3307             ff_h263_encode_picture_header(s, picture_number);
3308         break;
3309     case FMT_MPEG1:
3310         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3311             ff_mpeg1_encode_picture_header(s, picture_number);
3312         break;
3313     case FMT_H264:
3314         break;
3315     default:
3316         assert(0);
3317     }
3318     bits= put_bits_count(&s->pb);
3319     s->header_bits= bits - s->last_bits;
3320
         /* hand the post-ME state to the additional contexts, encode with
          * all contexts, then merge their statistics and bitstreams into s */
3321     for(i=1; i<context_count; i++){
3322         update_duplicate_context_after_me(s->thread_context[i], s);
3323     }
3324     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3325     for(i=1; i<context_count; i++){
3326         merge_context_after_encode(s, s->thread_context[i]);
3327     }
3328     emms_c();
3329     return 0;
3330 }
3331
3332 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3333     const int intra= s->mb_intra;
3334     int i;
3335
3336     s->dct_count[intra]++;
3337
3338     for(i=0; i<64; i++){
3339         int level= block[i];
3340
3341         if(level){
3342             if(level>0){
3343                 s->dct_error_sum[intra][i] += level;
3344                 level -= s->dct_offset[intra][i];
3345                 if(level<0) level=0;
3346             }else{
3347                 s->dct_error_sum[intra][i] -= level;
3348                 level += s->dct_offset[intra][i];
3349                 if(level>0) level=0;
3350             }
3351             block[i]= level;
3352         }
3353     }
3354 }
3355
/**
 * Forward-transform and quantize one block, choosing the coded levels with
 * a rate-distortion search instead of plain rounding.
 *
 * For every scan position up to the last coefficient that exceeds the
 * quantization threshold, up to two candidate levels are kept (the biased
 * rounding result and the value one step closer to zero). A dynamic
 * programming pass then picks, per position, the candidate and run length
 * that minimise distortion + lambda * VLC length, and the chosen run/level
 * chain is written back into block.
 *
 * @param s        encoder context
 * @param block    64 input values; transformed with s->dsp.fdct and then
 *                 overwritten with the chosen quantized levels
 * @param n        block index; values below 4 select y_dc_scale, others
 *                 c_dc_scale; also indexes s->coded_score
 * @param qscale   quantizer, used to index the q_intra/q_inter matrices
 * @param overflow set to non-zero when the OR of the candidate magnitudes
 *                 exceeds s->max_qcoeff, i.e. overflow might have happened
 * @return scan index of the last non-zero coefficient; a value below the
 *         first AC/coded position (0 for intra, -1 for inter) means that
 *         nothing beyond the intra DC is coded
 */
3356 static int dct_quantize_trellis_c(MpegEncContext *s,
3357                                   DCTELEM *block, int n,
3358                                   int qscale, int *overflow){
3359     const int *qmat;
3360     const uint8_t *scantable= s->intra_scantable.scantable;
3361     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3362     int max=0;
3363     unsigned int threshold1, threshold2;
3364     int bias=0;
         /* run_tab/level_tab[k]: best run and level for the coefficient at
          * position k-1; score_tab[k]: best score for coding everything
          * before position k */
3365     int run_tab[65];
3366     int level_tab[65];
3367     int score_tab[65];
         /* positions still considered as the start of the next run */
3368     int survivor[65];
3369     int survivor_count;
3370     int last_run=0;
3371     int last_level=0;
3372     int last_score= 0;
3373     int last_i;
         /* candidate levels per scan position and how many of them are valid */
3374     int coeff[2][64];
3375     int coeff_count[64];
3376     int qmul, qadd, start_i, last_non_zero, i, dc;
3377     const int esc_length= s->ac_esc_length;
3378     uint8_t * length;
3379     uint8_t * last_length;
3380     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3381
3382     s->dsp.fdct (block);
3383
3384     if(s->dct_error_sum)
3385         s->denoise_dct(s, block);
3386     qmul= qscale*16;
3387     qadd= ((qscale-1)|1)*8;
3388
3389     if (s->mb_intra) {
3390         int q;
3391         if (!s->h263_aic) {
3392             if (n < 4)
3393                 q = s->y_dc_scale;
3394             else
3395                 q = s->c_dc_scale;
3396             q = q << 3;
3397         } else{
3398             /* For AIC we skip quant/dequant of INTRADC */
3399             q = 1 << 3;
3400             qadd=0;
3401         }
3402
3403         /* note: block[0] is assumed to be positive */
3404         block[0] = (block[0] + (q >> 1)) / q;
3405         start_i = 1;
3406         last_non_zero = 0;
3407         qmat = s->q_intra_matrix[qscale];
3408         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3409             bias= 1<<(QMAT_SHIFT-1);
3410         length     = s->intra_ac_vlc_length;
3411         last_length= s->intra_ac_vlc_last_length;
3412     } else {
3413         start_i = 0;
3414         last_non_zero = -1;
3415         qmat = s->q_inter_matrix[qscale];
3416         length     = s->inter_ac_vlc_length;
3417         last_length= s->inter_ac_vlc_last_length;
3418     }
3419     last_i= start_i;
3420
         /* the unsigned compare (level+threshold1) > threshold2 used below is
          * true exactly when level lies outside [-threshold1, threshold1] */
3421     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3422     threshold2= (threshold1<<1);
3423
         /* find the last scan position whose coefficient exceeds the threshold */
3424     for(i=63; i>=start_i; i--) {
3425         const int j = scantable[i];
3426         int level = block[j] * qmat[j];
3427
3428         if(((unsigned)(level+threshold1))>threshold2){
3429             last_non_zero = i;
3430             break;
3431         }
3432     }
3433
         /* build the candidate levels for every position up to last_non_zero */
3434     for(i=start_i; i<=last_non_zero; i++) {
3435         const int j = scantable[i];
3436         int level = block[j] * qmat[j];
3437
3438 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3439 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3440         if(((unsigned)(level+threshold1))>threshold2){
3441             if(level>0){
3442                 level= (bias + level)>>QMAT_SHIFT;
3443                 coeff[0][i]= level;
3444                 coeff[1][i]= level-1;
3445 //                coeff[2][k]= level-2;
3446             }else{
3447                 level= (bias - level)>>QMAT_SHIFT;
3448                 coeff[0][i]= -level;
3449                 coeff[1][i]= -level+1;
3450 //                coeff[2][k]= -level+2;
3451             }
                 /* level is the magnitude here; a magnitude of 1 has no second
                  * candidate because that would be zero */
3452             coeff_count[i]= FFMIN(level, 2);
3453             assert(coeff_count[i]);
3454             max |=level;
3455         }else{
                 /* below the threshold: the only candidate is +1 or -1,
                  * following the sign of the scaled coefficient */
3456             coeff[0][i]= (level>>31)|1;
3457             coeff_count[i]= 1;
3458         }
3459     }
3460
3461     *overflow= s->max_qcoeff < max; //overflow might have happened
3462
         /* nothing above the threshold: clear all positions from start_i on */
3463     if(last_non_zero < start_i){
3464         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3465         return last_non_zero;
3466     }
3467
3468     score_tab[start_i]= 0;
3469     survivor[0]= start_i;
3470     survivor_count= 1;
3471
         /* dynamic programming pass over the scan positions */
3472     for(i=start_i; i<=last_non_zero; i++){
3473         int level_index, j, zero_distortion;
3474         int dct_coeff= FFABS(block[ scantable[i] ]);
3475         int best_score=256*256*256*120;
3476
             /* NOTE(review): presumably compensates the scaling these fdct
              * variants leave in their output - confirm */
3477         if (   s->dsp.fdct == ff_fdct_ifast
3478 #ifndef FAAN_POSTSCALE
3479             || s->dsp.fdct == ff_faandct
3480 #endif
3481            )
3482             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3483         zero_distortion= dct_coeff*dct_coeff;
3484
3485         for(level_index=0; level_index < coeff_count[i]; level_index++){
3486             int distortion;
3487             int level= coeff[level_index][i];
3488             const int alevel= FFABS(level);
3489             int unquant_coeff;
3490
3491             assert(level);
3492
                 /* reconstruct the value this candidate level would produce */
3493             if(s->out_format == FMT_H263){
3494                 unquant_coeff= alevel*qmul + qadd;
3495             }else{ //MPEG1
3496                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3497                 if(s->mb_intra){
3498                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3499                         unquant_coeff =   (unquant_coeff - 1) | 1;
3500                 }else{
3501                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3502                         unquant_coeff =   (unquant_coeff - 1) | 1;
3503                 }
3504                 unquant_coeff<<= 3;
3505             }
3506
                 /* distortion relative to coding this position as zero */
3507             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* offset by 64: levels in [-64, 63] index the VLC length
                  * tables, anything else is costed with esc_length */
3508             level+=64;
3509             if((level&(~127)) == 0){
3510                 for(j=survivor_count-1; j>=0; j--){
3511                     int run= i - survivor[j];
3512                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3513                     score += score_tab[i-run];
3514
3515                     if(score < best_score){
3516                         best_score= score;
3517                         run_tab[i+1]= run;
3518                         level_tab[i+1]= level-64;
3519                     }
3520                 }
3521
                     /* for FMT_H263 the final coefficient is costed with
                      * last_length, so the best ending is tracked separately */
3522                 if(s->out_format == FMT_H263){
3523                     for(j=survivor_count-1; j>=0; j--){
3524                         int run= i - survivor[j];
3525                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3526                         score += score_tab[i-run];
3527                         if(score < last_score){
3528                             last_score= score;
3529                             last_run= run;
3530                             last_level= level-64;
3531                             last_i= i+1;
3532                         }
3533                     }
3534                 }
3535             }else{
3536                 distortion += esc_length*lambda;
3537                 for(j=survivor_count-1; j>=0; j--){
3538                     int run= i - survivor[j];
3539                     int score= distortion + score_tab[i-run];
3540
3541                     if(score < best_score){
3542                         best_score= score;
3543                         run_tab[i+1]= run;
3544                         level_tab[i+1]= level-64;
3545                     }
3546                 }
3547
3548                 if(s->out_format == FMT_H263){
3549                   for(j=survivor_count-1; j>=0; j--){
3550                         int run= i - survivor[j];
3551                         int score= distortion + score_tab[i-run];
3552                         if(score < last_score){
3553                             last_score= score;
3554                             last_run= run;
3555                             last_level= level-64;
3556                             last_i= i+1;
3557                         }
3558                     }
3559                 }
3560             }
3561         }
3562
3563         score_tab[i+1]= best_score;
3564
             /* drop trailing survivors whose score is worse than the new best
              * (with a margin of lambda when last_non_zero > 27) */
3565         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3566         if(last_non_zero <= 27){
3567             for(; survivor_count; survivor_count--){
3568                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3569                     break;
3570             }
3571         }else{
3572             for(; survivor_count; survivor_count--){
3573                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3574                     break;
3575             }
3576         }
3577
3578         survivor[ survivor_count++ ]= i+1;
3579     }
3580
         /* formats other than FMT_H263: choose the end position afterwards
          * from score_tab, adding lambda*2 for every end position except 0 */
3581     if(s->out_format != FMT_H263){
3582         last_score= 256*256*256*120;
3583         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3584             int score= score_tab[i];
3585             if(i) score += lambda*2; //FIXME exacter?
3586
3587             if(score < last_score){
3588                 last_score= score;
3589                 last_i= i;
3590                 last_level= level_tab[i];
3591                 last_run= run_tab[i];
3592             }
3593         }
3594     }
3595
3596     s->coded_score[n] = last_score;
3597
         /* remember |block[0]| before the block is cleared from start_i on */
3598     dc= FFABS(block[0]);
3599     last_non_zero= last_i - 1;
3600     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3601
3602     if(last_non_zero < start_i)
3603         return last_non_zero;
3604
         /* special case: only the coefficient at position 0 survives in a
          * block that starts at position 0; re-evaluate its candidates
          * against dc and allow dropping it entirely */
3605     if(last_non_zero == 0 && start_i == 0){
3606         int best_level= 0;
3607         int best_score= dc * dc;
3608
3609         for(i=0; i<coeff_count[0]; i++){
3610             int level= coeff[i][0];
3611             int alevel= FFABS(level);
3612             int unquant_coeff, score, distortion;
3613
3614             if(s->out_format == FMT_H263){
3615                     unquant_coeff= (alevel*qmul + qadd)>>3;
3616             }else{ //MPEG1
3617                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3618                     unquant_coeff =   (unquant_coeff - 1) | 1;
3619             }
3620             unquant_coeff = (unquant_coeff + 4) >> 3;
3621             unquant_coeff<<= 3 + 3;
3622
3623             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3624             level+=64;
3625             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3626             else                    score= distortion + esc_length*lambda;
3627
3628             if(score < best_score){
3629                 best_score= score;
3630                 best_level= level - 64;
3631             }
3632         }
3633         block[0]= best_level;
3634         s->coded_score[n] = best_score - dc*dc;
3635         if(best_level == 0) return -1;
3636         else                return last_non_zero;
3637     }
3638
         /* write the chosen levels back, walking the run/level chain
          * backwards from the last coefficient */
3639     i= last_i;
3640     assert(last_level);
3641
3642     block[ perm_scantable[last_non_zero] ]= last_level;
3643     i -= last_run + 1;
3644
3645     for(; i>start_i; i -= run_tab[i] + 1){
3646         block[ perm_scantable[i-1] ]= level_tab[i];
3647     }
3648
3649     return last_non_zero;
3650 }
3651
3652 //#define REFINE_STATS 1
3653 static int16_t basis[64][64];
3654
3655 static void build_basis(uint8_t *perm){
3656     int i, j, x, y;
3657     emms_c();
3658     for(i=0; i<8; i++){
3659         for(j=0; j<8; j++){
3660             for(y=0; y<8; y++){
3661                 for(x=0; x<8; x++){
3662                     double s= 0.25*(1<<BASIS_SHIFT);
3663                     int index= 8*i + j;
3664                     int perm_index= perm[index];
3665                     if(i==0) s*= sqrt(0.5);
3666                     if(j==0) s*= sqrt(0.5);
3667                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3668                 }
3669             }
3670         }
3671     }
3672 }
3673
3674 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3675                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3676                         int n, int qscale){
3677     int16_t rem[64];
3678     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3679     const uint8_t *scantable= s->intra_scantable.scantable;
3680     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3681 //    unsigned int threshold1, threshold2;
3682 //    int bias=0;
3683     int run_tab[65];
3684     int prev_run=0;
3685     int prev_level=0;
3686     int qmul, qadd, start_i, last_non_zero, i, dc;
3687     uint8_t * length;
3688     uint8_t * last_length;
3689     int lambda;
3690     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3691 #ifdef REFINE_STATS
3692 static int count=0;
3693 static int after_last=0;
3694 static int to_zero=0;
3695 static int from_zero=0;
3696 static int raise=0;
3697 static int lower=0;
3698 static int messed_sign=0;
3699 #endif
3700
3701     if(basis[0][0] == 0)
3702         build_basis(s->dsp.idct_permutation);
3703
3704     qmul= qscale*2;
3705     qadd= (qscale-1)|1;
3706     if (s->mb_intra) {
3707         if (!s->h263_aic) {
3708             if (n < 4)
3709                 q = s->y_dc_scale;
3710             else
3711                 q = s->c_dc_scale;
3712         } else{
3713             /* For AIC we skip quant/dequant of INTRADC */
3714             q = 1;
3715             qadd=0;
3716         }
3717         q <<= RECON_SHIFT-3;
3718         /* note: block[0] is assumed to be positive */
3719         dc= block[0]*q;
3720 //        block[0] = (block[0] + (q >> 1)) / q;
3721         start_i = 1;
3722 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3723 //            bias= 1<<(QMAT_SHIFT-1);
3724         length     = s->intra_ac_vlc_length;
3725         last_length= s->intra_ac_vlc_last_length;
3726     } else {
3727         dc= 0;
3728         start_i = 0;
3729         length     = s->inter_ac_vlc_length;
3730         last_length= s->inter_ac_vlc_last_length;
3731     }
3732     last_non_zero = s->block_last_index[n];
3733
3734 #ifdef REFINE_STATS
3735 {START_TIMER
3736 #endif
3737     dc += (1<<(RECON_SHIFT-1));
3738     for(i=0; i<64; i++){
3739         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3740     }
3741 #ifdef REFINE_STATS
3742 STOP_TIMER("memset rem[]")}
3743 #endif
3744     sum=0;
3745     for(i=0; i<64; i++){
3746         int one= 36;
3747         int qns=4;
3748         int w;
3749
3750         w= FFABS(weight[i]) + qns*one;
3751         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3752
3753         weight[i] = w;
3754 //        w=weight[i] = (63*qns + (w/2)) / w;
3755
3756         assert(w>0);
3757         assert(w<(1<<6));
3758         sum += w*w;
3759     }
3760     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3761 #ifdef REFINE_STATS
3762 {START_TIMER
3763 #endif
3764     run=0;
3765     rle_index=0;
3766     for(i=start_i; i<=last_non_zero; i++){
3767         int j= perm_scantable[i];
3768         const int level= block[j];
3769         int coeff;
3770
3771         if(level){
3772             if(level<0) coeff= qmul*level - qadd;
3773             else        coeff= qmul*level + qadd;
3774             run_tab[rle_index++]=run;
3775             run=0;
3776
3777             s->dsp.add_8x8basis(rem, basis[j], coeff);
3778         }else{
3779             run++;
3780         }
3781     }
3782 #ifdef REFINE_STATS
3783 if(last_non_zero>0){
3784 STOP_TIMER("init rem[]")
3785 }
3786 }
3787
3788 {START_TIMER
3789 #endif
3790     for(;;){
3791         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3792         int best_coeff=0;
3793         int best_change=0;
3794         int run2, best_unquant_change=0, analyze_gradient;
3795 #ifdef REFINE_STATS
3796 {START_TIMER
3797 #endif
3798         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3799
3800         if(analyze_gradient){
3801 #ifdef REFINE_STATS
3802 {START_TIMER
3803 #endif
3804             for(i=0; i<64; i++){
3805                 int w= weight[i];
3806
3807                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3808             }
3809 #ifdef REFINE_STATS
3810 STOP_TIMER("rem*w*w")}
3811 {START_TIMER
3812 #endif
3813             s->dsp.fdct(d1);
3814 #ifdef REFINE_STATS
3815 STOP_TIMER("dct")}
3816 #endif
3817         }
3818
3819         if(start_i){
3820             const int level= block[0];
3821             int change, old_coeff;
3822
3823             assert(s->mb_intra);
3824
3825             old_coeff= q*level;
3826
3827             for(change=-1; change<=1; change+=2){
3828                 int new_level= level + change;
3829                 int score, new_coeff;
3830
3831                 new_coeff= q*new_level;
3832                 if(new_coeff >= 2048 || new_coeff < 0)
3833                     continue;
3834
3835                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3836                 if(score<best_score){
3837                     best_score= score;
3838                     best_coeff= 0;
3839                     best_change= change;
3840                     best_unquant_change= new_coeff - old_coeff;
3841                 }
3842             }
3843         }
3844
3845         run=0;
3846         rle_index=0;
3847         run2= run_tab[rle_index++];
3848         prev_level=0;
3849         prev_run=0;
3850
3851         for(i=start_i; i<64; i++){
3852             int j= perm_scantable[i];
3853             const int level= block[j];
3854             int change, old_coeff;
3855
3856             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3857                 break;
3858
3859             if(level){
3860                 if(level<0) old_coeff= qmul*level - qadd;
3861                 else        old_coeff= qmul*level + qadd;
3862                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3863             }else{
3864                 old_coeff=0;
3865                 run2--;
3866                 assert(run2>=0 || i >= last_non_zero );
3867             }
3868
3869             for(change=-1; change<=1; change+=2){
3870                 int new_level= level + change;
3871                 int score, new_coeff, unquant_change;
3872
3873                 score=0;
3874                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3875                    continue;
3876
3877                 if(new_level){
3878                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3879                     else            new_coeff= qmul*new_level + qadd;
3880                     if(new_coeff >= 2048 || new_coeff <= -2048)
3881                         continue;
3882                     //FIXME check for overflow
3883
3884                     if(level){
3885                         if(level < 63 && level > -63){
3886                             if(i < last_non_zero)
3887                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3888                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3889                             else
3890                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3891                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3892                         }
3893                     }else{
3894                         assert(FFABS(new_level)==1);
3895
3896                         if(analyze_gradient){
3897                             int g= d1[ scantable[i] ];
3898                             if(g && (g^new_level) >= 0)
3899                                 continue;
3900                         }
3901
3902                         if(i < last_non_zero){
3903                             int next_i= i + run2 + 1;
3904                             int next_level= block[ perm_scantable[next_i] ] + 64;
3905
3906                             if(next_level&(~127))
3907                                 next_level= 0;
3908
3909                             if(next_i < last_non_zero)
3910                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3911                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3912                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3913                             else
3914                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3915                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3916                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3917                         }else{
3918                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3919                             if(prev_level){
3920                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3921                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3922                             }
3923                         }
3924                     }
3925                 }else{
3926                     new_coeff=0;
3927                     assert(FFABS(level)==1);
3928
3929                     if(i < last_non_zero){
3930                         int next_i= i + run2 + 1;
3931                         int next_level= block[ perm_scantable[next_i] ] + 64;
3932
3933                         if(next_level&(~127))
3934                             next_level= 0;
3935
3936                         if(next_i < last_non_zero)
3937                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3938                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3939                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3940                         else
3941                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3942                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3943                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3944                     }else{
3945                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3946                         if(prev_level){
3947                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3948                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3949                         }
3950                     }
3951                 }
3952
3953                 score *= lambda;
3954
3955                 unquant_change= new_coeff - old_coeff;
3956                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3957
3958                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3959                 if(score<best_score){
3960                     best_score= score;
3961                     best_coeff= i;
3962                     best_change= change;
3963                     best_unquant_change= unquant_change;
3964                 }
3965             }
3966             if(level){
3967                 prev_level= level + 64;
3968                 if(prev_level&(~127))
3969                     prev_level= 0;
3970                 prev_run= run;
3971                 run=0;
3972             }else{
3973                 run++;
3974             }
3975         }
3976 #ifdef REFINE_STATS
3977 STOP_TIMER("iterative step")}
3978 #endif
3979
3980         if(best_change){
3981             int j= perm_scantable[ best_coeff ];
3982
3983             block[j] += best_change;
3984
3985             if(best_coeff > last_non_zero){
3986                 last_non_zero= best_coeff;
3987                 assert(block[j]);
3988 #ifdef REFINE_STATS
3989 after_last++;
3990 #endif
3991             }else{
3992 #ifdef REFINE_STATS
3993 if(block[j]){
3994     if(block[j] - best_change){
3995         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3996             raise++;
3997         }else{
3998             lower++;
3999         }
4000     }else{
4001         from_zero++;
4002     }
4003 }else{
4004     to_zero++;
4005 }
4006 #endif
4007                 for(; last_non_zero>=start_i; last_non_zero--){
4008                     if(block[perm_scantable[last_non_zero]])
4009                         break;
4010                 }
4011             }
4012 #ifdef REFINE_STATS
4013 count++;
4014 if(256*256*256*64 % count == 0){
4015     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4016 }
4017 #endif
4018             run=0;
4019             rle_index=0;
4020             for(i=start_i; i<=last_non_zero; i++){
4021                 int j= perm_scantable[i];
4022                 const int level= block[j];
4023
4024                  if(level){
4025                      run_tab[rle_index++]=run;
4026                      run=0;
4027                  }else{
4028                      run++;
4029                  }
4030             }
4031
4032             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4033         }else{
4034             break;
4035         }
4036     }
4037 #ifdef REFINE_STATS
4038 if(last_non_zero>0){
4039 STOP_TIMER("iterative search")
4040 }
4041 }
4042 #endif
4043
4044     return last_non_zero;
4045 }
4046
4047 int ff_dct_quantize_c(MpegEncContext *s,
4048                         DCTELEM *block, int n,
4049                         int qscale, int *overflow)
4050 {
4051     int i, j, level, last_non_zero, q, start_i;
4052     const int *qmat;
4053     const uint8_t *scantable= s->intra_scantable.scantable;
4054     int bias;
4055     int max=0;
4056     unsigned int threshold1, threshold2;
4057
4058     s->dsp.fdct (block);
4059
4060     if(s->dct_error_sum)
4061         s->denoise_dct(s, block);
4062
4063     if (s->mb_intra) {
4064         if (!s->h263_aic) {
4065             if (n < 4)
4066                 q = s->y_dc_scale;
4067             else
4068                 q = s->c_dc_scale;
4069             q = q << 3;
4070         } else
4071             /* For AIC we skip quant/dequant of INTRADC */
4072             q = 1 << 3;
4073
4074         /* note: block[0] is assumed to be positive */
4075         block[0] = (block[0] + (q >> 1)) / q;
4076         start_i = 1;
4077         last_non_zero = 0;
4078         qmat = s->q_intra_matrix[qscale];
4079         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4080     } else {
4081         start_i = 0;
4082         last_non_zero = -1;
4083         qmat = s->q_inter_matrix[qscale];
4084         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4085     }
4086     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4087     threshold2= (threshold1<<1);
4088     for(i=63;i>=start_i;i--) {
4089         j = scantable[i];
4090         level = block[j] * qmat[j];
4091
4092         if(((unsigned)(level+threshold1))>threshold2){
4093             last_non_zero = i;
4094             break;
4095         }else{
4096             block[j]=0;
4097         }
4098     }
4099     for(i=start_i; i<=last_non_zero; i++) {
4100         j = scantable[i];
4101         level = block[j] * qmat[j];
4102
4103 //        if(   bias+level >= (1<<QMAT_SHIFT)
4104 //           || bias-level >= (1<<QMAT_SHIFT)){
4105         if(((unsigned)(level+threshold1))>threshold2){
4106             if(level>0){
4107                 level= (bias + level)>>QMAT_SHIFT;
4108                 block[j]= level;
4109             }else{
4110                 level= (bias - level)>>QMAT_SHIFT;
4111                 block[j]= -level;
4112             }
4113             max |=level;
4114         }else{
4115             block[j]=0;
4116         }
4117     }
4118     *overflow= s->max_qcoeff < max; //overflow might have happened
4119
4120     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4121     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4122         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4123
4124     return last_non_zero;
4125 }
4126
/* Byte offset of field x inside MpegEncContext, for the AVOption tables below. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the tables below: a video, encoding-side parameter.
 * Parenthesized so the macro stays one operand when it is used next to
 * operators that bind tighter than '|'. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4129 static const AVOption h263_options[] = {
4130     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4131     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4132     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { 0 }, 0, INT_MAX, VE },
4133     FF_MPV_COMMON_OPTS
4134     { NULL },
4135 };
4136
4137 static const AVClass h263_class = {
4138     .class_name = "H.263 encoder",
4139     .item_name  = av_default_item_name,
4140     .option     = h263_options,
4141     .version    = LIBAVUTIL_VERSION_INT,
4142 };
4143
4144 AVCodec ff_h263_encoder = {
4145     .name           = "h263",
4146     .type           = AVMEDIA_TYPE_VIDEO,
4147     .id             = CODEC_ID_H263,
4148     .priv_data_size = sizeof(MpegEncContext),
4149     .init           = ff_MPV_encode_init,
4150     .encode2        = ff_MPV_encode_picture,
4151     .close          = ff_MPV_encode_end,
4152     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4153     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4154     .priv_class     = &h263_class,
4155 };
4156
4157 static const AVOption h263p_options[] = {
4158     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4159     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4160     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4161     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4162     FF_MPV_COMMON_OPTS
4163     { NULL },
4164 };
4165 static const AVClass h263p_class = {
4166     .class_name = "H.263p encoder",
4167     .item_name  = av_default_item_name,
4168     .option     = h263p_options,
4169     .version    = LIBAVUTIL_VERSION_INT,
4170 };
4171
4172 AVCodec ff_h263p_encoder = {
4173     .name           = "h263p",
4174     .type           = AVMEDIA_TYPE_VIDEO,
4175     .id             = CODEC_ID_H263P,
4176     .priv_data_size = sizeof(MpegEncContext),
4177     .init           = ff_MPV_encode_init,
4178     .encode2        = ff_MPV_encode_picture,
4179     .close          = ff_MPV_encode_end,
4180     .capabilities = CODEC_CAP_SLICE_THREADS,
4181     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4182     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4183     .priv_class     = &h263p_class,
4184 };
4185
4186 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4187
4188 AVCodec ff_msmpeg4v2_encoder = {
4189     .name           = "msmpeg4v2",
4190     .type           = AVMEDIA_TYPE_VIDEO,
4191     .id             = CODEC_ID_MSMPEG4V2,
4192     .priv_data_size = sizeof(MpegEncContext),
4193     .init           = ff_MPV_encode_init,
4194     .encode2        = ff_MPV_encode_picture,
4195     .close          = ff_MPV_encode_end,
4196     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4197     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4198     .priv_class     = &msmpeg4v2_class,
4199 };
4200
4201 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4202
4203 AVCodec ff_msmpeg4v3_encoder = {
4204     .name           = "msmpeg4",
4205     .type           = AVMEDIA_TYPE_VIDEO,
4206     .id             = CODEC_ID_MSMPEG4V3,
4207     .priv_data_size = sizeof(MpegEncContext),
4208     .init           = ff_MPV_encode_init,
4209     .encode2        = ff_MPV_encode_picture,
4210     .close          = ff_MPV_encode_end,
4211     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4212     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4213     .priv_class     = &msmpeg4v3_class,
4214 };
4215
4216 FF_MPV_GENERIC_CLASS(wmv1)
4217
4218 AVCodec ff_wmv1_encoder = {
4219     .name           = "wmv1",
4220     .type           = AVMEDIA_TYPE_VIDEO,
4221     .id             = CODEC_ID_WMV1,
4222     .priv_data_size = sizeof(MpegEncContext),
4223     .init           = ff_MPV_encode_init,
4224     .encode2        = ff_MPV_encode_picture,
4225     .close          = ff_MPV_encode_end,
4226     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4227     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4228     .priv_class     = &wmv1_class,
4229 };