]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
cabac: Mark ff_h264_mps_state array as static, it is only used within cabac.c.
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include <limits.h>
47
48 //#undef NDEBUG
49 //#include <assert.h>
50
51 static int encode_picture(MpegEncContext *s, int picture_number);
52 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
53 static int sse_mb(MpegEncContext *s);
54 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
55 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
56
57 /* enable all paranoid tests for rounding, overflows, etc... */
58 //#define PARANOID
59
60 //#define DEBUG
61
/* File-local default tables. MPV_encode_defaults() installs them as
 * s->me.mv_penalty and s->fcode_tab, and sets the 32 entries of
 * default_fcode_tab around index MAX_MV to 1; no other write to either
 * table is visible in this part of the file. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
64
65 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
66                        uint16_t (*qmat16)[2][64],
67                        const uint16_t *quant_matrix,
68                        int bias, int qmin, int qmax, int intra)
69 {
70     int qscale;
71     int shift = 0;
72
73     for (qscale = qmin; qscale <= qmax; qscale++) {
74         int i;
75         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
76             dsp->fdct == ff_jpeg_fdct_islow_10
77 #ifdef FAAN_POSTSCALE
78             || dsp->fdct == ff_faandct
79 #endif
80             ) {
81             for (i = 0; i < 64; i++) {
82                 const int j = dsp->idct_permutation[i];
83                 /* 16 <= qscale * quant_matrix[i] <= 7905
84                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
85                  *             19952 <=              x  <= 249205026
86                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
87                  *           3444240 >= (1 << 36) / (x) >= 275 */
88
89                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
90                                         (qscale * quant_matrix[j]));
91             }
92         } else if (dsp->fdct == fdct_ifast
93 #ifndef FAAN_POSTSCALE
94                    || dsp->fdct == ff_faandct
95 #endif
96                    ) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale *
107                                          quant_matrix[j]));
108             }
109         } else {
110             for (i = 0; i < 64; i++) {
111                 const int j = dsp->idct_permutation[i];
112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
113                  * Assume x = qscale * quant_matrix[i]
114                  * So             16 <=              x  <= 7905
115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
116                  * so          32768 >= (1 << 19) / (x) >= 67 */
117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
118                                         (qscale * quant_matrix[j]));
119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
120                 //                    (qscale * quant_matrix[i]);
121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
122                                        (qscale * quant_matrix[j]);
123
124                 if (qmat16[qscale][0][i] == 0 ||
125                     qmat16[qscale][0][i] == 128 * 256)
126                     qmat16[qscale][0][i] = 128 * 256 - 1;
127                 qmat16[qscale][1][i] =
128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
129                                 qmat16[qscale][0][i]);
130             }
131         }
132
133         for (i = intra; i < 64; i++) {
134             int64_t max = 8191;
135             if (dsp->fdct == fdct_ifast
136 #ifndef FAAN_POSTSCALE
137                 || dsp->fdct == ff_faandct
138 #endif
139                ) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
153
154 static inline void update_qscale(MpegEncContext *s)
155 {
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.f.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
193 static void copy_picture_attributes(MpegEncContext *s,
194                                     AVFrame *dst,
195                                     AVFrame *src)
196 {
197     int i;
198
199     dst->pict_type              = src->pict_type;
200     dst->quality                = src->quality;
201     dst->coded_picture_number   = src->coded_picture_number;
202     dst->display_picture_number = src->display_picture_number;
203     //dst->reference              = src->reference;
204     dst->pts                    = src->pts;
205     dst->interlaced_frame       = src->interlaced_frame;
206     dst->top_field_first        = src->top_field_first;
207
208     if (s->avctx->me_threshold) {
209         if (!src->motion_val[0])
210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
211         if (!src->mb_type)
212             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
213         if (!src->ref_index[0])
214             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
215         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
216             av_log(s->avctx, AV_LOG_ERROR,
217                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
218                    src->motion_subsample_log2, dst->motion_subsample_log2);
219
220         memcpy(dst->mb_type, src->mb_type,
221                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
222
223         for (i = 0; i < 2; i++) {
224             int stride = ((16 * s->mb_width ) >>
225                           src->motion_subsample_log2) + 1;
226             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
227
228             if (src->motion_val[i] &&
229                 src->motion_val[i] != dst->motion_val[i]) {
230                 memcpy(dst->motion_val[i], src->motion_val[i],
231                        2 * stride * height * sizeof(int16_t));
232             }
233             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
234                 memcpy(dst->ref_index[i], src->ref_index[i],
235                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
236             }
237         }
238     }
239 }
240
241 static void update_duplicate_context_after_me(MpegEncContext *dst,
242                                               MpegEncContext *src)
243 {
244 #define COPY(a) dst->a= src->a
245     COPY(pict_type);
246     COPY(current_picture);
247     COPY(f_code);
248     COPY(b_code);
249     COPY(qscale);
250     COPY(lambda);
251     COPY(lambda2);
252     COPY(picture_in_gop_number);
253     COPY(gop_picture_number);
254     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
255     COPY(progressive_frame);    // FIXME don't set in encode_header
256     COPY(partitioned_frame);    // FIXME don't set in encode_header
257 #undef COPY
258 }
259
260 /**
261  * Set the given MpegEncContext to defaults for encoding.
262  * the changed fields will not depend upon the prior state of the MpegEncContext.
263  */
264 static void MPV_encode_defaults(MpegEncContext *s)
265 {
266     int i;
267     MPV_common_defaults(s);
268
269     for (i = -16; i < 16; i++) {
270         default_fcode_tab[i + MAX_MV] = 1;
271     }
272     s->me.mv_penalty = default_mv_penalty;
273     s->fcode_tab     = default_fcode_tab;
274 }
275
276 /* init video encoder */
277 av_cold int MPV_encode_init(AVCodecContext *avctx)
278 {
279     MpegEncContext *s = avctx->priv_data;
280     int i;
281     int chroma_h_shift, chroma_v_shift;
282
283     MPV_encode_defaults(s);
284
285     switch (avctx->codec_id) {
286     case CODEC_ID_MPEG2VIDEO:
287         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
288             avctx->pix_fmt != PIX_FMT_YUV422P) {
289             av_log(avctx, AV_LOG_ERROR,
290                    "only YUV420 and YUV422 are supported\n");
291             return -1;
292         }
293         break;
294     case CODEC_ID_LJPEG:
295         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
296             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
297             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
298             avctx->pix_fmt != PIX_FMT_BGRA     &&
299             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
300               avctx->pix_fmt != PIX_FMT_YUV422P &&
301               avctx->pix_fmt != PIX_FMT_YUV444P) ||
302              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
303             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
304             return -1;
305         }
306         break;
307     case CODEC_ID_MJPEG:
308         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
309             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
310             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
311               avctx->pix_fmt != PIX_FMT_YUV422P) ||
312              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
313             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
314             return -1;
315         }
316         break;
317     default:
318         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
319             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
320             return -1;
321         }
322     }
323
324     switch (avctx->pix_fmt) {
325     case PIX_FMT_YUVJ422P:
326     case PIX_FMT_YUV422P:
327         s->chroma_format = CHROMA_422;
328         break;
329     case PIX_FMT_YUVJ420P:
330     case PIX_FMT_YUV420P:
331     default:
332         s->chroma_format = CHROMA_420;
333         break;
334     }
335
336     s->bit_rate = avctx->bit_rate;
337     s->width    = avctx->width;
338     s->height   = avctx->height;
339     if (avctx->gop_size > 600 &&
340         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
341         av_log(avctx, AV_LOG_ERROR,
342                "Warning keyframe interval too large! reducing it ...\n");
343         avctx->gop_size = 600;
344     }
345     s->gop_size     = avctx->gop_size;
346     s->avctx        = avctx;
347     s->flags        = avctx->flags;
348     s->flags2       = avctx->flags2;
349     s->max_b_frames = avctx->max_b_frames;
350     s->codec_id     = avctx->codec->id;
351     s->luma_elim_threshold   = avctx->luma_elim_threshold;
352     s->chroma_elim_threshold = avctx->chroma_elim_threshold;
353     s->strict_std_compliance = avctx->strict_std_compliance;
354 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
355     if (avctx->flags & CODEC_FLAG_PART)
356         s->data_partitioning = 1;
357 #endif
358     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
359     s->mpeg_quant         = avctx->mpeg_quant;
360     s->rtp_mode           = !!avctx->rtp_payload_size;
361     s->intra_dc_precision = avctx->intra_dc_precision;
362     s->user_specified_pts = AV_NOPTS_VALUE;
363
364     if (s->gop_size <= 1) {
365         s->intra_only = 1;
366         s->gop_size   = 12;
367     } else {
368         s->intra_only = 0;
369     }
370
371     s->me_method = avctx->me_method;
372
373     /* Fixed QSCALE */
374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
375
376     s->adaptive_quant = (s->avctx->lumi_masking ||
377                          s->avctx->dark_masking ||
378                          s->avctx->temporal_cplx_masking ||
379                          s->avctx->spatial_cplx_masking  ||
380                          s->avctx->p_masking      ||
381                          s->avctx->border_masking ||
382                          (s->flags & CODEC_FLAG_QP_RD)) &&
383                         !s->fixed_qscale;
384
385     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
386 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
387     s->alternate_scan   = !!(s->flags  & CODEC_FLAG_ALT_SCAN);
388     s->intra_vlc_format = !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
389     s->q_scale_type     = !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
390     s->obmc             = !!(s->flags  & CODEC_FLAG_OBMC);
391 #endif
392
393     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
394         av_log(avctx, AV_LOG_ERROR,
395                "a vbv buffer size is needed, "
396                "for encoding with a maximum bitrate\n");
397         return -1;
398     }
399
400     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
401         av_log(avctx, AV_LOG_INFO,
402                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
403     }
404
405     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
406         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
407         return -1;
408     }
409
410     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
411         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
412         return -1;
413     }
414
415     if (avctx->rc_max_rate &&
416         avctx->rc_max_rate == avctx->bit_rate &&
417         avctx->rc_max_rate != avctx->rc_min_rate) {
418         av_log(avctx, AV_LOG_INFO,
419                "impossible bitrate constraints, this will fail\n");
420     }
421
422     if (avctx->rc_buffer_size &&
423         avctx->bit_rate * (int64_t)avctx->time_base.num >
424             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
425         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
426         return -1;
427     }
428
429     if (!s->fixed_qscale &&
430         avctx->bit_rate * av_q2d(avctx->time_base) >
431             avctx->bit_rate_tolerance) {
432         av_log(avctx, AV_LOG_ERROR,
433                "bitrate tolerance too small for bitrate\n");
434         return -1;
435     }
436
437     if (s->avctx->rc_max_rate &&
438         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
439         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
440          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
441         90000LL * (avctx->rc_buffer_size - 1) >
442             s->avctx->rc_max_rate * 0xFFFFLL) {
443         av_log(avctx, AV_LOG_INFO,
444                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
445                "specified vbv buffer is too large for the given bitrate!\n");
446     }
447
448     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
449         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
450         s->codec_id != CODEC_ID_FLV1) {
451         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
452         return -1;
453     }
454
455     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
456         av_log(avctx, AV_LOG_ERROR,
457                "OBMC is only supported with simple mb decision\n");
458         return -1;
459     }
460
461 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
462     if (s->obmc && s->codec_id != CODEC_ID_H263 &&
463         s->codec_id != CODEC_ID_H263P) {
464         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
465         return -1;
466     }
467 #endif
468
469     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
470         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
471         return -1;
472     }
473
474 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
475     if (s->data_partitioning && s->codec_id != CODEC_ID_MPEG4) {
476         av_log(avctx, AV_LOG_ERROR,
477                "data partitioning not supported by codec\n");
478         return -1;
479     }
480 #endif
481
482     if (s->max_b_frames                    &&
483         s->codec_id != CODEC_ID_MPEG4      &&
484         s->codec_id != CODEC_ID_MPEG1VIDEO &&
485         s->codec_id != CODEC_ID_MPEG2VIDEO) {
486         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
487         return -1;
488     }
489
490     if ((s->codec_id == CODEC_ID_MPEG4 ||
491          s->codec_id == CODEC_ID_H263  ||
492          s->codec_id == CODEC_ID_H263P) &&
493         (avctx->sample_aspect_ratio.num > 255 ||
494          avctx->sample_aspect_ratio.den > 255)) {
495         av_log(avctx, AV_LOG_ERROR,
496                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
497                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
498         return -1;
499     }
500
501     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME
502 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
503                     | CODEC_FLAG_ALT_SCAN
504 #endif
505         )) &&
506         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
507         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
508         return -1;
509     }
510
511     // FIXME mpeg2 uses that too
512     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
513         av_log(avctx, AV_LOG_ERROR,
514                "mpeg2 style quantization not supported by codec\n");
515         return -1;
516     }
517
518     if ((s->flags & CODEC_FLAG_CBP_RD) && !avctx->trellis) {
519         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
520         return -1;
521     }
522
523     if ((s->flags & CODEC_FLAG_QP_RD) &&
524         s->avctx->mb_decision != FF_MB_DECISION_RD) {
525         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
526         return -1;
527     }
528
529     if (s->avctx->scenechange_threshold < 1000000000 &&
530         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
531         av_log(avctx, AV_LOG_ERROR,
532                "closed gop with scene change detection are not supported yet, "
533                "set threshold to 1000000000\n");
534         return -1;
535     }
536
537 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
538     if ((s->flags2 & CODEC_FLAG2_INTRA_VLC) &&
539         s->codec_id != CODEC_ID_MPEG2VIDEO) {
540         av_log(avctx, AV_LOG_ERROR,
541                "intra vlc table not supported by codec\n");
542         return -1;
543     }
544 #endif
545
546     if (s->flags & CODEC_FLAG_LOW_DELAY) {
547         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
548             av_log(avctx, AV_LOG_ERROR,
549                   "low delay forcing is only available for mpeg2\n");
550             return -1;
551         }
552         if (s->max_b_frames != 0) {
553             av_log(avctx, AV_LOG_ERROR,
554                    "b frames cannot be used with low delay\n");
555             return -1;
556         }
557     }
558
559     if (s->q_scale_type == 1) {
560 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
561         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
562             av_log(avctx, AV_LOG_ERROR,
563                    "non linear quant is only available for mpeg2\n");
564             return -1;
565         }
566 #endif
567         if (avctx->qmax > 12) {
568             av_log(avctx, AV_LOG_ERROR,
569                    "non linear quant only supports qmax <= 12 currently\n");
570             return -1;
571         }
572     }
573
574     if (s->avctx->thread_count > 1         &&
575         s->codec_id != CODEC_ID_MPEG4      &&
576         s->codec_id != CODEC_ID_MPEG1VIDEO &&
577         s->codec_id != CODEC_ID_MPEG2VIDEO &&
578         (s->codec_id != CODEC_ID_H263P
579 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
580          || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT)
581 #endif
582          )) {
583         av_log(avctx, AV_LOG_ERROR,
584                "multi threaded encoding not supported by codec\n");
585         return -1;
586     }
587
588     if (s->avctx->thread_count < 1) {
589         av_log(avctx, AV_LOG_ERROR,
590                "automatic thread number detection not supported by codec,"
591                "patch welcome\n");
592         return -1;
593     }
594
595     if (s->avctx->thread_count > 1)
596         s->rtp_mode = 1;
597
598     if (!avctx->time_base.den || !avctx->time_base.num) {
599         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
600         return -1;
601     }
602
603     i = (INT_MAX / 2 + 128) >> 8;
604     if (avctx->me_threshold >= i) {
605         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
606                i - 1);
607         return -1;
608     }
609     if (avctx->mb_threshold >= i) {
610         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
611                i - 1);
612         return -1;
613     }
614
615     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
616         av_log(avctx, AV_LOG_INFO,
617                "notice: b_frame_strategy only affects the first pass\n");
618         avctx->b_frame_strategy = 0;
619     }
620
621     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
622     if (i > 1) {
623         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
624         avctx->time_base.den /= i;
625         avctx->time_base.num /= i;
626         //return -1;
627     }
628
629     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO ||
630         s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG) {
631         // (a + x * 3 / 8) / x
632         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
633         s->inter_quant_bias = 0;
634     } else {
635         s->intra_quant_bias = 0;
636         // (a - x / 4) / x
637         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
638     }
639
640     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
641         s->intra_quant_bias = avctx->intra_quant_bias;
642     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
643         s->inter_quant_bias = avctx->inter_quant_bias;
644
645     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
646                                   &chroma_v_shift);
647
648     if (avctx->codec_id == CODEC_ID_MPEG4 &&
649         s->avctx->time_base.den > (1 << 16) - 1) {
650         av_log(avctx, AV_LOG_ERROR,
651                "timebase %d/%d not supported by MPEG 4 standard, "
652                "the maximum admitted value for the timebase denominator "
653                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
654                (1 << 16) - 1);
655         return -1;
656     }
657     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
658
659     switch (avctx->codec->id) {
660     case CODEC_ID_MPEG1VIDEO:
661         s->out_format = FMT_MPEG1;
662         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
663         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
664         break;
665     case CODEC_ID_MPEG2VIDEO:
666         s->out_format = FMT_MPEG1;
667         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
668         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
669         s->rtp_mode   = 1;
670         break;
671     case CODEC_ID_LJPEG:
672     case CODEC_ID_MJPEG:
673         s->out_format = FMT_MJPEG;
674         s->intra_only = 1; /* force intra only for jpeg */
675         if (avctx->codec->id == CODEC_ID_LJPEG &&
676             avctx->pix_fmt   == PIX_FMT_BGRA) {
677             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
678             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
679             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
680         } else {
681             s->mjpeg_vsample[0] = 2;
682             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
683             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
684             s->mjpeg_hsample[0] = 2;
685             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
686             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
687         }
688         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
689             ff_mjpeg_encode_init(s) < 0)
690             return -1;
691         avctx->delay = 0;
692         s->low_delay = 1;
693         break;
694     case CODEC_ID_H261:
695         if (!CONFIG_H261_ENCODER)
696             return -1;
697         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
698             av_log(avctx, AV_LOG_ERROR,
699                    "The specified picture size of %dx%d is not valid for the "
700                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
701                     s->width, s->height);
702             return -1;
703         }
704         s->out_format = FMT_H261;
705         avctx->delay  = 0;
706         s->low_delay  = 1;
707         break;
708     case CODEC_ID_H263:
709         if (!CONFIG_H263_ENCODER)
710         return -1;
711         if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format),
712                              s->width, s->height) == 8) {
713             av_log(avctx, AV_LOG_INFO,
714                    "The specified picture size of %dx%d is not valid for "
715                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
716                    "352x288, 704x576, and 1408x1152."
717                    "Try H.263+.\n", s->width, s->height);
718             return -1;
719         }
720         s->out_format = FMT_H263;
721         avctx->delay  = 0;
722         s->low_delay  = 1;
723         break;
724     case CODEC_ID_H263P:
725         s->out_format = FMT_H263;
726         s->h263_plus  = 1;
727         /* Fx */
728 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
729         if (avctx->flags & CODEC_FLAG_H263P_UMV)
730             s->umvplus = 1;
731         if (avctx->flags & CODEC_FLAG_H263P_AIV)
732             s->alt_inter_vlc = 1;
733         if (avctx->flags & CODEC_FLAG_H263P_SLICE_STRUCT)
734             s->h263_slice_structured = 1;
735 #endif
736         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
737         s->modified_quant  = s->h263_aic;
738         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
739         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
740
741         /* /Fx */
742         /* These are just to be sure */
743         avctx->delay = 0;
744         s->low_delay = 1;
745         break;
746     case CODEC_ID_FLV1:
747         s->out_format      = FMT_H263;
748         s->h263_flv        = 2; /* format = 1; 11-bit codes */
749         s->unrestricted_mv = 1;
750         s->rtp_mode  = 0; /* don't allow GOB */
751         avctx->delay = 0;
752         s->low_delay = 1;
753         break;
754     case CODEC_ID_RV10:
755         s->out_format = FMT_H263;
756         avctx->delay  = 0;
757         s->low_delay  = 1;
758         break;
759     case CODEC_ID_RV20:
760         s->out_format      = FMT_H263;
761         avctx->delay       = 0;
762         s->low_delay       = 1;
763         s->modified_quant  = 1;
764         s->h263_aic        = 1;
765         s->h263_plus       = 1;
766         s->loop_filter     = 1;
767         s->unrestricted_mv = 0;
768         break;
769     case CODEC_ID_MPEG4:
770         s->out_format      = FMT_H263;
771         s->h263_pred       = 1;
772         s->unrestricted_mv = 1;
773         s->low_delay       = s->max_b_frames ? 0 : 1;
774         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
775         break;
776     case CODEC_ID_MSMPEG4V2:
777         s->out_format      = FMT_H263;
778         s->h263_pred       = 1;
779         s->unrestricted_mv = 1;
780         s->msmpeg4_version = 2;
781         avctx->delay       = 0;
782         s->low_delay       = 1;
783         break;
784     case CODEC_ID_MSMPEG4V3:
785         s->out_format        = FMT_H263;
786         s->h263_pred         = 1;
787         s->unrestricted_mv   = 1;
788         s->msmpeg4_version   = 3;
789         s->flipflop_rounding = 1;
790         avctx->delay         = 0;
791         s->low_delay         = 1;
792         break;
793     case CODEC_ID_WMV1:
794         s->out_format        = FMT_H263;
795         s->h263_pred         = 1;
796         s->unrestricted_mv   = 1;
797         s->msmpeg4_version   = 4;
798         s->flipflop_rounding = 1;
799         avctx->delay         = 0;
800         s->low_delay         = 1;
801         break;
802     case CODEC_ID_WMV2:
803         s->out_format        = FMT_H263;
804         s->h263_pred         = 1;
805         s->unrestricted_mv   = 1;
806         s->msmpeg4_version   = 5;
807         s->flipflop_rounding = 1;
808         avctx->delay         = 0;
809         s->low_delay         = 1;
810         break;
811     default:
812         return -1;
813     }
814
815     avctx->has_b_frames = !s->low_delay;
816
817     s->encoding = 1;
818
819     s->progressive_frame    =
820     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
821                                                 CODEC_FLAG_INTERLACED_ME) ||
822                                 s->alternate_scan);
823
824     /* init */
825     if (MPV_common_init(s) < 0)
826         return -1;
827
828     if (!s->dct_quantize)
829         s->dct_quantize = dct_quantize_c;
830     if (!s->denoise_dct)
831         s->denoise_dct  = denoise_dct_c;
832     s->fast_dct_quantize = s->dct_quantize;
833     if (avctx->trellis)
834         s->dct_quantize  = dct_quantize_trellis_c;
835
836     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
837         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
838
839     s->quant_precision = 5;
840
841     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
842     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
843
844     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
845         ff_h261_encode_init(s);
846     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
847         h263_encode_init(s);
848     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
849         ff_msmpeg4_encode_init(s);
850     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
851         && s->out_format == FMT_MPEG1)
852         ff_mpeg1_encode_init(s);
853
854     /* init q matrix */
855     for (i = 0; i < 64; i++) {
856         int j = s->dsp.idct_permutation[i];
857         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
858             s->mpeg_quant) {
859             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
860             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
861         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
862             s->intra_matrix[j] =
863             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
864         } else {
865             /* mpeg1/2 */
866             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
867             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
868         }
869         if (s->avctx->intra_matrix)
870             s->intra_matrix[j] = s->avctx->intra_matrix[i];
871         if (s->avctx->inter_matrix)
872             s->inter_matrix[j] = s->avctx->inter_matrix[i];
873     }
874
875     /* precompute matrix */
876     /* for mjpeg, we do include qscale in the matrix */
877     if (s->out_format != FMT_MJPEG) {
878         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
879                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
880                           31, 1);
881         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
882                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
883                           31, 0);
884     }
885
886     if (ff_rate_control_init(s) < 0)
887         return -1;
888
889     return 0;
890 }
891
892 av_cold int MPV_encode_end(AVCodecContext *avctx)
893 {
894     MpegEncContext *s = avctx->priv_data;
895
896     ff_rate_control_uninit(s);
897
898     MPV_common_end(s);
899     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
900         s->out_format == FMT_MJPEG)
901         ff_mjpeg_encode_close(s);
902
903     av_freep(&avctx->extradata);
904
905     return 0;
906 }
907
/**
 * Sum of absolute differences between a 16x16 block and a constant value.
 *
 * @param src    top-left sample of the 16x16 block
 * @param ref    value every sample is compared against
 * @param stride distance between the starts of consecutive rows
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
921
922 static int get_intra_count(MpegEncContext *s, uint8_t *src,
923                            uint8_t *ref, int stride)
924 {
925     int x, y, w, h;
926     int acc = 0;
927
928     w = s->width  & ~15;
929     h = s->height & ~15;
930
931     for (y = 0; y < h; y += 16) {
932         for (x = 0; x < w; x += 16) {
933             int offset = x + y * stride;
934             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
935                                      16);
936             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
937             int sae  = get_sae(src + offset, mean, stride);
938
939             acc += sae + 500 < sad;
940         }
941     }
942     return acc;
943 }
944
945
946 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
947 {
948     AVFrame *pic = NULL;
949     int64_t pts;
950     int i;
951     const int encoding_delay = s->max_b_frames;
952     int direct = 1;
953
954     if (pic_arg) {
955         pts = pic_arg->pts;
956         pic_arg->display_picture_number = s->input_picture_number++;
957
958         if (pts != AV_NOPTS_VALUE) {
959             if (s->user_specified_pts != AV_NOPTS_VALUE) {
960                 int64_t time = pts;
961                 int64_t last = s->user_specified_pts;
962
963                 if (time <= last) {
964                     av_log(s->avctx, AV_LOG_ERROR,
965                            "Error, Invalid timestamp=%"PRId64", "
966                            "last=%"PRId64"\n", pts, s->user_specified_pts);
967                     return -1;
968                 }
969             }
970             s->user_specified_pts = pts;
971         } else {
972             if (s->user_specified_pts != AV_NOPTS_VALUE) {
973                 s->user_specified_pts =
974                 pts = s->user_specified_pts + 1;
975                 av_log(s->avctx, AV_LOG_INFO,
976                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
977                        pts);
978             } else {
979                 pts = pic_arg->display_picture_number;
980             }
981         }
982     }
983
984   if (pic_arg) {
985     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
986         direct = 0;
987     if (pic_arg->linesize[0] != s->linesize)
988         direct = 0;
989     if (pic_arg->linesize[1] != s->uvlinesize)
990         direct = 0;
991     if (pic_arg->linesize[2] != s->uvlinesize)
992         direct = 0;
993
994     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
995     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
996
997     if (direct) {
998         i = ff_find_unused_picture(s, 1);
999         if (i < 0)
1000             return i;
1001
1002         pic = (AVFrame *) &s->picture[i];
1003         pic->reference = 3;
1004
1005         for (i = 0; i < 4; i++) {
1006             pic->data[i]     = pic_arg->data[i];
1007             pic->linesize[i] = pic_arg->linesize[i];
1008         }
1009         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1010             return -1;
1011         }
1012     } else {
1013         i = ff_find_unused_picture(s, 0);
1014         if (i < 0)
1015             return i;
1016
1017         pic = (AVFrame *) &s->picture[i];
1018         pic->reference = 3;
1019
1020         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1021             return -1;
1022         }
1023
1024         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1025             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1026             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1027             // empty
1028         } else {
1029             int h_chroma_shift, v_chroma_shift;
1030             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1031                                           &v_chroma_shift);
1032
1033             for (i = 0; i < 3; i++) {
1034                 int src_stride = pic_arg->linesize[i];
1035                 int dst_stride = i ? s->uvlinesize : s->linesize;
1036                 int h_shift = i ? h_chroma_shift : 0;
1037                 int v_shift = i ? v_chroma_shift : 0;
1038                 int w = s->width  >> h_shift;
1039                 int h = s->height >> v_shift;
1040                 uint8_t *src = pic_arg->data[i];
1041                 uint8_t *dst = pic->data[i];
1042
1043                 if (!s->avctx->rc_buffer_size)
1044                     dst += INPLACE_OFFSET;
1045
1046                 if (src_stride == dst_stride)
1047                     memcpy(dst, src, src_stride * h);
1048                 else {
1049                     while (h--) {
1050                         memcpy(dst, src, w);
1051                         dst += dst_stride;
1052                         src += src_stride;
1053                     }
1054                 }
1055             }
1056         }
1057     }
1058     copy_picture_attributes(s, pic, pic_arg);
1059     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1060   }
1061
1062     /* shift buffer entries */
1063     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1064         s->input_picture[i - 1] = s->input_picture[i];
1065
1066     s->input_picture[encoding_delay] = (Picture*) pic;
1067
1068     return 0;
1069 }
1070
1071 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1072 {
1073     int x, y, plane;
1074     int score = 0;
1075     int64_t score64 = 0;
1076
1077     for (plane = 0; plane < 3; plane++) {
1078         const int stride = p->f.linesize[plane];
1079         const int bw = plane ? 1 : 2;
1080         for (y = 0; y < s->mb_height * bw; y++) {
1081             for (x = 0; x < s->mb_width * bw; x++) {
1082                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1083                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1084                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1085                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1086
1087                 switch (s->avctx->frame_skip_exp) {
1088                 case 0: score    =  FFMAX(score, v);          break;
1089                 case 1: score   += FFABS(v);                  break;
1090                 case 2: score   += v * v;                     break;
1091                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1092                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1093                 }
1094             }
1095         }
1096     }
1097
1098     if (score)
1099         score64 = score;
1100
1101     if (score64 < s->avctx->frame_skip_threshold)
1102         return 1;
1103     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1104         return 1;
1105     return 0;
1106 }
1107
1108 static int estimate_best_b_count(MpegEncContext *s)
1109 {
1110     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1111     AVCodecContext *c = avcodec_alloc_context3(NULL);
1112     AVFrame input[FF_MAX_B_FRAMES + 2];
1113     const int scale = s->avctx->brd_scale;
1114     int i, j, out_size, p_lambda, b_lambda, lambda2;
1115     int outbuf_size  = s->width * s->height; // FIXME
1116     uint8_t *outbuf  = av_malloc(outbuf_size);
1117     int64_t best_rd  = INT64_MAX;
1118     int best_b_count = -1;
1119
1120     assert(scale >= 0 && scale <= 3);
1121
1122     //emms_c();
1123     //s->next_picture_ptr->quality;
1124     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1125     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1126     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1127     if (!b_lambda) // FIXME we should do this somewhere else
1128         b_lambda = p_lambda;
1129     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1130                FF_LAMBDA_SHIFT;
1131
1132     c->width        = s->width  >> scale;
1133     c->height       = s->height >> scale;
1134     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1135                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1136     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1137     c->mb_decision  = s->avctx->mb_decision;
1138     c->me_cmp       = s->avctx->me_cmp;
1139     c->mb_cmp       = s->avctx->mb_cmp;
1140     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1141     c->pix_fmt      = PIX_FMT_YUV420P;
1142     c->time_base    = s->avctx->time_base;
1143     c->max_b_frames = s->max_b_frames;
1144
1145     if (avcodec_open2(c, codec, NULL) < 0)
1146         return -1;
1147
1148     for (i = 0; i < s->max_b_frames + 2; i++) {
1149         int ysize = c->width * c->height;
1150         int csize = (c->width / 2) * (c->height / 2);
1151         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1152                                                 s->next_picture_ptr;
1153
1154         avcodec_get_frame_defaults(&input[i]);
1155         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1156         input[i].data[1]     = input[i].data[0] + ysize;
1157         input[i].data[2]     = input[i].data[1] + csize;
1158         input[i].linesize[0] = c->width;
1159         input[i].linesize[1] =
1160         input[i].linesize[2] = c->width / 2;
1161
1162         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1163             pre_input = *pre_input_ptr;
1164
1165             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1166                 pre_input.f.data[0] += INPLACE_OFFSET;
1167                 pre_input.f.data[1] += INPLACE_OFFSET;
1168                 pre_input.f.data[2] += INPLACE_OFFSET;
1169             }
1170
1171             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1172                                  pre_input.f.data[0], pre_input.f.linesize[0],
1173                                  c->width,      c->height);
1174             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1175                                  pre_input.f.data[1], pre_input.f.linesize[1],
1176                                  c->width >> 1, c->height >> 1);
1177             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1178                                  pre_input.f.data[2], pre_input.f.linesize[2],
1179                                  c->width >> 1, c->height >> 1);
1180         }
1181     }
1182
1183     for (j = 0; j < s->max_b_frames + 1; j++) {
1184         int64_t rd = 0;
1185
1186         if (!s->input_picture[j])
1187             break;
1188
1189         c->error[0] = c->error[1] = c->error[2] = 0;
1190
1191         input[0].pict_type = AV_PICTURE_TYPE_I;
1192         input[0].quality   = 1 * FF_QP2LAMBDA;
1193         out_size           = avcodec_encode_video(c, outbuf,
1194                                                   outbuf_size, &input[0]);
1195         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1196
1197         for (i = 0; i < s->max_b_frames + 1; i++) {
1198             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1199
1200             input[i + 1].pict_type = is_p ?
1201                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1202             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1203             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1204                                             &input[i + 1]);
1205             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1206         }
1207
1208         /* get the delayed frames */
1209         while (out_size) {
1210             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1211             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1212         }
1213
1214         rd += c->error[0] + c->error[1] + c->error[2];
1215
1216         if (rd < best_rd) {
1217             best_rd = rd;
1218             best_b_count = j;
1219         }
1220     }
1221
1222     av_freep(&outbuf);
1223     avcodec_close(c);
1224     av_freep(&c);
1225
1226     for (i = 0; i < s->max_b_frames + 2; i++) {
1227         av_freep(&input[i].data[0]);
1228     }
1229
1230     return best_b_count;
1231 }
1232
1233 static int select_input_picture(MpegEncContext *s)
1234 {
1235     int i;
1236
1237     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1238         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1239     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1240
1241     /* set next picture type & ordering */
1242     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1243         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1244             s->next_picture_ptr == NULL || s->intra_only) {
1245             s->reordered_input_picture[0] = s->input_picture[0];
1246             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1247             s->reordered_input_picture[0]->f.coded_picture_number =
1248                 s->coded_picture_number++;
1249         } else {
1250             int b_frames;
1251
1252             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1253                 if (s->picture_in_gop_number < s->gop_size &&
1254                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1255                     // FIXME check that te gop check above is +-1 correct
1256                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1257                     //       s->input_picture[0]->f.data[0],
1258                     //       s->input_picture[0]->pts);
1259
1260                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1261                         for (i = 0; i < 4; i++)
1262                             s->input_picture[0]->f.data[i] = NULL;
1263                         s->input_picture[0]->f.type = 0;
1264                     } else {
1265                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1266                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1267
1268                         s->avctx->release_buffer(s->avctx,
1269                                                  (AVFrame *) s->input_picture[0]);
1270                     }
1271
1272                     emms_c();
1273                     ff_vbv_update(s, 0);
1274
1275                     goto no_output_pic;
1276                 }
1277             }
1278
1279             if (s->flags & CODEC_FLAG_PASS2) {
1280                 for (i = 0; i < s->max_b_frames + 1; i++) {
1281                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1282
1283                     if (pict_num >= s->rc_context.num_entries)
1284                         break;
1285                     if (!s->input_picture[i]) {
1286                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1287                         break;
1288                     }
1289
1290                     s->input_picture[i]->f.pict_type =
1291                         s->rc_context.entry[pict_num].new_pict_type;
1292                 }
1293             }
1294
1295             if (s->avctx->b_frame_strategy == 0) {
1296                 b_frames = s->max_b_frames;
1297                 while (b_frames && !s->input_picture[b_frames])
1298                     b_frames--;
1299             } else if (s->avctx->b_frame_strategy == 1) {
1300                 for (i = 1; i < s->max_b_frames + 1; i++) {
1301                     if (s->input_picture[i] &&
1302                         s->input_picture[i]->b_frame_score == 0) {
1303                         s->input_picture[i]->b_frame_score =
1304                             get_intra_count(s,
1305                                             s->input_picture[i    ]->f.data[0],
1306                                             s->input_picture[i - 1]->f.data[0],
1307                                             s->linesize) + 1;
1308                     }
1309                 }
1310                 for (i = 0; i < s->max_b_frames + 1; i++) {
1311                     if (s->input_picture[i] == NULL ||
1312                         s->input_picture[i]->b_frame_score - 1 >
1313                             s->mb_num / s->avctx->b_sensitivity)
1314                         break;
1315                 }
1316
1317                 b_frames = FFMAX(0, i - 1);
1318
1319                 /* reset scores */
1320                 for (i = 0; i < b_frames + 1; i++) {
1321                     s->input_picture[i]->b_frame_score = 0;
1322                 }
1323             } else if (s->avctx->b_frame_strategy == 2) {
1324                 b_frames = estimate_best_b_count(s);
1325             } else {
1326                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1327                 b_frames = 0;
1328             }
1329
1330             emms_c();
1331             //static int b_count = 0;
1332             //b_count += b_frames;
1333             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1334
1335             for (i = b_frames - 1; i >= 0; i--) {
1336                 int type = s->input_picture[i]->f.pict_type;
1337                 if (type && type != AV_PICTURE_TYPE_B)
1338                     b_frames = i;
1339             }
1340             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1341                 b_frames == s->max_b_frames) {
1342                 av_log(s->avctx, AV_LOG_ERROR,
1343                        "warning, too many b frames in a row\n");
1344             }
1345
1346             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1347                 if ((s->flags2 & CODEC_FLAG2_STRICT_GOP) &&
1348                     s->gop_size > s->picture_in_gop_number) {
1349                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1350                 } else {
1351                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1352                         b_frames = 0;
1353                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1354                 }
1355             }
1356
1357             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1358                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1359                 b_frames--;
1360
1361             s->reordered_input_picture[0] = s->input_picture[b_frames];
1362             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1363                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1364             s->reordered_input_picture[0]->f.coded_picture_number =
1365                 s->coded_picture_number++;
1366             for (i = 0; i < b_frames; i++) {
1367                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1368                 s->reordered_input_picture[i + 1]->f.pict_type =
1369                     AV_PICTURE_TYPE_B;
1370                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1371                     s->coded_picture_number++;
1372             }
1373         }
1374     }
1375 no_output_pic:
1376     if (s->reordered_input_picture[0]) {
1377         s->reordered_input_picture[0]->f.reference =
1378            s->reordered_input_picture[0]->f.pict_type !=
1379                AV_PICTURE_TYPE_B ? 3 : 0;
1380
1381         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1382
1383         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1384             s->avctx->rc_buffer_size) {
1385             // input is a shared pix, so we can't modifiy it -> alloc a new
1386             // one & ensure that the shared one is reuseable
1387
1388             Picture *pic;
1389             int i = ff_find_unused_picture(s, 0);
1390             if (i < 0)
1391                 return i;
1392             pic = &s->picture[i];
1393
1394             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1395             if (ff_alloc_picture(s, pic, 0) < 0) {
1396                 return -1;
1397             }
1398
1399             /* mark us unused / free shared pic */
1400             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1401                 s->avctx->release_buffer(s->avctx,
1402                                          (AVFrame *) s->reordered_input_picture[0]);
1403             for (i = 0; i < 4; i++)
1404                 s->reordered_input_picture[0]->f.data[i] = NULL;
1405             s->reordered_input_picture[0]->f.type = 0;
1406
1407             copy_picture_attributes(s, (AVFrame *) pic,
1408                                     (AVFrame *) s->reordered_input_picture[0]);
1409
1410             s->current_picture_ptr = pic;
1411         } else {
1412             // input is not a shared pix -> reuse buffer for current_pix
1413
1414             assert(s->reordered_input_picture[0]->f.type ==
1415                        FF_BUFFER_TYPE_USER ||
1416                    s->reordered_input_picture[0]->f.type ==
1417                        FF_BUFFER_TYPE_INTERNAL);
1418
1419             s->current_picture_ptr = s->reordered_input_picture[0];
1420             for (i = 0; i < 4; i++) {
1421                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1422             }
1423         }
1424         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1425
1426         s->picture_number = s->new_picture.f.display_picture_number;
1427         //printf("dpn:%d\n", s->picture_number);
1428     } else {
1429         memset(&s->new_picture, 0, sizeof(Picture));
1430     }
1431     return 0;
1432 }
1433
/**
 * Encode one picture.
 *
 * Queues the supplied frame (load_input_picture()), selects the picture to
 * code next (select_input_picture()) and, if one is available, encodes it
 * into buf. With rate control buffering enabled the picture is re-encoded
 * with a larger lambda while it does not fit the allowed VBV share.
 * Stuffing is appended when ff_vbv_update() requests it, and for CBR
 * MPEG-1/2 style output the vbv_delay field of the already written header
 * is patched.
 *
 * @param avctx    codec context; priv_data is the MpegEncContext
 * @param buf      output buffer
 * @param buf_size size of buf in bytes
 * @param data     input AVFrame (passed on to load_input_picture())
 * @return number of bytes written to buf (0 if no picture was output),
 *         or -1 on error
 */
1434 int MPV_encode_picture(AVCodecContext *avctx,
1435                        unsigned char *buf, int buf_size, void *data)
1436 {
1437     MpegEncContext *s = avctx->priv_data;
1438     AVFrame *pic_arg  = data;
1439     int i, stuffing_count;
1440     int context_count = s->slice_context_count;
1441
    /* give every slice context a share of the output buffer that is
     * proportional to the macroblock rows it covers */
1442     for (i = 0; i < context_count; i++) {
1443         int start_y = s->thread_context[i]->start_mb_y;
1444         int   end_y = s->thread_context[i]->  end_mb_y;
1445         int h       = s->mb_height;
1446         uint8_t *start = buf + (size_t)(((int64_t) buf_size) * start_y / h);
1447         uint8_t *end   = buf + (size_t)(((int64_t) buf_size) *   end_y / h);
1448
1449         init_put_bits(&s->thread_context[i]->pb, start, end - start);
1450     }
1451
1452     s->picture_in_gop_number++;
1453
1454     if (load_input_picture(s, pic_arg) < 0)
1455         return -1;
1456
1457     if (select_input_picture(s) < 0) {
1458         return -1;
1459     }
1460
1461     /* output? */
1462     if (s->new_picture.f.data[0]) {
1463         s->pict_type = s->new_picture.f.pict_type;
1464         //emms_c();
1465         //printf("qs:%f %f %d\n", s->new_picture.quality,
1466         //       s->current_picture.quality, s->qscale);
1467         MPV_frame_start(s, avctx);
        /* re-entry point when the coded picture exceeded the VBV share */
1468 vbv_retry:
1469         if (encode_picture(s, s->picture_number) < 0)
1470             return -1;
1471
        /* export the per-picture statistics to the codec context */
1472         avctx->header_bits = s->header_bits;
1473         avctx->mv_bits     = s->mv_bits;
1474         avctx->misc_bits   = s->misc_bits;
1475         avctx->i_tex_bits  = s->i_tex_bits;
1476         avctx->p_tex_bits  = s->p_tex_bits;
1477         avctx->i_count     = s->i_count;
1478         // FIXME f/b_count in avctx
1479         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1480         avctx->skip_count  = s->skip_count;
1481
1482         MPV_frame_end(s);
1483
1484         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1485             ff_mjpeg_encode_picture_trailer(s);
1486
1487         if (avctx->rc_buffer_size) {
1488             RateControlContext *rcc = &s->rc_context;
1489             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1490
            /* picture too large and lambda can still grow: raise lambda,
             * roll back the state changed by the failed attempt, rewind
             * the bit writers and encode again */
1491             if (put_bits_count(&s->pb) > max_size &&
1492                 s->lambda < s->avctx->lmax) {
1493                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1494                                        (s->qscale + 1) / s->qscale);
1495                 if (s->adaptive_quant) {
1496                     int i;
1497                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1498                         s->lambda_table[i] =
1499                             FFMAX(s->lambda_table[i] + 1,
1500                                   s->lambda_table[i] * (s->qscale + 1) /
1501                                   s->qscale);
1502                 }
1503                 s->mb_skipped = 0;        // done in MPV_frame_start()
1504                 // done in encode_picture() so we must undo it
1505                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1506                     if (s->flipflop_rounding          ||
1507                         s->codec_id == CODEC_ID_H263P ||
1508                         s->codec_id == CODEC_ID_MPEG4)
1509                         s->no_rounding ^= 1;
1510                 }
1511                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1512                     s->time_base       = s->last_time_base;
1513                     s->last_non_b_time = s->time - s->pp_time;
1514                 }
1515                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1516                 for (i = 0; i < context_count; i++) {
1517                     PutBitContext *pb = &s->thread_context[i]->pb;
1518                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1519                 }
1520                 goto vbv_retry;
1521             }
1522
1523             assert(s->avctx->rc_max_rate);
1524         }
1525
1526         if (s->flags & CODEC_FLAG_PASS1)
1527             ff_write_pass1_stats(s);
1528
1529         for (i = 0; i < 4; i++) {
1530             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1531             avctx->error[i] += s->current_picture_ptr->f.error[i];
1532         }
1533
1534         if (s->flags & CODEC_FLAG_PASS1)
1535             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1536                    avctx->i_tex_bits + avctx->p_tex_bits ==
1537                        put_bits_count(&s->pb));
1538         flush_put_bits(&s->pb);
1539         s->frame_bits  = put_bits_count(&s->pb);
1540
        /* append the number of stuffing bytes requested by rate control,
         * using the codec-specific stuffing pattern */
1541         stuffing_count = ff_vbv_update(s, s->frame_bits);
1542         if (stuffing_count) {
1543             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1544                     stuffing_count + 50) {
1545                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1546                 return -1;
1547             }
1548
1549             switch (s->codec_id) {
1550             case CODEC_ID_MPEG1VIDEO:
1551             case CODEC_ID_MPEG2VIDEO:
1552                 while (stuffing_count--) {
1553                     put_bits(&s->pb, 8, 0);
1554                 }
1555             break;
1556             case CODEC_ID_MPEG4:
                /* 4-byte code 0x000001C3 followed by 0xFF bytes */
1557                 put_bits(&s->pb, 16, 0);
1558                 put_bits(&s->pb, 16, 0x1C3);
1559                 stuffing_count -= 4;
1560                 while (stuffing_count--) {
1561                     put_bits(&s->pb, 8, 0xFF);
1562                 }
1563             break;
1564             default:
1565                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1566             }
1567             flush_put_bits(&s->pb);
1568             s->frame_bits  = put_bits_count(&s->pb);
1569         }
1570
1571         /* update mpeg1/2 vbv_delay for CBR */
1572         if (s->avctx->rc_max_rate                          &&
1573             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1574             s->out_format == FMT_MPEG1                     &&
1575             90000LL * (avctx->rc_buffer_size - 1) <=
1576                 s->avctx->rc_max_rate * 0xFFFFLL) {
1577             int vbv_delay, min_delay;
1578             double inbits  = s->avctx->rc_max_rate *
1579                              av_q2d(s->avctx->time_base);
1580             int    minbits = s->frame_bits - 8 *
1581                              (s->vbv_delay_ptr - s->pb.buf - 1);
1582             double bits    = s->rc_context.buffer_index + minbits - inbits;
1583
1584             if (bits < 0)
1585                 av_log(s->avctx, AV_LOG_ERROR,
1586                        "Internal error, negative bits\n");
1587
1588             assert(s->repeat_first_field == 0);
1589
            /* delays are expressed in 90000ths of a second */
1590             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1591             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1592                         s->avctx->rc_max_rate;
1593
1594             vbv_delay = FFMAX(vbv_delay, min_delay);
1595
1596             assert(vbv_delay < 0xFFFF);
1597
            /* patch the 16-bit value into the three bytes it spans in the
             * already written bitstream */
1598             s->vbv_delay_ptr[0] &= 0xF8;
1599             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1600             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1601             s->vbv_delay_ptr[2] &= 0x07;
1602             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1603             avctx->vbv_delay     = vbv_delay * 300;
1604         }
1605         s->total_bits     += s->frame_bits;
1606         avctx->frame_bits  = s->frame_bits;
1607     } else {
        /* nothing to output for this call */
1608         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1609         s->frame_bits = 0;
1610     }
1611     assert((s->frame_bits & 7) == 0);
1612
1613     return s->frame_bits / 8;
1614 }
1615
1616 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1617                                                 int n, int threshold)
1618 {
1619     static const char tab[64] = {
1620         3, 2, 2, 1, 1, 1, 1, 1,
1621         1, 1, 1, 1, 1, 1, 1, 1,
1622         1, 1, 1, 1, 1, 1, 1, 1,
1623         0, 0, 0, 0, 0, 0, 0, 0,
1624         0, 0, 0, 0, 0, 0, 0, 0,
1625         0, 0, 0, 0, 0, 0, 0, 0,
1626         0, 0, 0, 0, 0, 0, 0, 0,
1627         0, 0, 0, 0, 0, 0, 0, 0
1628     };
1629     int score = 0;
1630     int run = 0;
1631     int i;
1632     DCTELEM *block = s->block[n];
1633     const int last_index = s->block_last_index[n];
1634     int skip_dc;
1635
1636     if (threshold < 0) {
1637         skip_dc = 0;
1638         threshold = -threshold;
1639     } else
1640         skip_dc = 1;
1641
1642     /* Are all we could set to zero already zero? */
1643     if (last_index <= skip_dc - 1)
1644         return;
1645
1646     for (i = 0; i <= last_index; i++) {
1647         const int j = s->intra_scantable.permutated[i];
1648         const int level = FFABS(block[j]);
1649         if (level == 1) {
1650             if (skip_dc && i == 0)
1651                 continue;
1652             score += tab[run];
1653             run = 0;
1654         } else if (level > 1) {
1655             return;
1656         } else {
1657             run++;
1658         }
1659     }
1660     if (score >= threshold)
1661         return;
1662     for (i = skip_dc; i <= last_index; i++) {
1663         const int j = s->intra_scantable.permutated[i];
1664         block[j] = 0;
1665     }
1666     if (block[0])
1667         s->block_last_index[n] = 0;
1668     else
1669         s->block_last_index[n] = -1;
1670 }
1671
1672 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1673                                int last_index)
1674 {
1675     int i;
1676     const int maxlevel = s->max_qcoeff;
1677     const int minlevel = s->min_qcoeff;
1678     int overflow = 0;
1679
1680     if (s->mb_intra) {
1681         i = 1; // skip clipping of intra dc
1682     } else
1683         i = 0;
1684
1685     for (; i <= last_index; i++) {
1686         const int j = s->intra_scantable.permutated[i];
1687         int level = block[j];
1688
1689         if (level > maxlevel) {
1690             level = maxlevel;
1691             overflow++;
1692         } else if (level < minlevel) {
1693             level = minlevel;
1694             overflow++;
1695         }
1696
1697         block[j] = level;
1698     }
1699
1700     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1701         av_log(s->avctx, AV_LOG_INFO,
1702                "warning, clipping %d dct coefficients to %d..%d\n",
1703                overflow, minlevel, maxlevel);
1704 }
1705
/**
 * Compute a per-pixel weight for an 8x8 block from the local pixel spread.
 *
 * For each pixel the (up to) 3x3 neighbourhood, clipped to the 8x8 block, is
 * examined; the weight is 36 * sqrt(count * sum(v^2) - sum(v)^2) / count.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total   = 0;
            int squares = 0;
            int samples = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    total   += v;
                    squares += v * v;
                    samples++;
                }
            }

            weight[8 * row + col] =
                (36 * ff_sqrt(samples * squares - total * total)) / samples;
        }
    }
}
1729
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: pick the quantizer (adaptive quant / QP_RD), locate the
 * source pixels (through the edge emulation buffer when the macroblock
 * crosses the picture border), fetch the pixels (intra) or the difference to
 * the motion compensated prediction (inter), optionally select interlaced
 * DCT, quantize, optionally refine with noise shaping, run single
 * coefficient elimination, and finally call the codec specific macroblock
 * writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        passed unchanged to the codec specific writer
 * @param motion_y        passed unchanged to the codec specific writer
 * @param mb_block_height height in lines of the chroma part of a macroblock;
 *                        used for the chroma row offset and edge emulation
 * @param mb_block_count  number of blocks processed per macroblock
 */
1730 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1731                                                 int motion_x, int motion_y,
1732                                                 int mb_block_height,
1733                                                 int mb_block_count)
1734 {
1735     int16_t weight[8][64];
1736     DCTELEM orig[8][64];
1737     const int mb_x = s->mb_x;
1738     const int mb_y = s->mb_y;
1739     int i;
1740     int skip_dct[8];
1741     int dct_offset = s->linesize * 8; // default for progressive frames
1742     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1743     int wrap_y, wrap_c;
1744
         /* every block starts with the context wide skip setting */
1745     for (i = 0; i < mb_block_count; i++)
1746         skip_dct[i] = s->skipdct;
1747
1748     if (s->adaptive_quant) {
1749         const int last_qp = s->qscale;
1750         const int mb_xy = mb_x + mb_y * s->mb_stride;
1751
1752         s->lambda = s->lambda_table[mb_xy];
1753         update_qscale(s);
1754
1755         if (!(s->flags & CODEC_FLAG_QP_RD)) {
                 /* take the per macroblock qscale from the table and express
                  * it as a delta against the previous quantizer */
1756             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1757             s->dquant = s->qscale - last_qp;
1758
1759             if (s->out_format == FMT_H263) {
1760                 s->dquant = av_clip(s->dquant, -2, 2);
1761
1762                 if (s->codec_id == CODEC_ID_MPEG4) {
1763                     if (!s->mb_intra) {
                             /* B pictures: odd deltas and direct mode
                              * macroblocks get no quantizer change */
1764                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1765                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1766                                 s->dquant = 0;
1767                         }
                             /* no quantizer change with 8x8 motion vectors */
1768                         if (s->mv_type == MV_TYPE_8X8)
1769                             s->dquant = 0;
1770                     }
1771                 }
1772             }
1773         }
1774         ff_set_qscale(s, last_qp + s->dquant);
1775     } else if (s->flags & CODEC_FLAG_QP_RD)
1776         ff_set_qscale(s, s->qscale + s->dquant);
1777
         /* source pixel pointers of this macroblock in the input picture */
1778     wrap_y = s->linesize;
1779     wrap_c = s->uvlinesize;
1780     ptr_y  = s->new_picture.f.data[0] +
1781              (mb_y * 16 * wrap_y)              + mb_x * 16;
1782     ptr_cb = s->new_picture.f.data[1] +
1783              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1784     ptr_cr = s->new_picture.f.data[2] +
1785              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1786
         /* macroblock reaches beyond the picture: read through the edge
          * emulation buffer instead (luma at ebuf, Cb at ebuf + 18 * wrap_y,
          * Cr 8 bytes after Cb) */
         /* NOTE(review): the chroma calls pass mb_y * 8 and s->height >> 1
          * even when mb_block_height is 16 - confirm this is intended for
          * 4:2:2 input */
1787     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1788         uint8_t *ebuf = s->edge_emu_buffer + 32;
1789         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1790                                 mb_y * 16, s->width, s->height);
1791         ptr_y = ebuf;
1792         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1793                                 mb_block_height, mb_x * 8, mb_y * 8,
1794                                 s->width >> 1, s->height >> 1);
1795         ptr_cb = ebuf + 18 * wrap_y;
1796         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1797                                 mb_block_height, mb_x * 8, mb_y * 8,
1798                                 s->width >> 1, s->height >> 1);
1799         ptr_cr = ebuf + 18 * wrap_y + 8;
1800     }
1801
1802     if (s->mb_intra) {
1803         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1804             int progressive_score, interlaced_score;
1805
                 /* compare the two luma halves scanned progressively (biased
                  * by -400) against the two fields; the lower score wins */
1806             s->interlaced_dct = 0;
1807             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1808                                                     NULL, wrap_y, 8) +
1809                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1810                                                     NULL, wrap_y, 8) - 400;
1811
1812             if (progressive_score > 0) {
1813                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1814                                                        NULL, wrap_y * 2, 8) +
1815                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1816                                                        NULL, wrap_y * 2, 8);
1817                 if (progressive_score > interlaced_score) {
1818                     s->interlaced_dct = 1;
1819
                         /* field layout: the second block row starts one line
                          * down and rows are two lines apart */
1820                     dct_offset = wrap_y;
1821                     wrap_y <<= 1;
1822                     if (s->chroma_format == CHROMA_422)
1823                         wrap_c <<= 1;
1824                 }
1825             }
1826         }
1827
1828         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1829         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1830         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1831         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1832
             /* NOTE(review): only blocks 4 and 5 are marked as skipped in
              * gray mode; blocks 6 and 7 are neither fetched nor skipped when
              * mb_block_count is 8 - confirm */
1833         if (s->flags & CODEC_FLAG_GRAY) {
1834             skip_dct[4] = 1;
1835             skip_dct[5] = 1;
1836         } else {
1837             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1838             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1839             if (!s->chroma_y_shift) { /* 422 */
1840                 s->dsp.get_pixels(s->block[6],
1841                                   ptr_cb + (dct_offset >> 1), wrap_c);
1842                 s->dsp.get_pixels(s->block[7],
1843                                   ptr_cr + (dct_offset >> 1), wrap_c);
1844             }
1845         }
1846     } else {
1847         op_pixels_func (*op_pix)[4];
1848         qpel_mc_func (*op_qpix)[16];
1849         uint8_t *dest_y, *dest_cb, *dest_cr;
1850
1851         dest_y  = s->dest[0];
1852         dest_cb = s->dest[1];
1853         dest_cr = s->dest[2];
1854
1855         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1856             op_pix  = s->dsp.put_pixels_tab;
1857             op_qpix = s->dsp.put_qpel_pixels_tab;
1858         } else {
1859             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1860             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1861         }
1862
             /* build the prediction in s->dest; once a forward prediction
              * has been written, the backward one is averaged onto it */
1863         if (s->mv_dir & MV_DIR_FORWARD) {
1864             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1865                        op_pix, op_qpix);
1866             op_pix  = s->dsp.avg_pixels_tab;
1867             op_qpix = s->dsp.avg_qpel_pixels_tab;
1868         }
1869         if (s->mv_dir & MV_DIR_BACKWARD) {
1870             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1871                        op_pix, op_qpix);
1872         }
1873
1874         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1875             int progressive_score, interlaced_score;
1876
                 /* same decision as for intra, but measured on the
                  * difference between prediction and source */
1877             s->interlaced_dct = 0;
1878             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1879                                                     ptr_y,              wrap_y,
1880                                                     8) +
1881                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1882                                                     ptr_y + wrap_y * 8, wrap_y,
1883                                                     8) - 400;
1884
1885             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1886                 progressive_score -= 400;
1887
1888             if (progressive_score > 0) {
1889                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1890                                                        ptr_y,
1891                                                        wrap_y * 2, 8) +
1892                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1893                                                        ptr_y + wrap_y,
1894                                                        wrap_y * 2, 8);
1895
1896                 if (progressive_score > interlaced_score) {
1897                     s->interlaced_dct = 1;
1898
1899                     dct_offset = wrap_y;
1900                     wrap_y <<= 1;
1901                     if (s->chroma_format == CHROMA_422)
1902                         wrap_c <<= 1;
1903                 }
1904             }
1905         }
1906
             /* residual = source - prediction */
1907         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1908         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1909         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1910                            dest_y + dct_offset, wrap_y);
1911         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1912                            dest_y + dct_offset + 8, wrap_y);
1913
1914         if (s->flags & CODEC_FLAG_GRAY) {
1915             skip_dct[4] = 1;
1916             skip_dct[5] = 1;
1917         } else {
1918             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1919             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1920             if (!s->chroma_y_shift) { /* 422 */
1921                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1922                                    dest_cb + (dct_offset >> 1), wrap_c);
1923                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1924                                    dest_cr + (dct_offset >> 1), wrap_c);
1925             }
1926         }
1927         /* pre quantization */
             /* when the motion compensated variance of the macroblock is
              * below 2 * qscale^2, skip every block whose SAD against the
              * prediction is below 20 * qscale */
1928         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1929                 2 * s->qscale * s->qscale) {
1930             // FIXME optimize
1931             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1932                               wrap_y, 8) < 20 * s->qscale)
1933                 skip_dct[0] = 1;
1934             if (s->dsp.sad[1](NULL, ptr_y + 8,
1935                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1936                 skip_dct[1] = 1;
1937             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1938                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1939                 skip_dct[2] = 1;
1940             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1941                               dest_y + dct_offset + 8,
1942                               wrap_y, 8) < 20 * s->qscale)
1943                 skip_dct[3] = 1;
1944             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1945                               wrap_c, 8) < 20 * s->qscale)
1946                 skip_dct[4] = 1;
1947             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1948                               wrap_c, 8) < 20 * s->qscale)
1949                 skip_dct[5] = 1;
1950             if (!s->chroma_y_shift) { /* 422 */
1951                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1952                                   dest_cb + (dct_offset >> 1),
1953                                   wrap_c, 8) < 20 * s->qscale)
1954                     skip_dct[6] = 1;
1955                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1956                                   dest_cr + (dct_offset >> 1),
1957                                   wrap_c, 8) < 20 * s->qscale)
1958                     skip_dct[7] = 1;
1959             }
1960         }
1961     }
1962
         /* noise shaping: compute the weights of all blocks that will be
          * coded and keep a copy of the unquantized blocks for the refinement
          * pass below */
1963     if (s->avctx->quantizer_noise_shaping) {
1964         if (!skip_dct[0])
1965             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1966         if (!skip_dct[1])
1967             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1968         if (!skip_dct[2])
1969             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1970         if (!skip_dct[3])
1971             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1972         if (!skip_dct[4])
1973             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1974         if (!skip_dct[5])
1975             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1976         if (!s->chroma_y_shift) { /* 422 */
1977             if (!skip_dct[6])
1978                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1979                                   wrap_c);
1980             if (!skip_dct[7])
1981                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1982                                   wrap_c);
1983         }
1984         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1985     }
1986
1987     /* DCT & quantize */
1988     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1989     {
1990         for (i = 0; i < mb_block_count; i++) {
1991             if (!skip_dct[i]) {
1992                 int overflow;
1993                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1994                 // FIXME we could decide to change to quantizer instead of
1995                 // clipping
1996                 // JS: I don't think that would be a good idea it could lower
1997                 //     quality instead of improve it. Just INTRADC clipping
1998                 //     deserves changes in quantizer
1999                 if (overflow)
2000                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2001             } else
                     /* skipped blocks are marked as empty */
2002                 s->block_last_index[i] = -1;
2003         }
2004         if (s->avctx->quantizer_noise_shaping) {
2005             for (i = 0; i < mb_block_count; i++) {
2006                 if (!skip_dct[i]) {
2007                     s->block_last_index[i] =
2008                         dct_quantize_refine(s, s->block[i], weight[i],
2009                                             orig[i], i, s->qscale);
2010                 }
2011             }
2012         }
2013
             /* single coefficient elimination is applied to inter
              * macroblocks only: blocks 0-3 use the luma threshold, the
              * remaining blocks the chroma threshold */
2014         if (s->luma_elim_threshold && !s->mb_intra)
2015             for (i = 0; i < 4; i++)
2016                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2017         if (s->chroma_elim_threshold && !s->mb_intra)
2018             for (i = 4; i < mb_block_count; i++)
2019                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2020
2021         if (s->flags & CODEC_FLAG_CBP_RD) {
2022             for (i = 0; i < mb_block_count; i++) {
2023                 if (s->block_last_index[i] == -1)
2024                     s->coded_score[i] = INT_MAX / 256;
2025             }
2026         }
2027     }
2028
         /* gray intra macroblocks: chroma blocks 4 and 5 carry only a DC
          * coefficient of (1024 + c_dc_scale / 2) / c_dc_scale */
         /* NOTE(review): blocks 6 and 7 are not set here - confirm for
          * mb_block_count == 8 */
2029     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2030         s->block_last_index[4] =
2031         s->block_last_index[5] = 0;
2032         s->block[4][0] =
2033         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2034     }
2035
2036     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by searching backwards for the last
          * non-zero coefficient in scan order */
2037     if (s->alternate_scan && s->dct_quantize != dct_quantize_c) {
2038         for (i = 0; i < mb_block_count; i++) {
2039             int j;
2040             if (s->block_last_index[i] > 0) {
2041                 for (j = 63; j > 0; j--) {
2042                     if (s->block[i][s->intra_scantable.permutated[j]])
2043                         break;
2044                 }
2045                 s->block_last_index[i] = j;
2046             }
2047         }
2048     }
2049
2050     /* huffman encode */
         /* each call is guarded by its CONFIG_* constant so that writers of
          * disabled encoders are not referenced */
2051     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2052     case CODEC_ID_MPEG1VIDEO:
2053     case CODEC_ID_MPEG2VIDEO:
2054         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2055             mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2056         break;
2057     case CODEC_ID_MPEG4:
2058         if (CONFIG_MPEG4_ENCODER)
2059             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2060         break;
2061     case CODEC_ID_MSMPEG4V2:
2062     case CODEC_ID_MSMPEG4V3:
2063     case CODEC_ID_WMV1:
2064         if (CONFIG_MSMPEG4_ENCODER)
2065             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2066         break;
2067     case CODEC_ID_WMV2:
2068         if (CONFIG_WMV2_ENCODER)
2069             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2070         break;
2071     case CODEC_ID_H261:
2072         if (CONFIG_H261_ENCODER)
2073             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2074         break;
2075     case CODEC_ID_H263:
2076     case CODEC_ID_H263P:
2077     case CODEC_ID_FLV1:
2078     case CODEC_ID_RV10:
2079     case CODEC_ID_RV20:
2080         if (CONFIG_H263_ENCODER)
2081             h263_encode_mb(s, s->block, motion_x, motion_y);
2082         break;
2083     case CODEC_ID_MJPEG:
2084         if (CONFIG_MJPEG_ENCODER)
2085             ff_mjpeg_encode_mb(s, s->block);
2086         break;
2087     default:
2088         assert(0);
2089     }
2090 }
2091
2092 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2093 {
2094     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2095     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2096 }
2097
2098 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2099     int i;
2100
2101     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2102
2103     /* mpeg1 */
2104     d->mb_skip_run= s->mb_skip_run;
2105     for(i=0; i<3; i++)
2106         d->last_dc[i] = s->last_dc[i];
2107
2108     /* statistics */
2109     d->mv_bits= s->mv_bits;
2110     d->i_tex_bits= s->i_tex_bits;
2111     d->p_tex_bits= s->p_tex_bits;
2112     d->i_count= s->i_count;
2113     d->f_count= s->f_count;
2114     d->b_count= s->b_count;
2115     d->skip_count= s->skip_count;
2116     d->misc_bits= s->misc_bits;
2117     d->last_bits= 0;
2118
2119     d->mb_skipped= 0;
2120     d->qscale= s->qscale;
2121     d->dquant= s->dquant;
2122
2123     d->esc3_level_length= s->esc3_level_length;
2124 }
2125
2126 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2127     int i;
2128
2129     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2130     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2131
2132     /* mpeg1 */
2133     d->mb_skip_run= s->mb_skip_run;
2134     for(i=0; i<3; i++)
2135         d->last_dc[i] = s->last_dc[i];
2136
2137     /* statistics */
2138     d->mv_bits= s->mv_bits;
2139     d->i_tex_bits= s->i_tex_bits;
2140     d->p_tex_bits= s->p_tex_bits;
2141     d->i_count= s->i_count;
2142     d->f_count= s->f_count;
2143     d->b_count= s->b_count;
2144     d->skip_count= s->skip_count;
2145     d->misc_bits= s->misc_bits;
2146
2147     d->mb_intra= s->mb_intra;
2148     d->mb_skipped= s->mb_skipped;
2149     d->mv_type= s->mv_type;
2150     d->mv_dir= s->mv_dir;
2151     d->pb= s->pb;
2152     if(s->data_partitioning){
2153         d->pb2= s->pb2;
2154         d->tex_pb= s->tex_pb;
2155     }
2156     d->block= s->block;
2157     for(i=0; i<8; i++)
2158         d->block_last_index[i]= s->block_last_index[i];
2159     d->interlaced_dct= s->interlaced_dct;
2160     d->qscale= s->qscale;
2161
2162     d->esc3_level_length= s->esc3_level_length;
2163 }
2164
2165 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2166                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2167                            int *dmin, int *next_block, int motion_x, int motion_y)
2168 {
2169     int score;
2170     uint8_t *dest_backup[3];
2171
2172     copy_context_before_encode(s, backup, type);
2173
2174     s->block= s->blocks[*next_block];
2175     s->pb= pb[*next_block];
2176     if(s->data_partitioning){
2177         s->pb2   = pb2   [*next_block];
2178         s->tex_pb= tex_pb[*next_block];
2179     }
2180
2181     if(*next_block){
2182         memcpy(dest_backup, s->dest, sizeof(s->dest));
2183         s->dest[0] = s->rd_scratchpad;
2184         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2185         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2186         assert(s->linesize >= 32); //FIXME
2187     }
2188
2189     encode_mb(s, motion_x, motion_y);
2190
2191     score= put_bits_count(&s->pb);
2192     if(s->data_partitioning){
2193         score+= put_bits_count(&s->pb2);
2194         score+= put_bits_count(&s->tex_pb);
2195     }
2196
2197     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2198         MPV_decode_mb(s, s->block);
2199
2200         score *= s->lambda2;
2201         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2202     }
2203
2204     if(*next_block){
2205         memcpy(s->dest, dest_backup, sizeof(s->dest));
2206     }
2207
2208     if(score<*dmin){
2209         *dmin= score;
2210         *next_block^=1;
2211
2212         copy_context_after_encode(best, s, type);
2213     }
2214 }
2215
2216 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2217     uint32_t *sq = ff_squareTbl + 256;
2218     int acc=0;
2219     int x,y;
2220
2221     if(w==16 && h==16)
2222         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2223     else if(w==8 && h==8)
2224         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2225
2226     for(y=0; y<h; y++){
2227         for(x=0; x<w; x++){
2228             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2229         }
2230     }
2231
2232     assert(acc>=0);
2233
2234     return acc;
2235 }
2236
2237 static int sse_mb(MpegEncContext *s){
2238     int w= 16;
2239     int h= 16;
2240
2241     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2242     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2243
2244     if(w==16 && h==16)
2245       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2246         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2247                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2248                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2249       }else{
2250         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2251                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2252                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2253       }
2254     else
2255         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2256                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2257                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2258 }
2259
2260 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2261     MpegEncContext *s= *(void**)arg;
2262
2263
2264     s->me.pre_pass=1;
2265     s->me.dia_size= s->avctx->pre_dia_size;
2266     s->first_slice_line=1;
2267     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2268         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2269             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2270         }
2271         s->first_slice_line=0;
2272     }
2273
2274     s->me.pre_pass=0;
2275
2276     return 0;
2277 }
2278
2279 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2280     MpegEncContext *s= *(void**)arg;
2281
2282     ff_check_alignment();
2283
2284     s->me.dia_size= s->avctx->dia_size;
2285     s->first_slice_line=1;
2286     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2287         s->mb_x=0; //for block init below
2288         ff_init_block_index(s);
2289         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2290             s->block_index[0]+=2;
2291             s->block_index[1]+=2;
2292             s->block_index[2]+=2;
2293             s->block_index[3]+=2;
2294
2295             /* compute motion vector & mb_type and store in context */
2296             if(s->pict_type==AV_PICTURE_TYPE_B)
2297                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2298             else
2299                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2300         }
2301         s->first_slice_line=0;
2302     }
2303     return 0;
2304 }
2305
2306 static int mb_var_thread(AVCodecContext *c, void *arg){
2307     MpegEncContext *s= *(void**)arg;
2308     int mb_x, mb_y;
2309
2310     ff_check_alignment();
2311
2312     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2313         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2314             int xx = mb_x * 16;
2315             int yy = mb_y * 16;
2316             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2317             int varc;
2318             int sum = s->dsp.pix_sum(pix, s->linesize);
2319
2320             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2321
2322             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2323             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2324             s->me.mb_var_sum_temp    += varc;
2325         }
2326     }
2327     return 0;
2328 }
2329
2330 static void write_slice_end(MpegEncContext *s){
2331     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2332         if(s->partitioned_frame){
2333             ff_mpeg4_merge_partitions(s);
2334         }
2335
2336         ff_mpeg4_stuffing(&s->pb);
2337     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2338         ff_mjpeg_encode_stuffing(&s->pb);
2339     }
2340
2341     avpriv_align_put_bits(&s->pb);
2342     flush_put_bits(&s->pb);
2343
2344     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2345         s->misc_bits+= get_bits_diff(s);
2346 }
2347
2348 static int encode_thread(AVCodecContext *c, void *arg){
2349     MpegEncContext *s= *(void**)arg;
2350     int mb_x, mb_y, pdif = 0;
2351     int chr_h= 16>>s->chroma_y_shift;
2352     int i, j;
2353     MpegEncContext best_s, backup_s;
2354     uint8_t bit_buf[2][MAX_MB_BYTES];
2355     uint8_t bit_buf2[2][MAX_MB_BYTES];
2356     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2357     PutBitContext pb[2], pb2[2], tex_pb[2];
2358 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2359
2360     ff_check_alignment();
2361
2362     for(i=0; i<2; i++){
2363         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2364         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2365         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2366     }
2367
2368     s->last_bits= put_bits_count(&s->pb);
2369     s->mv_bits=0;
2370     s->misc_bits=0;
2371     s->i_tex_bits=0;
2372     s->p_tex_bits=0;
2373     s->i_count=0;
2374     s->f_count=0;
2375     s->b_count=0;
2376     s->skip_count=0;
2377
2378     for(i=0; i<3; i++){
2379         /* init last dc values */
2380         /* note: quant matrix value (8) is implied here */
2381         s->last_dc[i] = 128 << s->intra_dc_precision;
2382
2383         s->current_picture.f.error[i] = 0;
2384     }
2385     s->mb_skip_run = 0;
2386     memset(s->last_mv, 0, sizeof(s->last_mv));
2387
2388     s->last_mv_dir = 0;
2389
2390     switch(s->codec_id){
2391     case CODEC_ID_H263:
2392     case CODEC_ID_H263P:
2393     case CODEC_ID_FLV1:
2394         if (CONFIG_H263_ENCODER)
2395             s->gob_index = ff_h263_get_gob_height(s);
2396         break;
2397     case CODEC_ID_MPEG4:
2398         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2399             ff_mpeg4_init_partitions(s);
2400         break;
2401     }
2402
2403     s->resync_mb_x=0;
2404     s->resync_mb_y=0;
2405     s->first_slice_line = 1;
2406     s->ptr_lastgob = s->pb.buf;
2407     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2408 //    printf("row %d at %X\n", s->mb_y, (int)s);
2409         s->mb_x=0;
2410         s->mb_y= mb_y;
2411
2412         ff_set_qscale(s, s->qscale);
2413         ff_init_block_index(s);
2414
2415         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2416             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2417             int mb_type= s->mb_type[xy];
2418 //            int d;
2419             int dmin= INT_MAX;
2420             int dir;
2421
2422             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2423                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2424                 return -1;
2425             }
2426             if(s->data_partitioning){
2427                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2428                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2429                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2430                     return -1;
2431                 }
2432             }
2433
2434             s->mb_x = mb_x;
2435             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2436             ff_update_block_index(s);
2437
2438             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2439                 ff_h261_reorder_mb_index(s);
2440                 xy= s->mb_y*s->mb_stride + s->mb_x;
2441                 mb_type= s->mb_type[xy];
2442             }
2443
2444             /* write gob / video packet header  */
2445             if(s->rtp_mode){
2446                 int current_packet_size, is_gob_start;
2447
2448                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2449
2450                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2451
2452                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2453
2454                 switch(s->codec_id){
2455                 case CODEC_ID_H263:
2456                 case CODEC_ID_H263P:
2457                     if(!s->h263_slice_structured)
2458                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2459                     break;
2460                 case CODEC_ID_MPEG2VIDEO:
2461                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2462                 case CODEC_ID_MPEG1VIDEO:
2463                     if(s->mb_skip_run) is_gob_start=0;
2464                     break;
2465                 }
2466
2467                 if(is_gob_start){
2468                     if(s->start_mb_y != mb_y || mb_x!=0){
2469                         write_slice_end(s);
2470
2471                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2472                             ff_mpeg4_init_partitions(s);
2473                         }
2474                     }
2475
2476                     assert((put_bits_count(&s->pb)&7) == 0);
2477                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2478
2479                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2480                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2481                         int d= 100 / s->avctx->error_rate;
2482                         if(r % d == 0){
2483                             current_packet_size=0;
2484                             s->pb.buf_ptr= s->ptr_lastgob;
2485                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2486                         }
2487                     }
2488
2489                     if (s->avctx->rtp_callback){
2490                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2491                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2492                     }
2493
2494                     switch(s->codec_id){
2495                     case CODEC_ID_MPEG4:
2496                         if (CONFIG_MPEG4_ENCODER) {
2497                             ff_mpeg4_encode_video_packet_header(s);
2498                             ff_mpeg4_clean_buffers(s);
2499                         }
2500                     break;
2501                     case CODEC_ID_MPEG1VIDEO:
2502                     case CODEC_ID_MPEG2VIDEO:
2503                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2504                             ff_mpeg1_encode_slice_header(s);
2505                             ff_mpeg1_clean_buffers(s);
2506                         }
2507                     break;
2508                     case CODEC_ID_H263:
2509                     case CODEC_ID_H263P:
2510                         if (CONFIG_H263_ENCODER)
2511                             h263_encode_gob_header(s, mb_y);
2512                     break;
2513                     }
2514
2515                     if(s->flags&CODEC_FLAG_PASS1){
2516                         int bits= put_bits_count(&s->pb);
2517                         s->misc_bits+= bits - s->last_bits;
2518                         s->last_bits= bits;
2519                     }
2520
2521                     s->ptr_lastgob += current_packet_size;
2522                     s->first_slice_line=1;
2523                     s->resync_mb_x=mb_x;
2524                     s->resync_mb_y=mb_y;
2525                 }
2526             }
2527
2528             if(  (s->resync_mb_x   == s->mb_x)
2529                && s->resync_mb_y+1 == s->mb_y){
2530                 s->first_slice_line=0;
2531             }
2532
2533             s->mb_skipped=0;
2534             s->dquant=0; //only for QP_RD
2535
2536             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
2537                 int next_block=0;
2538                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2539
2540                 copy_context_before_encode(&backup_s, s, -1);
2541                 backup_s.pb= s->pb;
2542                 best_s.data_partitioning= s->data_partitioning;
2543                 best_s.partitioned_frame= s->partitioned_frame;
2544                 if(s->data_partitioning){
2545                     backup_s.pb2= s->pb2;
2546                     backup_s.tex_pb= s->tex_pb;
2547                 }
2548
2549                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2550                     s->mv_dir = MV_DIR_FORWARD;
2551                     s->mv_type = MV_TYPE_16X16;
2552                     s->mb_intra= 0;
2553                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2554                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2555                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2556                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2557                 }
2558                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2559                     s->mv_dir = MV_DIR_FORWARD;
2560                     s->mv_type = MV_TYPE_FIELD;
2561                     s->mb_intra= 0;
2562                     for(i=0; i<2; i++){
2563                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2564                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2565                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2566                     }
2567                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2568                                  &dmin, &next_block, 0, 0);
2569                 }
2570                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2571                     s->mv_dir = MV_DIR_FORWARD;
2572                     s->mv_type = MV_TYPE_16X16;
2573                     s->mb_intra= 0;
2574                     s->mv[0][0][0] = 0;
2575                     s->mv[0][0][1] = 0;
2576                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2577                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2578                 }
2579                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2580                     s->mv_dir = MV_DIR_FORWARD;
2581                     s->mv_type = MV_TYPE_8X8;
2582                     s->mb_intra= 0;
2583                     for(i=0; i<4; i++){
2584                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2585                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2586                     }
2587                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2588                                  &dmin, &next_block, 0, 0);
2589                 }
2590                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2591                     s->mv_dir = MV_DIR_FORWARD;
2592                     s->mv_type = MV_TYPE_16X16;
2593                     s->mb_intra= 0;
2594                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2595                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2596                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2597                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2598                 }
2599                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2600                     s->mv_dir = MV_DIR_BACKWARD;
2601                     s->mv_type = MV_TYPE_16X16;
2602                     s->mb_intra= 0;
2603                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2604                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2605                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2606                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2607                 }
2608                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2609                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2610                     s->mv_type = MV_TYPE_16X16;
2611                     s->mb_intra= 0;
2612                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2613                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2614                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2615                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2616                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2617                                  &dmin, &next_block, 0, 0);
2618                 }
2619                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2620                     s->mv_dir = MV_DIR_FORWARD;
2621                     s->mv_type = MV_TYPE_FIELD;
2622                     s->mb_intra= 0;
2623                     for(i=0; i<2; i++){
2624                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2625                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2626                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2627                     }
2628                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2629                                  &dmin, &next_block, 0, 0);
2630                 }
2631                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2632                     s->mv_dir = MV_DIR_BACKWARD;
2633                     s->mv_type = MV_TYPE_FIELD;
2634                     s->mb_intra= 0;
2635                     for(i=0; i<2; i++){
2636                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2637                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2638                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2639                     }
2640                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2641                                  &dmin, &next_block, 0, 0);
2642                 }
2643                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2644                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2645                     s->mv_type = MV_TYPE_FIELD;
2646                     s->mb_intra= 0;
2647                     for(dir=0; dir<2; dir++){
2648                         for(i=0; i<2; i++){
2649                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2650                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2651                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2652                         }
2653                     }
2654                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2655                                  &dmin, &next_block, 0, 0);
2656                 }
2657                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2658                     s->mv_dir = 0;
2659                     s->mv_type = MV_TYPE_16X16;
2660                     s->mb_intra= 1;
2661                     s->mv[0][0][0] = 0;
2662                     s->mv[0][0][1] = 0;
2663                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2664                                  &dmin, &next_block, 0, 0);
2665                     if(s->h263_pred || s->h263_aic){
2666                         if(best_s.mb_intra)
2667                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2668                         else
2669                             ff_clean_intra_table_entries(s); //old mode?
2670                     }
2671                 }
2672
2673                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
2674                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2675                         const int last_qp= backup_s.qscale;
2676                         int qpi, qp, dc[6];
2677                         DCTELEM ac[6][16];
2678                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2679                         static const int dquant_tab[4]={-1,1,-2,2};
2680
2681                         assert(backup_s.dquant == 0);
2682
2683                         //FIXME intra
2684                         s->mv_dir= best_s.mv_dir;
2685                         s->mv_type = MV_TYPE_16X16;
2686                         s->mb_intra= best_s.mb_intra;
2687                         s->mv[0][0][0] = best_s.mv[0][0][0];
2688                         s->mv[0][0][1] = best_s.mv[0][0][1];
2689                         s->mv[1][0][0] = best_s.mv[1][0][0];
2690                         s->mv[1][0][1] = best_s.mv[1][0][1];
2691
2692                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2693                         for(; qpi<4; qpi++){
2694                             int dquant= dquant_tab[qpi];
2695                             qp= last_qp + dquant;
2696                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2697                                 continue;
2698                             backup_s.dquant= dquant;
2699                             if(s->mb_intra && s->dc_val[0]){
2700                                 for(i=0; i<6; i++){
2701                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2702                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2703                                 }
2704                             }
2705
2706                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2707                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2708                             if(best_s.qscale != qp){
2709                                 if(s->mb_intra && s->dc_val[0]){
2710                                     for(i=0; i<6; i++){
2711                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2712                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2713                                     }
2714                                 }
2715                             }
2716                         }
2717                     }
2718                 }
2719                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2720                     int mx= s->b_direct_mv_table[xy][0];
2721                     int my= s->b_direct_mv_table[xy][1];
2722
2723                     backup_s.dquant = 0;
2724                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2725                     s->mb_intra= 0;
2726                     ff_mpeg4_set_direct_mv(s, mx, my);
2727                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2728                                  &dmin, &next_block, mx, my);
2729                 }
2730                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2731                     backup_s.dquant = 0;
2732                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2733                     s->mb_intra= 0;
2734                     ff_mpeg4_set_direct_mv(s, 0, 0);
2735                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2736                                  &dmin, &next_block, 0, 0);
2737                 }
2738                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
2739                     int coded=0;
2740                     for(i=0; i<6; i++)
2741                         coded |= s->block_last_index[i];
2742                     if(coded){
2743                         int mx,my;
2744                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2745                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2746                             mx=my=0; //FIXME find the one we actually used
2747                             ff_mpeg4_set_direct_mv(s, mx, my);
2748                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2749                             mx= s->mv[1][0][0];
2750                             my= s->mv[1][0][1];
2751                         }else{
2752                             mx= s->mv[0][0][0];
2753                             my= s->mv[0][0][1];
2754                         }
2755
2756                         s->mv_dir= best_s.mv_dir;
2757                         s->mv_type = best_s.mv_type;
2758                         s->mb_intra= 0;
2759 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2760                         s->mv[0][0][1] = best_s.mv[0][0][1];
2761                         s->mv[1][0][0] = best_s.mv[1][0][0];
2762                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2763                         backup_s.dquant= 0;
2764                         s->skipdct=1;
2765                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2766                                         &dmin, &next_block, mx, my);
2767                         s->skipdct=0;
2768                     }
2769                 }
2770
2771                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2772
2773                 copy_context_after_encode(s, &best_s, -1);
2774
2775                 pb_bits_count= put_bits_count(&s->pb);
2776                 flush_put_bits(&s->pb);
2777                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2778                 s->pb= backup_s.pb;
2779
2780                 if(s->data_partitioning){
2781                     pb2_bits_count= put_bits_count(&s->pb2);
2782                     flush_put_bits(&s->pb2);
2783                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2784                     s->pb2= backup_s.pb2;
2785
2786                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2787                     flush_put_bits(&s->tex_pb);
2788                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2789                     s->tex_pb= backup_s.tex_pb;
2790                 }
2791                 s->last_bits= put_bits_count(&s->pb);
2792
2793                 if (CONFIG_H263_ENCODER &&
2794                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2795                     ff_h263_update_motion_val(s);
2796
2797                 if(next_block==0){ //FIXME 16 vs linesize16
2798                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2799                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2800                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2801                 }
2802
2803                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2804                     MPV_decode_mb(s, s->block);
2805             } else {
2806                 int motion_x = 0, motion_y = 0;
2807                 s->mv_type=MV_TYPE_16X16;
2808                 // only one MB-Type possible
2809
2810                 switch(mb_type){
2811                 case CANDIDATE_MB_TYPE_INTRA:
2812                     s->mv_dir = 0;
2813                     s->mb_intra= 1;
2814                     motion_x= s->mv[0][0][0] = 0;
2815                     motion_y= s->mv[0][0][1] = 0;
2816                     break;
2817                 case CANDIDATE_MB_TYPE_INTER:
2818                     s->mv_dir = MV_DIR_FORWARD;
2819                     s->mb_intra= 0;
2820                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2821                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2822                     break;
2823                 case CANDIDATE_MB_TYPE_INTER_I:
2824                     s->mv_dir = MV_DIR_FORWARD;
2825                     s->mv_type = MV_TYPE_FIELD;
2826                     s->mb_intra= 0;
2827                     for(i=0; i<2; i++){
2828                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2829                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2830                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2831                     }
2832                     break;
2833                 case CANDIDATE_MB_TYPE_INTER4V:
2834                     s->mv_dir = MV_DIR_FORWARD;
2835                     s->mv_type = MV_TYPE_8X8;
2836                     s->mb_intra= 0;
2837                     for(i=0; i<4; i++){
2838                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2839                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2840                     }
2841                     break;
2842                 case CANDIDATE_MB_TYPE_DIRECT:
2843                     if (CONFIG_MPEG4_ENCODER) {
2844                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2845                         s->mb_intra= 0;
2846                         motion_x=s->b_direct_mv_table[xy][0];
2847                         motion_y=s->b_direct_mv_table[xy][1];
2848                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2849                     }
2850                     break;
2851                 case CANDIDATE_MB_TYPE_DIRECT0:
2852                     if (CONFIG_MPEG4_ENCODER) {
2853                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2854                         s->mb_intra= 0;
2855                         ff_mpeg4_set_direct_mv(s, 0, 0);
2856                     }
2857                     break;
2858                 case CANDIDATE_MB_TYPE_BIDIR:
2859                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2860                     s->mb_intra= 0;
2861                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2862                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2863                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2864                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2865                     break;
2866                 case CANDIDATE_MB_TYPE_BACKWARD:
2867                     s->mv_dir = MV_DIR_BACKWARD;
2868                     s->mb_intra= 0;
2869                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2870                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2871                     break;
2872                 case CANDIDATE_MB_TYPE_FORWARD:
2873                     s->mv_dir = MV_DIR_FORWARD;
2874                     s->mb_intra= 0;
2875                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2876                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2877 //                    printf(" %d %d ", motion_x, motion_y);
2878                     break;
2879                 case CANDIDATE_MB_TYPE_FORWARD_I:
2880                     s->mv_dir = MV_DIR_FORWARD;
2881                     s->mv_type = MV_TYPE_FIELD;
2882                     s->mb_intra= 0;
2883                     for(i=0; i<2; i++){
2884                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2885                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2886                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2887                     }
2888                     break;
2889                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2890                     s->mv_dir = MV_DIR_BACKWARD;
2891                     s->mv_type = MV_TYPE_FIELD;
2892                     s->mb_intra= 0;
2893                     for(i=0; i<2; i++){
2894                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2895                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2896                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2897                     }
2898                     break;
2899                 case CANDIDATE_MB_TYPE_BIDIR_I:
2900                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2901                     s->mv_type = MV_TYPE_FIELD;
2902                     s->mb_intra= 0;
2903                     for(dir=0; dir<2; dir++){
2904                         for(i=0; i<2; i++){
2905                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2906                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2907                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2908                         }
2909                     }
2910                     break;
2911                 default:
2912                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2913                 }
2914
2915                 encode_mb(s, motion_x, motion_y);
2916
2917                 // RAL: Update last macroblock type
2918                 s->last_mv_dir = s->mv_dir;
2919
2920                 if (CONFIG_H263_ENCODER &&
2921                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2922                     ff_h263_update_motion_val(s);
2923
2924                 MPV_decode_mb(s, s->block);
2925             }
2926
2927             /* clean the MV table in IPS frames for direct mode in B frames */
2928             if(s->mb_intra /* && I,P,S_TYPE */){
2929                 s->p_mv_table[xy][0]=0;
2930                 s->p_mv_table[xy][1]=0;
2931             }
2932
2933             if(s->flags&CODEC_FLAG_PSNR){
2934                 int w= 16;
2935                 int h= 16;
2936
2937                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2938                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2939
2940                 s->current_picture.f.error[0] += sse(
2941                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2942                     s->dest[0], w, h, s->linesize);
2943                 s->current_picture.f.error[1] += sse(
2944                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2945                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2946                 s->current_picture.f.error[2] += sse(
2947                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2948                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2949             }
2950             if(s->loop_filter){
2951                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2952                     ff_h263_loop_filter(s);
2953             }
2954 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2955         }
2956     }
2957
2958     //not beautiful here but we must write it before flushing so it has to be here
2959     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2960         msmpeg4_encode_ext_header(s);
2961
2962     write_slice_end(s);
2963
2964     /* Send the last GOB if RTP */
2965     if (s->avctx->rtp_callback) {
2966         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2967         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2968         /* Call the RTP callback to send the last GOB */
2969         emms_c();
2970         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2971     }
2972
2973     return 0;
2974 }
2975
2976 #define MERGE(field) dst->field += src->field; src->field=0
2977 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2978     MERGE(me.scene_change_score);
2979     MERGE(me.mc_mb_var_sum_temp);
2980     MERGE(me.mb_var_sum_temp);
2981 }
2982
2983 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2984     int i;
2985
2986     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2987     MERGE(dct_count[1]);
2988     MERGE(mv_bits);
2989     MERGE(i_tex_bits);
2990     MERGE(p_tex_bits);
2991     MERGE(i_count);
2992     MERGE(f_count);
2993     MERGE(b_count);
2994     MERGE(skip_count);
2995     MERGE(misc_bits);
2996     MERGE(error_count);
2997     MERGE(padding_bug_score);
2998     MERGE(current_picture.f.error[0]);
2999     MERGE(current_picture.f.error[1]);
3000     MERGE(current_picture.f.error[2]);
3001
3002     if(dst->avctx->noise_reduction){
3003         for(i=0; i<64; i++){
3004             MERGE(dct_error_sum[0][i]);
3005             MERGE(dct_error_sum[1][i]);
3006         }
3007     }
3008
3009     assert(put_bits_count(&src->pb) % 8 ==0);
3010     assert(put_bits_count(&dst->pb) % 8 ==0);
3011     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3012     flush_put_bits(&dst->pb);
3013 }
3014
3015 static int estimate_qp(MpegEncContext *s, int dry_run){
3016     if (s->next_lambda){
3017         s->current_picture_ptr->f.quality =
3018         s->current_picture.f.quality = s->next_lambda;
3019         if(!dry_run) s->next_lambda= 0;
3020     } else if (!s->fixed_qscale) {
3021         s->current_picture_ptr->f.quality =
3022         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3023         if (s->current_picture.f.quality < 0)
3024             return -1;
3025     }
3026
3027     if(s->adaptive_quant){
3028         switch(s->codec_id){
3029         case CODEC_ID_MPEG4:
3030             if (CONFIG_MPEG4_ENCODER)
3031                 ff_clean_mpeg4_qscales(s);
3032             break;
3033         case CODEC_ID_H263:
3034         case CODEC_ID_H263P:
3035         case CODEC_ID_FLV1:
3036             if (CONFIG_H263_ENCODER)
3037                 ff_clean_h263_qscales(s);
3038             break;
3039         default:
3040             ff_init_qscale_tab(s);
3041         }
3042
3043         s->lambda= s->lambda_table[0];
3044         //FIXME broken
3045     }else
3046         s->lambda = s->current_picture.f.quality;
3047 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3048     update_qscale(s);
3049     return 0;
3050 }
3051
3052 /* must be called before writing the header */
3053 static void set_frame_distances(MpegEncContext * s){
3054     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3055     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3056
3057     if(s->pict_type==AV_PICTURE_TYPE_B){
3058         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3059         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3060     }else{
3061         s->pp_time= s->time - s->last_non_b_time;
3062         s->last_non_b_time= s->time;
3063         assert(s->picture_number==0 || s->pp_time > 0);
3064     }
3065 }
3066
/**
 * Encode the current picture.
 *
 * Steps, in order: initialise timing and rounding state, pick a provisional
 * lambda, run motion estimation (or, for I pictures, the per-macroblock
 * variance pass) on all slice contexts, merge their statistics, optionally
 * turn a P picture into an I picture on a scene change, select f_code/b_code
 * and clip over-long motion vectors, obtain the final quantizer from
 * estimate_qp(), write the picture header for the output format and finally
 * run encode_thread on all slice contexts and merge their results.
 *
 * @param s              encoder context
 * @param picture_number number stored in s->picture_number and passed to the
 *                       header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3067 static int encode_picture(MpegEncContext *s, int picture_number)
3068 {
3069     int i;
3070     int bits;
3071     int context_count = s->slice_context_count;
3072
3073     s->picture_number = picture_number;
3074
3075     /* Reset the average MB variance */
3076     s->me.mb_var_sum_temp    =
3077     s->me.mc_mb_var_sum_temp = 0;
3078
3079     /* we need to initialize some time vars before we can encode b-frames */
3080     // RAL: Condition added for MPEG1VIDEO
3081     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3082         set_frame_distances(s);
3083     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3084         ff_set_mpeg4_time(s);
3085
3086     s->me.scene_change_score=0;
3087
3088 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3089
         /* Rounding mode: I pictures set it to a fixed value (1 for
          * msmpeg4_version >= 3, else 0); other non-B pictures toggle it when
          * flip-flop rounding applies; B pictures leave it untouched. */
3090     if(s->pict_type==AV_PICTURE_TYPE_I){
3091         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3092         else                        s->no_rounding=0;
3093     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3094         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3095             s->no_rounding ^= 1;
3096     }
3097
         /* Provisional lambda for motion estimation: in 2-pass mode from a
          * dry-run rate control estimate, otherwise (unless CODEC_FLAG_QSCALE
          * is set) from the lambda last stored for this kind of picture. */
3098     if(s->flags & CODEC_FLAG_PASS2){
3099         if (estimate_qp(s,1) < 0)
3100             return -1;
3101         ff_get_2pass_fcode(s);
3102     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3103         if(s->pict_type==AV_PICTURE_TYPE_B)
3104             s->lambda= s->last_lambda_for[s->pict_type];
3105         else
3106             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3107         update_qscale(s);
3108     }
3109
3110     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* context 0 is skipped here: only contexts 1..context_count-1 are
          * refreshed from s */
3111     for(i=1; i<context_count; i++){
3112         ff_update_duplicate_context(s->thread_context[i], s);
3113     }
3114
3115     if(ff_init_me(s)<0)
3116         return -1;
3117
3118     /* Estimate motion for every MB */
3119     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda/lambda2 by me_penalty_compensation/256, rounded */
3120         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3121         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3122         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3123             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3124                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3125             }
3126         }
3127
3128         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3129     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3130         /* I-Frame */
3131         for(i=0; i<s->mb_stride*s->mb_height; i++)
3132             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3133
3134         if(!s->fixed_qscale){
3135             /* finding spatial complexity for I-frame rate control */
3136             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3137         }
3138     }
         /* collect the ME statistics of contexts 1.. into s */
3139     for(i=1; i<context_count; i++){
3140         merge_context_after_me(s, s->thread_context[i]);
3141     }
3142     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3143     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3144     emms_c();
3145
         /* scene change: a P picture whose score exceeds the threshold is
          * encoded as an I picture with all macroblocks marked intra */
3146     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3147         s->pict_type= AV_PICTURE_TYPE_I;
3148         for(i=0; i<s->mb_stride*s->mb_height; i++)
3149             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3150 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3151     }
3152
         /* f_code/b_code selection and clipping of too long motion vectors;
          * skipped entirely when umvplus is set */
3153     if(!s->umvplus){
3154         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3155             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3156
3157             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3158                 int a,b;
3159                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3160                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3161                 s->f_code= FFMAX3(s->f_code, a, b);
3162             }
3163
3164             ff_fix_long_p_mvs(s);
3165             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3166             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3167                 int j;
3168                 for(i=0; i<2; i++){
3169                     for(j=0; j<2; j++)
3170                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3171                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3172                 }
3173             }
3174         }
3175
3176         if(s->pict_type==AV_PICTURE_TYPE_B){
3177             int a, b;
3178
                 /* f_code covers the forward tables, b_code the backward ones */
3179             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3180             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3181             s->f_code = FFMAX(a, b);
3182
3183             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3184             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3185             s->b_code = FFMAX(a, b);
3186
3187             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3188             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3189             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3190             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3191             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3192                 int dir, j;
3193                 for(dir=0; dir<2; dir++){
3194                     for(i=0; i<2; i++){
3195                         for(j=0; j<2; j++){
3196                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3197                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3198                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3199                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3200                         }
3201                     }
3202                 }
3203             }
3204         }
3205     }
3206
         /* final (non dry-run) quantizer decision for this picture */
3207     if (estimate_qp(s, 0) < 0)
3208         return -1;
3209
3210     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3211         s->qscale= 3; //reduce clipping problems
3212
3213     if (s->out_format == FMT_MJPEG) {
3214         /* for mjpeg, we do include qscale in the matrix */
3215         for(i=1;i<64;i++){
3216             int j= s->dsp.idct_permutation[i];
3217
3218             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3219         }
3220         s->y_dc_scale_table=
3221         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3222         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3223         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3224                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* the matrix now carries the scale, so qscale itself is pinned to 8 */
3225         s->qscale= 8;
3226     }
3227
3228     //FIXME var duplication
3229     s->current_picture_ptr->f.key_frame =
3230     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3231     s->current_picture_ptr->f.pict_type =
3232     s->current_picture.f.pict_type = s->pict_type;
3233
3234     if (s->current_picture.f.key_frame)
3235         s->picture_in_gop_number=0;
3236
         /* write the picture header of the selected output format; the bit
          * position before and after gives s->header_bits */
3237     s->last_bits= put_bits_count(&s->pb);
3238     switch(s->out_format) {
3239     case FMT_MJPEG:
3240         if (CONFIG_MJPEG_ENCODER)
3241             ff_mjpeg_encode_picture_header(s);
3242         break;
3243     case FMT_H261:
3244         if (CONFIG_H261_ENCODER)
3245             ff_h261_encode_picture_header(s, picture_number);
3246         break;
3247     case FMT_H263:
3248         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3249             ff_wmv2_encode_picture_header(s, picture_number);
3250         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3251             msmpeg4_encode_picture_header(s, picture_number);
3252         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3253             mpeg4_encode_picture_header(s, picture_number);
3254         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3255             rv10_encode_picture_header(s, picture_number);
3256         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3257             rv20_encode_picture_header(s, picture_number);
3258         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3259             ff_flv_encode_picture_header(s, picture_number);
3260         else if (CONFIG_H263_ENCODER)
3261             h263_encode_picture_header(s, picture_number);
3262         break;
3263     case FMT_MPEG1:
3264         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3265             mpeg1_encode_picture_header(s, picture_number);
3266         break;
3267     case FMT_H264:
3268         break;
3269     default:
3270         assert(0);
3271     }
3272     bits= put_bits_count(&s->pb);
3273     s->header_bits= bits - s->last_bits;
3274
         /* propagate the post-ME state to contexts 1.., encode on all
          * contexts, then merge statistics and bitstreams back into s */
3275     for(i=1; i<context_count; i++){
3276         update_duplicate_context_after_me(s->thread_context[i], s);
3277     }
3278     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3279     for(i=1; i<context_count; i++){
3280         merge_context_after_encode(s, s->thread_context[i]);
3281     }
3282     emms_c();
3283     return 0;
3284 }
3285
3286 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3287     const int intra= s->mb_intra;
3288     int i;
3289
3290     s->dct_count[intra]++;
3291
3292     for(i=0; i<64; i++){
3293         int level= block[i];
3294
3295         if(level){
3296             if(level>0){
3297                 s->dct_error_sum[intra][i] += level;
3298                 level -= s->dct_offset[intra][i];
3299                 if(level<0) level=0;
3300             }else{
3301                 s->dct_error_sum[intra][i] -= level;
3302                 level += s->dct_offset[intra][i];
3303                 if(level>0) level=0;
3304             }
3305             block[i]= level;
3306         }
3307     }
3308 }
3309
/**
 * Forward-transform and quantize one block with a rate-distortion search.
 *
 * After the forward DCT (and optional denoising) every coefficient gets up to
 * two candidate levels.  A dynamic-programming pass over the scan positions
 * then picks, for each position, the candidate and the preceding non-zero
 * position ("run") that minimise distortion + lambda * code length, using the
 * AC VLC length tables of the context.  Finally the chosen run/level chain is
 * written back into block.
 *
 * @param s        encoder context
 * @param block    64 samples on input, quantized coefficients on output
 * @param n        block index; for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale; also indexes s->coded_score
 * @param qscale   quantizer, indexes the q_intra/q_inter matrices
 * @param overflow set to non-zero if a quantized magnitude may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded position (-1 for non-intra blocks) if nothing is coded
 */
3310 static int dct_quantize_trellis_c(MpegEncContext *s,
3311                                   DCTELEM *block, int n,
3312                                   int qscale, int *overflow){
3313     const int *qmat;
3314     const uint8_t *scantable= s->intra_scantable.scantable;
3315     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3316     int max=0;
3317     unsigned int threshold1, threshold2;
3318     int bias=0;
3319     int run_tab[65];
3320     int level_tab[65];
3321     int score_tab[65];
3322     int survivor[65];
3323     int survivor_count;
3324     int last_run=0;
3325     int last_level=0;
3326     int last_score= 0;
3327     int last_i;
3328     int coeff[2][64];
3329     int coeff_count[64];
3330     int qmul, qadd, start_i, last_non_zero, i, dc;
3331     const int esc_length= s->ac_esc_length;
3332     uint8_t * length;
3333     uint8_t * last_length;
3334     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3335
3336     s->dsp.fdct (block);
3337
3338     if(s->dct_error_sum)
3339         s->denoise_dct(s, block);
         /* qmul/qadd: reconstruction as alevel*qmul + qadd, used below for
          * the FMT_H263 distortion estimate */
3340     qmul= qscale*16;
3341     qadd= ((qscale-1)|1)*8;
3342
3343     if (s->mb_intra) {
3344         int q;
3345         if (!s->h263_aic) {
3346             if (n < 4)
3347                 q = s->y_dc_scale;
3348             else
3349                 q = s->c_dc_scale;
3350             q = q << 3;
3351         } else{
3352             /* For AIC we skip quant/dequant of INTRADC */
3353             q = 1 << 3;
3354             qadd=0;
3355         }
3356
3357         /* note: block[0] is assumed to be positive */
3358         block[0] = (block[0] + (q >> 1)) / q;
             /* the DC is done; the search below starts at scan position 1 */
3359         start_i = 1;
3360         last_non_zero = 0;
3361         qmat = s->q_intra_matrix[qscale];
3362         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3363             bias= 1<<(QMAT_SHIFT-1);
3364         length     = s->intra_ac_vlc_length;
3365         last_length= s->intra_ac_vlc_last_length;
3366     } else {
3367         start_i = 0;
3368         last_non_zero = -1;
3369         qmat = s->q_inter_matrix[qscale];
3370         length     = s->inter_ac_vlc_length;
3371         last_length= s->inter_ac_vlc_last_length;
3372     }
3373     last_i= start_i;
3374
         /* (unsigned)(level+threshold1) > threshold2 is a single compare that
          * is true exactly when level lies outside [-threshold1, threshold1] */
3375     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3376     threshold2= (threshold1<<1);
3377
         /* scan backwards for the last coefficient exceeding the threshold */
3378     for(i=63; i>=start_i; i--) {
3379         const int j = scantable[i];
3380         int level = block[j] * qmat[j];
3381
3382         if(((unsigned)(level+threshold1))>threshold2){
3383             last_non_zero = i;
3384             break;
3385         }
3386     }
3387
         /* build the candidate levels: above the threshold the quantized
          * level and the level one step closer to zero (only the first if
          * the magnitude is 1); below it a single candidate of magnitude 1
          * carrying the sign of the scaled coefficient */
3388     for(i=start_i; i<=last_non_zero; i++) {
3389         const int j = scantable[i];
3390         int level = block[j] * qmat[j];
3391
3392 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3393 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3394         if(((unsigned)(level+threshold1))>threshold2){
3395             if(level>0){
3396                 level= (bias + level)>>QMAT_SHIFT;
3397                 coeff[0][i]= level;
3398                 coeff[1][i]= level-1;
3399 //                coeff[2][k]= level-2;
3400             }else{
3401                 level= (bias - level)>>QMAT_SHIFT;
3402                 coeff[0][i]= -level;
3403                 coeff[1][i]= -level+1;
3404 //                coeff[2][k]= -level+2;
3405             }
3406             coeff_count[i]= FFMIN(level, 2);
3407             assert(coeff_count[i]);
3408             max |=level;
3409         }else{
3410             coeff[0][i]= (level>>31)|1;
3411             coeff_count[i]= 1;
3412         }
3413     }
3414
3415     *overflow= s->max_qcoeff < max; //overflow might have happened
3416
         /* nothing above the threshold: clear the AC part and return */
3417     if(last_non_zero < start_i){
3418         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3419         return last_non_zero;
3420     }
3421
         /* score_tab[p] is the best cost found for a chain whose next run
          * starts at position p; survivor[] lists the start positions that
          * are still considered as predecessors */
3422     score_tab[start_i]= 0;
3423     survivor[0]= start_i;
3424     survivor_count= 1;
3425
3426     for(i=start_i; i<=last_non_zero; i++){
3427         int level_index, j, zero_distortion;
3428         int dct_coeff= FFABS(block[ scantable[i] ]);
3429         int best_score=256*256*256*120;
3430
             /* these fdct variants get their output rescaled by
              * ff_inv_aanscales before distortion is measured */
3431         if (   s->dsp.fdct == fdct_ifast
3432 #ifndef FAAN_POSTSCALE
3433             || s->dsp.fdct == ff_faandct
3434 #endif
3435            )
3436             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3437         zero_distortion= dct_coeff*dct_coeff;
3438
3439         for(level_index=0; level_index < coeff_count[i]; level_index++){
3440             int distortion;
3441             int level= coeff[level_index][i];
3442             const int alevel= FFABS(level);
3443             int unquant_coeff;
3444
3445             assert(level);
3446
                 /* reconstruct the magnitude the candidate would decode to */
3447             if(s->out_format == FMT_H263){
3448                 unquant_coeff= alevel*qmul + qadd;
3449             }else{ //MPEG1
3450                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3451                 if(s->mb_intra){
3452                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3453                         unquant_coeff =   (unquant_coeff - 1) | 1;
3454                 }else{
3455                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3456                         unquant_coeff =   (unquant_coeff - 1) | 1;
3457                 }
3458                 unquant_coeff<<= 3;
3459             }
3460
                 /* distortion relative to coding this coefficient as zero */
3461             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* bias the level by 64; values inside 0..127 index the VLC
                  * length tables, anything else is charged esc_length */
3462             level+=64;
3463             if((level&(~127)) == 0){
3464                 for(j=survivor_count-1; j>=0; j--){
3465                     int run= i - survivor[j];
3466                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3467                     score += score_tab[i-run];
3468
3469                     if(score < best_score){
3470                         best_score= score;
3471                         run_tab[i+1]= run;
3472                         level_tab[i+1]= level-64;
3473                     }
3474                 }
3475
                     /* for FMT_H263 the final coefficient is costed with
                      * last_length, so the best end point is tracked
                      * separately in last_score/last_run/last_level/last_i */
3476                 if(s->out_format == FMT_H263){
3477                     for(j=survivor_count-1; j>=0; j--){
3478                         int run= i - survivor[j];
3479                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3480                         score += score_tab[i-run];
3481                         if(score < last_score){
3482                             last_score= score;
3483                             last_run= run;
3484                             last_level= level-64;
3485                             last_i= i+1;
3486                         }
3487                     }
3488                 }
3489             }else{
3490                 distortion += esc_length*lambda;
3491                 for(j=survivor_count-1; j>=0; j--){
3492                     int run= i - survivor[j];
3493                     int score= distortion + score_tab[i-run];
3494
3495                     if(score < best_score){
3496                         best_score= score;
3497                         run_tab[i+1]= run;
3498                         level_tab[i+1]= level-64;
3499                     }
3500                 }
3501
3502                 if(s->out_format == FMT_H263){
3503                   for(j=survivor_count-1; j>=0; j--){
3504                         int run= i - survivor[j];
3505                         int score= distortion + score_tab[i-run];
3506                         if(score < last_score){
3507                             last_score= score;
3508                             last_run= run;
3509                             last_level= level-64;
3510                             last_i= i+1;
3511                         }
3512                     }
3513                 }
3514             }
3515         }
3516
3517         score_tab[i+1]= best_score;
3518
3519         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
             /* drop trailing survivors whose score exceeds the new best score
              * (plus a margin of lambda when last_non_zero > 27) */
3520         if(last_non_zero <= 27){
3521             for(; survivor_count; survivor_count--){
3522                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3523                     break;
3524             }
3525         }else{
3526             for(; survivor_count; survivor_count--){
3527                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3528                     break;
3529             }
3530         }
3531
3532         survivor[ survivor_count++ ]= i+1;
3533     }
3534
         /* for the other output formats the end point is chosen afterwards:
          * the cheapest score_tab entry, with lambda*2 added when i != 0 */
3535     if(s->out_format != FMT_H263){
3536         last_score= 256*256*256*120;
3537         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3538             int score= score_tab[i];
3539             if(i) score += lambda*2; //FIXME exacter?
3540
3541             if(score < last_score){
3542                 last_score= score;
3543                 last_i= i;
3544                 last_level= level_tab[i];
3545                 last_run= run_tab[i];
3546             }
3547         }
3548     }
3549
3550     s->coded_score[n] = last_score;
3551
         /* remember |block[0]| before the coefficients are cleared */
3552     dc= FFABS(block[0]);
3553     last_non_zero= last_i - 1;
3554     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3555
3556     if(last_non_zero < start_i)
3557         return last_non_zero;
3558
         /* special case: only position 0 of a block without separate DC is
          * left; decide again between its candidates and not coding it */
3559     if(last_non_zero == 0 && start_i == 0){
3560         int best_level= 0;
3561         int best_score= dc * dc;
3562
3563         for(i=0; i<coeff_count[0]; i++){
3564             int level= coeff[i][0];
3565             int alevel= FFABS(level);
3566             int unquant_coeff, score, distortion;
3567
3568             if(s->out_format == FMT_H263){
3569                     unquant_coeff= (alevel*qmul + qadd)>>3;
3570             }else{ //MPEG1
3571                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3572                     unquant_coeff =   (unquant_coeff - 1) | 1;
3573             }
3574             unquant_coeff = (unquant_coeff + 4) >> 3;
3575             unquant_coeff<<= 3 + 3;
3576
3577             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3578             level+=64;
3579             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3580             else                    score= distortion + esc_length*lambda;
3581
3582             if(score < best_score){
3583                 best_score= score;
3584                 best_level= level - 64;
3585             }
3586         }
3587         block[0]= best_level;
3588         s->coded_score[n] = best_score - dc*dc;
3589         if(best_level == 0) return -1;
3590         else                return last_non_zero;
3591     }
3592
         /* walk the chosen chain backwards via run_tab/level_tab and store
          * the levels at their permuted positions */
3593     i= last_i;
3594     assert(last_level);
3595
3596     block[ perm_scantable[last_non_zero] ]= last_level;
3597     i -= last_run + 1;
3598
3599     for(; i>start_i; i -= run_tab[i] + 1){
3600         block[ perm_scantable[i-1] ]= level_tab[i];
3601     }
3602
3603     return last_non_zero;
3604 }
3605
3606 //#define REFINE_STATS 1
3607 static int16_t basis[64][64];
3608
3609 static void build_basis(uint8_t *perm){
3610     int i, j, x, y;
3611     emms_c();
3612     for(i=0; i<8; i++){
3613         for(j=0; j<8; j++){
3614             for(y=0; y<8; y++){
3615                 for(x=0; x<8; x++){
3616                     double s= 0.25*(1<<BASIS_SHIFT);
3617                     int index= 8*i + j;
3618                     int perm_index= perm[index];
3619                     if(i==0) s*= sqrt(0.5);
3620                     if(j==0) s*= sqrt(0.5);
3621                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3622                 }
3623             }
3624         }
3625     }
3626 }
3627
3628 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3629                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3630                         int n, int qscale){
3631     int16_t rem[64];
3632     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3633     const uint8_t *scantable= s->intra_scantable.scantable;
3634     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3635 //    unsigned int threshold1, threshold2;
3636 //    int bias=0;
3637     int run_tab[65];
3638     int prev_run=0;
3639     int prev_level=0;
3640     int qmul, qadd, start_i, last_non_zero, i, dc;
3641     uint8_t * length;
3642     uint8_t * last_length;
3643     int lambda;
3644     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3645 #ifdef REFINE_STATS
3646 static int count=0;
3647 static int after_last=0;
3648 static int to_zero=0;
3649 static int from_zero=0;
3650 static int raise=0;
3651 static int lower=0;
3652 static int messed_sign=0;
3653 #endif
3654
3655     if(basis[0][0] == 0)
3656         build_basis(s->dsp.idct_permutation);
3657
3658     qmul= qscale*2;
3659     qadd= (qscale-1)|1;
3660     if (s->mb_intra) {
3661         if (!s->h263_aic) {
3662             if (n < 4)
3663                 q = s->y_dc_scale;
3664             else
3665                 q = s->c_dc_scale;
3666         } else{
3667             /* For AIC we skip quant/dequant of INTRADC */
3668             q = 1;
3669             qadd=0;
3670         }
3671         q <<= RECON_SHIFT-3;
3672         /* note: block[0] is assumed to be positive */
3673         dc= block[0]*q;
3674 //        block[0] = (block[0] + (q >> 1)) / q;
3675         start_i = 1;
3676 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3677 //            bias= 1<<(QMAT_SHIFT-1);
3678         length     = s->intra_ac_vlc_length;
3679         last_length= s->intra_ac_vlc_last_length;
3680     } else {
3681         dc= 0;
3682         start_i = 0;
3683         length     = s->inter_ac_vlc_length;
3684         last_length= s->inter_ac_vlc_last_length;
3685     }
3686     last_non_zero = s->block_last_index[n];
3687
3688 #ifdef REFINE_STATS
3689 {START_TIMER
3690 #endif
3691     dc += (1<<(RECON_SHIFT-1));
3692     for(i=0; i<64; i++){
3693         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3694     }
3695 #ifdef REFINE_STATS
3696 STOP_TIMER("memset rem[]")}
3697 #endif
3698     sum=0;
3699     for(i=0; i<64; i++){
3700         int one= 36;
3701         int qns=4;
3702         int w;
3703
3704         w= FFABS(weight[i]) + qns*one;
3705         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3706
3707         weight[i] = w;
3708 //        w=weight[i] = (63*qns + (w/2)) / w;
3709
3710         assert(w>0);
3711         assert(w<(1<<6));
3712         sum += w*w;
3713     }
3714     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3715 #ifdef REFINE_STATS
3716 {START_TIMER
3717 #endif
3718     run=0;
3719     rle_index=0;
3720     for(i=start_i; i<=last_non_zero; i++){
3721         int j= perm_scantable[i];
3722         const int level= block[j];
3723         int coeff;
3724
3725         if(level){
3726             if(level<0) coeff= qmul*level - qadd;
3727             else        coeff= qmul*level + qadd;
3728             run_tab[rle_index++]=run;
3729             run=0;
3730
3731             s->dsp.add_8x8basis(rem, basis[j], coeff);
3732         }else{
3733             run++;
3734         }
3735     }
3736 #ifdef REFINE_STATS
3737 if(last_non_zero>0){
3738 STOP_TIMER("init rem[]")
3739 }
3740 }
3741
3742 {START_TIMER
3743 #endif
3744     for(;;){
3745         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3746         int best_coeff=0;
3747         int best_change=0;
3748         int run2, best_unquant_change=0, analyze_gradient;
3749 #ifdef REFINE_STATS
3750 {START_TIMER
3751 #endif
3752         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
3753
3754         if(analyze_gradient){
3755 #ifdef REFINE_STATS
3756 {START_TIMER
3757 #endif
3758             for(i=0; i<64; i++){
3759                 int w= weight[i];
3760
3761                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3762             }
3763 #ifdef REFINE_STATS
3764 STOP_TIMER("rem*w*w")}
3765 {START_TIMER
3766 #endif
3767             s->dsp.fdct(d1);
3768 #ifdef REFINE_STATS
3769 STOP_TIMER("dct")}
3770 #endif
3771         }
3772
3773         if(start_i){
3774             const int level= block[0];
3775             int change, old_coeff;
3776
3777             assert(s->mb_intra);
3778
3779             old_coeff= q*level;
3780
3781             for(change=-1; change<=1; change+=2){
3782                 int new_level= level + change;
3783                 int score, new_coeff;
3784
3785                 new_coeff= q*new_level;
3786                 if(new_coeff >= 2048 || new_coeff < 0)
3787                     continue;
3788
3789                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3790                 if(score<best_score){
3791                     best_score= score;
3792                     best_coeff= 0;
3793                     best_change= change;
3794                     best_unquant_change= new_coeff - old_coeff;
3795                 }
3796             }
3797         }
3798
3799         run=0;
3800         rle_index=0;
3801         run2= run_tab[rle_index++];
3802         prev_level=0;
3803         prev_run=0;
3804
3805         for(i=start_i; i<64; i++){
3806             int j= perm_scantable[i];
3807             const int level= block[j];
3808             int change, old_coeff;
3809
3810             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3811                 break;
3812
3813             if(level){
3814                 if(level<0) old_coeff= qmul*level - qadd;
3815                 else        old_coeff= qmul*level + qadd;
3816                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3817             }else{
3818                 old_coeff=0;
3819                 run2--;
3820                 assert(run2>=0 || i >= last_non_zero );
3821             }
3822
3823             for(change=-1; change<=1; change+=2){
3824                 int new_level= level + change;
3825                 int score, new_coeff, unquant_change;
3826
3827                 score=0;
3828                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3829                    continue;
3830
3831                 if(new_level){
3832                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3833                     else            new_coeff= qmul*new_level + qadd;
3834                     if(new_coeff >= 2048 || new_coeff <= -2048)
3835                         continue;
3836                     //FIXME check for overflow
3837
3838                     if(level){
3839                         if(level < 63 && level > -63){
3840                             if(i < last_non_zero)
3841                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3842                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3843                             else
3844                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3845                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3846                         }
3847                     }else{
3848                         assert(FFABS(new_level)==1);
3849
3850                         if(analyze_gradient){
3851                             int g= d1[ scantable[i] ];
3852                             if(g && (g^new_level) >= 0)
3853                                 continue;
3854                         }
3855
3856                         if(i < last_non_zero){
3857                             int next_i= i + run2 + 1;
3858                             int next_level= block[ perm_scantable[next_i] ] + 64;
3859
3860                             if(next_level&(~127))
3861                                 next_level= 0;
3862
3863                             if(next_i < last_non_zero)
3864                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3865                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3866                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3867                             else
3868                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3869                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3870                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3871                         }else{
3872                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3873                             if(prev_level){
3874                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3875                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3876                             }
3877                         }
3878                     }
3879                 }else{
3880                     new_coeff=0;
3881                     assert(FFABS(level)==1);
3882
3883                     if(i < last_non_zero){
3884                         int next_i= i + run2 + 1;
3885                         int next_level= block[ perm_scantable[next_i] ] + 64;
3886
3887                         if(next_level&(~127))
3888                             next_level= 0;
3889
3890                         if(next_i < last_non_zero)
3891                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3892                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3893                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3894                         else
3895                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3896                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3897                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3898                     }else{
3899                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3900                         if(prev_level){
3901                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3902                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3903                         }
3904                     }
3905                 }
3906
3907                 score *= lambda;
3908
3909                 unquant_change= new_coeff - old_coeff;
3910                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3911
3912                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3913                 if(score<best_score){
3914                     best_score= score;
3915                     best_coeff= i;
3916                     best_change= change;
3917                     best_unquant_change= unquant_change;
3918                 }
3919             }
3920             if(level){
3921                 prev_level= level + 64;
3922                 if(prev_level&(~127))
3923                     prev_level= 0;
3924                 prev_run= run;
3925                 run=0;
3926             }else{
3927                 run++;
3928             }
3929         }
3930 #ifdef REFINE_STATS
3931 STOP_TIMER("iterative step")}
3932 #endif
3933
3934         if(best_change){
3935             int j= perm_scantable[ best_coeff ];
3936
3937             block[j] += best_change;
3938
3939             if(best_coeff > last_non_zero){
3940                 last_non_zero= best_coeff;
3941                 assert(block[j]);
3942 #ifdef REFINE_STATS
3943 after_last++;
3944 #endif
3945             }else{
3946 #ifdef REFINE_STATS
3947 if(block[j]){
3948     if(block[j] - best_change){
3949         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3950             raise++;
3951         }else{
3952             lower++;
3953         }
3954     }else{
3955         from_zero++;
3956     }
3957 }else{
3958     to_zero++;
3959 }
3960 #endif
3961                 for(; last_non_zero>=start_i; last_non_zero--){
3962                     if(block[perm_scantable[last_non_zero]])
3963                         break;
3964                 }
3965             }
3966 #ifdef REFINE_STATS
3967 count++;
3968 if(256*256*256*64 % count == 0){
3969     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3970 }
3971 #endif
3972             run=0;
3973             rle_index=0;
3974             for(i=start_i; i<=last_non_zero; i++){
3975                 int j= perm_scantable[i];
3976                 const int level= block[j];
3977
3978                  if(level){
3979                      run_tab[rle_index++]=run;
3980                      run=0;
3981                  }else{
3982                      run++;
3983                  }
3984             }
3985
3986             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3987         }else{
3988             break;
3989         }
3990     }
3991 #ifdef REFINE_STATS
3992 if(last_non_zero>0){
3993 STOP_TIMER("iterative search")
3994 }
3995 }
3996 #endif
3997
3998     return last_non_zero;
3999 }
4000
4001 int dct_quantize_c(MpegEncContext *s,
4002                         DCTELEM *block, int n,
4003                         int qscale, int *overflow)
4004 {
4005     int i, j, level, last_non_zero, q, start_i;
4006     const int *qmat;
4007     const uint8_t *scantable= s->intra_scantable.scantable;
4008     int bias;
4009     int max=0;
4010     unsigned int threshold1, threshold2;
4011
4012     s->dsp.fdct (block);
4013
4014     if(s->dct_error_sum)
4015         s->denoise_dct(s, block);
4016
4017     if (s->mb_intra) {
4018         if (!s->h263_aic) {
4019             if (n < 4)
4020                 q = s->y_dc_scale;
4021             else
4022                 q = s->c_dc_scale;
4023             q = q << 3;
4024         } else
4025             /* For AIC we skip quant/dequant of INTRADC */
4026             q = 1 << 3;
4027
4028         /* note: block[0] is assumed to be positive */
4029         block[0] = (block[0] + (q >> 1)) / q;
4030         start_i = 1;
4031         last_non_zero = 0;
4032         qmat = s->q_intra_matrix[qscale];
4033         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4034     } else {
4035         start_i = 0;
4036         last_non_zero = -1;
4037         qmat = s->q_inter_matrix[qscale];
4038         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4039     }
4040     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4041     threshold2= (threshold1<<1);
4042     for(i=63;i>=start_i;i--) {
4043         j = scantable[i];
4044         level = block[j] * qmat[j];
4045
4046         if(((unsigned)(level+threshold1))>threshold2){
4047             last_non_zero = i;
4048             break;
4049         }else{
4050             block[j]=0;
4051         }
4052     }
4053     for(i=start_i; i<=last_non_zero; i++) {
4054         j = scantable[i];
4055         level = block[j] * qmat[j];
4056
4057 //        if(   bias+level >= (1<<QMAT_SHIFT)
4058 //           || bias-level >= (1<<QMAT_SHIFT)){
4059         if(((unsigned)(level+threshold1))>threshold2){
4060             if(level>0){
4061                 level= (bias + level)>>QMAT_SHIFT;
4062                 block[j]= level;
4063             }else{
4064                 level= (bias - level)>>QMAT_SHIFT;
4065                 block[j]= -level;
4066             }
4067             max |=level;
4068         }else{
4069             block[j]=0;
4070         }
4071     }
4072     *overflow= s->max_qcoeff < max; //overflow might have happened
4073
4074     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4075     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4076         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4077
4078     return last_non_zero;
4079 }
4080
/* Byte offset of field x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags shared by the option tables below: video + encoding parameter.
 * Parenthesized so the expansion stays a single operand in any expression. */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4083 static const AVOption h263_options[] = {
4084     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4085     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4086     { NULL },
4087 };
4088
4089 static const AVClass h263_class = {
4090     .class_name = "H.263 encoder",
4091     .item_name  = av_default_item_name,
4092     .option     = h263_options,
4093     .version    = LIBAVUTIL_VERSION_INT,
4094 };
4095
4096 AVCodec ff_h263_encoder = {
4097     .name           = "h263",
4098     .type           = AVMEDIA_TYPE_VIDEO,
4099     .id             = CODEC_ID_H263,
4100     .priv_data_size = sizeof(MpegEncContext),
4101     .init           = MPV_encode_init,
4102     .encode         = MPV_encode_picture,
4103     .close          = MPV_encode_end,
4104     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4105     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4106     .priv_class     = &h263_class,
4107 };
4108
4109 static const AVOption h263p_options[] = {
4110     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4111     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4112     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4113     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4114     { NULL },
4115 };
4116 static const AVClass h263p_class = {
4117     .class_name = "H.263p encoder",
4118     .item_name  = av_default_item_name,
4119     .option     = h263p_options,
4120     .version    = LIBAVUTIL_VERSION_INT,
4121 };
4122
4123 AVCodec ff_h263p_encoder = {
4124     .name           = "h263p",
4125     .type           = AVMEDIA_TYPE_VIDEO,
4126     .id             = CODEC_ID_H263P,
4127     .priv_data_size = sizeof(MpegEncContext),
4128     .init           = MPV_encode_init,
4129     .encode         = MPV_encode_picture,
4130     .close          = MPV_encode_end,
4131     .capabilities = CODEC_CAP_SLICE_THREADS,
4132     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4133     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4134     .priv_class     = &h263p_class,
4135 };
4136
4137 AVCodec ff_msmpeg4v2_encoder = {
4138     .name           = "msmpeg4v2",
4139     .type           = AVMEDIA_TYPE_VIDEO,
4140     .id             = CODEC_ID_MSMPEG4V2,
4141     .priv_data_size = sizeof(MpegEncContext),
4142     .init           = MPV_encode_init,
4143     .encode         = MPV_encode_picture,
4144     .close          = MPV_encode_end,
4145     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4146     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4147 };
4148
4149 AVCodec ff_msmpeg4v3_encoder = {
4150     .name           = "msmpeg4",
4151     .type           = AVMEDIA_TYPE_VIDEO,
4152     .id             = CODEC_ID_MSMPEG4V3,
4153     .priv_data_size = sizeof(MpegEncContext),
4154     .init           = MPV_encode_init,
4155     .encode         = MPV_encode_picture,
4156     .close          = MPV_encode_end,
4157     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4158     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4159 };
4160
4161 AVCodec ff_wmv1_encoder = {
4162     .name           = "wmv1",
4163     .type           = AVMEDIA_TYPE_VIDEO,
4164     .id             = CODEC_ID_WMV1,
4165     .priv_data_size = sizeof(MpegEncContext),
4166     .init           = MPV_encode_init,
4167     .encode         = MPV_encode_picture,
4168     .close          = MPV_encode_end,
4169     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4170     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4171 };