]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
84a5cdaac80ec95a014350fe2de5cd749d2efb31
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of Libav.
9  *
10  * Libav is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * Libav is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with Libav; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "mpegvideo_common.h"
37 #include "h263.h"
38 #include "mjpegenc.h"
39 #include "msmpeg4.h"
40 #include "faandct.h"
41 #include "thread.h"
42 #include "aandcttab.h"
43 #include "flv.h"
44 #include "mpeg4video.h"
45 #include "internal.h"
46 #include <limits.h>
47
48 //#undef NDEBUG
49 //#include <assert.h>
50
51 static int encode_picture(MpegEncContext *s, int picture_number);
52 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
53 static int sse_mb(MpegEncContext *s);
54 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
55 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
56
57 /* enable all paranoid tests for rounding, overflows, etc... */
58 //#define PARANOID
59
60 //#define DEBUG
61
62 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
63 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
64
65 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
66                        uint16_t (*qmat16)[2][64],
67                        const uint16_t *quant_matrix,
68                        int bias, int qmin, int qmax, int intra)
69 {
70     int qscale;
71     int shift = 0;
72
73     for (qscale = qmin; qscale <= qmax; qscale++) {
74         int i;
75         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
76             dsp->fdct == ff_jpeg_fdct_islow_10
77 #ifdef FAAN_POSTSCALE
78             || dsp->fdct == ff_faandct
79 #endif
80             ) {
81             for (i = 0; i < 64; i++) {
82                 const int j = dsp->idct_permutation[i];
83                 /* 16 <= qscale * quant_matrix[i] <= 7905
84                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
85                  *             19952 <=              x  <= 249205026
86                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
87                  *           3444240 >= (1 << 36) / (x) >= 275 */
88
89                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
90                                         (qscale * quant_matrix[j]));
91             }
92         } else if (dsp->fdct == fdct_ifast
93 #ifndef FAAN_POSTSCALE
94                    || dsp->fdct == ff_faandct
95 #endif
96                    ) {
97             for (i = 0; i < 64; i++) {
98                 const int j = dsp->idct_permutation[i];
99                 /* 16 <= qscale * quant_matrix[i] <= 7905
100                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
101                  *             19952 <=              x  <= 249205026
102                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
103                  *           3444240 >= (1 << 36) / (x) >= 275 */
104
105                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
106                                         (ff_aanscales[i] * qscale *
107                                          quant_matrix[j]));
108             }
109         } else {
110             for (i = 0; i < 64; i++) {
111                 const int j = dsp->idct_permutation[i];
112                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
113                  * Assume x = qscale * quant_matrix[i]
114                  * So             16 <=              x  <= 7905
115                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
116                  * so          32768 >= (1 << 19) / (x) >= 67 */
117                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
118                                         (qscale * quant_matrix[j]));
119                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
120                 //                    (qscale * quant_matrix[i]);
121                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
122                                        (qscale * quant_matrix[j]);
123
124                 if (qmat16[qscale][0][i] == 0 ||
125                     qmat16[qscale][0][i] == 128 * 256)
126                     qmat16[qscale][0][i] = 128 * 256 - 1;
127                 qmat16[qscale][1][i] =
128                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
129                                 qmat16[qscale][0][i]);
130             }
131         }
132
133         for (i = intra; i < 64; i++) {
134             int64_t max = 8191;
135             if (dsp->fdct == fdct_ifast
136 #ifndef FAAN_POSTSCALE
137                 || dsp->fdct == ff_faandct
138 #endif
139                ) {
140                 max = (8191LL * ff_aanscales[i]) >> 14;
141             }
142             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
143                 shift++;
144             }
145         }
146     }
147     if (shift) {
148         av_log(NULL, AV_LOG_INFO,
149                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
150                QMAT_SHIFT - shift);
151     }
152 }
153
154 static inline void update_qscale(MpegEncContext *s)
155 {
156     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
157                 (FF_LAMBDA_SHIFT + 7);
158     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
159
160     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
161                  FF_LAMBDA_SHIFT;
162 }
163
164 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
165 {
166     int i;
167
168     if (matrix) {
169         put_bits(pb, 1, 1);
170         for (i = 0; i < 64; i++) {
171             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
172         }
173     } else
174         put_bits(pb, 1, 0);
175 }
176
177 /**
178  * init s->current_picture.qscale_table from s->lambda_table
179  */
180 void ff_init_qscale_tab(MpegEncContext *s)
181 {
182     int8_t * const qscale_table = s->current_picture.f.qscale_table;
183     int i;
184
185     for (i = 0; i < s->mb_num; i++) {
186         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
187         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
188         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
189                                                   s->avctx->qmax);
190     }
191 }
192
193 static void copy_picture_attributes(MpegEncContext *s,
194                                     AVFrame *dst,
195                                     AVFrame *src)
196 {
197     int i;
198
199     dst->pict_type              = src->pict_type;
200     dst->quality                = src->quality;
201     dst->coded_picture_number   = src->coded_picture_number;
202     dst->display_picture_number = src->display_picture_number;
203     //dst->reference              = src->reference;
204     dst->pts                    = src->pts;
205     dst->interlaced_frame       = src->interlaced_frame;
206     dst->top_field_first        = src->top_field_first;
207
208     if (s->avctx->me_threshold) {
209         if (!src->motion_val[0])
210             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
211         if (!src->mb_type)
212             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
213         if (!src->ref_index[0])
214             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
215         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
216             av_log(s->avctx, AV_LOG_ERROR,
217                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
218                    src->motion_subsample_log2, dst->motion_subsample_log2);
219
220         memcpy(dst->mb_type, src->mb_type,
221                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
222
223         for (i = 0; i < 2; i++) {
224             int stride = ((16 * s->mb_width ) >>
225                           src->motion_subsample_log2) + 1;
226             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
227
228             if (src->motion_val[i] &&
229                 src->motion_val[i] != dst->motion_val[i]) {
230                 memcpy(dst->motion_val[i], src->motion_val[i],
231                        2 * stride * height * sizeof(int16_t));
232             }
233             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
234                 memcpy(dst->ref_index[i], src->ref_index[i],
235                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
236             }
237         }
238     }
239 }
240
241 static void update_duplicate_context_after_me(MpegEncContext *dst,
242                                               MpegEncContext *src)
243 {
244 #define COPY(a) dst->a= src->a
245     COPY(pict_type);
246     COPY(current_picture);
247     COPY(f_code);
248     COPY(b_code);
249     COPY(qscale);
250     COPY(lambda);
251     COPY(lambda2);
252     COPY(picture_in_gop_number);
253     COPY(gop_picture_number);
254     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
255     COPY(progressive_frame);    // FIXME don't set in encode_header
256     COPY(partitioned_frame);    // FIXME don't set in encode_header
257 #undef COPY
258 }
259
260 /**
261  * Set the given MpegEncContext to defaults for encoding.
262  * the changed fields will not depend upon the prior state of the MpegEncContext.
263  */
264 static void MPV_encode_defaults(MpegEncContext *s)
265 {
266     int i;
267     MPV_common_defaults(s);
268
269     for (i = -16; i < 16; i++) {
270         default_fcode_tab[i + MAX_MV] = 1;
271     }
272     s->me.mv_penalty = default_mv_penalty;
273     s->fcode_tab     = default_fcode_tab;
274 }
275
276 /* init video encoder */
277 av_cold int MPV_encode_init(AVCodecContext *avctx)
278 {
279     MpegEncContext *s = avctx->priv_data;
280     int i;
281     int chroma_h_shift, chroma_v_shift;
282
283     MPV_encode_defaults(s);
284
285     switch (avctx->codec_id) {
286     case CODEC_ID_MPEG2VIDEO:
287         if (avctx->pix_fmt != PIX_FMT_YUV420P &&
288             avctx->pix_fmt != PIX_FMT_YUV422P) {
289             av_log(avctx, AV_LOG_ERROR,
290                    "only YUV420 and YUV422 are supported\n");
291             return -1;
292         }
293         break;
294     case CODEC_ID_LJPEG:
295         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
296             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
297             avctx->pix_fmt != PIX_FMT_YUVJ444P &&
298             avctx->pix_fmt != PIX_FMT_BGRA     &&
299             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
300               avctx->pix_fmt != PIX_FMT_YUV422P &&
301               avctx->pix_fmt != PIX_FMT_YUV444P) ||
302              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
303             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
304             return -1;
305         }
306         break;
307     case CODEC_ID_MJPEG:
308         if (avctx->pix_fmt != PIX_FMT_YUVJ420P &&
309             avctx->pix_fmt != PIX_FMT_YUVJ422P &&
310             ((avctx->pix_fmt != PIX_FMT_YUV420P &&
311               avctx->pix_fmt != PIX_FMT_YUV422P) ||
312              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
313             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
314             return -1;
315         }
316         break;
317     default:
318         if (avctx->pix_fmt != PIX_FMT_YUV420P) {
319             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
320             return -1;
321         }
322     }
323
324     switch (avctx->pix_fmt) {
325     case PIX_FMT_YUVJ422P:
326     case PIX_FMT_YUV422P:
327         s->chroma_format = CHROMA_422;
328         break;
329     case PIX_FMT_YUVJ420P:
330     case PIX_FMT_YUV420P:
331     default:
332         s->chroma_format = CHROMA_420;
333         break;
334     }
335
336     s->bit_rate = avctx->bit_rate;
337     s->width    = avctx->width;
338     s->height   = avctx->height;
339     if (avctx->gop_size > 600 &&
340         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
341         av_log(avctx, AV_LOG_ERROR,
342                "Warning keyframe interval too large! reducing it ...\n");
343         avctx->gop_size = 600;
344     }
345     s->gop_size     = avctx->gop_size;
346     s->avctx        = avctx;
347     s->flags        = avctx->flags;
348     s->flags2       = avctx->flags2;
349     s->max_b_frames = avctx->max_b_frames;
350     s->codec_id     = avctx->codec->id;
351     s->luma_elim_threshold   = avctx->luma_elim_threshold;
352     s->chroma_elim_threshold = avctx->chroma_elim_threshold;
353     s->strict_std_compliance = avctx->strict_std_compliance;
354 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
355     if (avctx->flags & CODEC_FLAG_PART)
356         s->data_partitioning = 1;
357 #endif
358     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
359     s->mpeg_quant         = avctx->mpeg_quant;
360     s->rtp_mode           = !!avctx->rtp_payload_size;
361     s->intra_dc_precision = avctx->intra_dc_precision;
362     s->user_specified_pts = AV_NOPTS_VALUE;
363
364     if (s->gop_size <= 1) {
365         s->intra_only = 1;
366         s->gop_size   = 12;
367     } else {
368         s->intra_only = 0;
369     }
370
371     s->me_method = avctx->me_method;
372
373     /* Fixed QSCALE */
374     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
375
376     s->adaptive_quant = (s->avctx->lumi_masking ||
377                          s->avctx->dark_masking ||
378                          s->avctx->temporal_cplx_masking ||
379                          s->avctx->spatial_cplx_masking  ||
380                          s->avctx->p_masking      ||
381                          s->avctx->border_masking ||
382                          (s->flags & CODEC_FLAG_QP_RD)) &&
383                         !s->fixed_qscale;
384
385     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
386 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
387     s->alternate_scan   = !!(s->flags  & CODEC_FLAG_ALT_SCAN);
388     s->intra_vlc_format = !!(s->flags2 & CODEC_FLAG2_INTRA_VLC);
389     s->q_scale_type     = !!(s->flags2 & CODEC_FLAG2_NON_LINEAR_QUANT);
390     s->obmc             = !!(s->flags  & CODEC_FLAG_OBMC);
391 #endif
392
393     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
394         av_log(avctx, AV_LOG_ERROR,
395                "a vbv buffer size is needed, "
396                "for encoding with a maximum bitrate\n");
397         return -1;
398     }
399
400     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
401         av_log(avctx, AV_LOG_INFO,
402                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
403     }
404
405     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
406         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
407         return -1;
408     }
409
410     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
411         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
412         return -1;
413     }
414
415     if (avctx->rc_max_rate &&
416         avctx->rc_max_rate == avctx->bit_rate &&
417         avctx->rc_max_rate != avctx->rc_min_rate) {
418         av_log(avctx, AV_LOG_INFO,
419                "impossible bitrate constraints, this will fail\n");
420     }
421
422     if (avctx->rc_buffer_size &&
423         avctx->bit_rate * (int64_t)avctx->time_base.num >
424             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
425         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
426         return -1;
427     }
428
429     if (!s->fixed_qscale &&
430         avctx->bit_rate * av_q2d(avctx->time_base) >
431             avctx->bit_rate_tolerance) {
432         av_log(avctx, AV_LOG_ERROR,
433                "bitrate tolerance too small for bitrate\n");
434         return -1;
435     }
436
437     if (s->avctx->rc_max_rate &&
438         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
439         (s->codec_id == CODEC_ID_MPEG1VIDEO ||
440          s->codec_id == CODEC_ID_MPEG2VIDEO) &&
441         90000LL * (avctx->rc_buffer_size - 1) >
442             s->avctx->rc_max_rate * 0xFFFFLL) {
443         av_log(avctx, AV_LOG_INFO,
444                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
445                "specified vbv buffer is too large for the given bitrate!\n");
446     }
447
448     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != CODEC_ID_MPEG4 &&
449         s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P &&
450         s->codec_id != CODEC_ID_FLV1) {
451         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
452         return -1;
453     }
454
455     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
456         av_log(avctx, AV_LOG_ERROR,
457                "OBMC is only supported with simple mb decision\n");
458         return -1;
459     }
460
461 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
462     if (s->obmc && s->codec_id != CODEC_ID_H263 &&
463         s->codec_id != CODEC_ID_H263P) {
464         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
465         return -1;
466     }
467 #endif
468
469     if (s->quarter_sample && s->codec_id != CODEC_ID_MPEG4) {
470         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
471         return -1;
472     }
473
474 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
475     if (s->data_partitioning && s->codec_id != CODEC_ID_MPEG4) {
476         av_log(avctx, AV_LOG_ERROR,
477                "data partitioning not supported by codec\n");
478         return -1;
479     }
480 #endif
481
482     if (s->max_b_frames                    &&
483         s->codec_id != CODEC_ID_MPEG4      &&
484         s->codec_id != CODEC_ID_MPEG1VIDEO &&
485         s->codec_id != CODEC_ID_MPEG2VIDEO) {
486         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
487         return -1;
488     }
489
490     if ((s->codec_id == CODEC_ID_MPEG4 ||
491          s->codec_id == CODEC_ID_H263  ||
492          s->codec_id == CODEC_ID_H263P) &&
493         (avctx->sample_aspect_ratio.num > 255 ||
494          avctx->sample_aspect_ratio.den > 255)) {
495         av_log(avctx, AV_LOG_ERROR,
496                "Invalid pixel aspect ratio %i/%i, limit is 255/255\n",
497                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
498         return -1;
499     }
500
501     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME |
502                      CODEC_FLAG_ALT_SCAN)) &&
503         s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO) {
504         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
505         return -1;
506     }
507
508     // FIXME mpeg2 uses that too
509     if (s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4) {
510         av_log(avctx, AV_LOG_ERROR,
511                "mpeg2 style quantization not supported by codec\n");
512         return -1;
513     }
514
515     if ((s->flags & CODEC_FLAG_CBP_RD) && !avctx->trellis) {
516         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
517         return -1;
518     }
519
520     if ((s->flags & CODEC_FLAG_QP_RD) &&
521         s->avctx->mb_decision != FF_MB_DECISION_RD) {
522         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
523         return -1;
524     }
525
526     if (s->avctx->scenechange_threshold < 1000000000 &&
527         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
528         av_log(avctx, AV_LOG_ERROR,
529                "closed gop with scene change detection are not supported yet, "
530                "set threshold to 1000000000\n");
531         return -1;
532     }
533
534     if ((s->flags2 & CODEC_FLAG2_INTRA_VLC) &&
535         s->codec_id != CODEC_ID_MPEG2VIDEO) {
536         av_log(avctx, AV_LOG_ERROR,
537                "intra vlc table not supported by codec\n");
538         return -1;
539     }
540
541     if (s->flags & CODEC_FLAG_LOW_DELAY) {
542         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
543             av_log(avctx, AV_LOG_ERROR,
544                   "low delay forcing is only available for mpeg2\n");
545             return -1;
546         }
547         if (s->max_b_frames != 0) {
548             av_log(avctx, AV_LOG_ERROR,
549                    "b frames cannot be used with low delay\n");
550             return -1;
551         }
552     }
553
554     if (s->q_scale_type == 1) {
555 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
556         if (s->codec_id != CODEC_ID_MPEG2VIDEO) {
557             av_log(avctx, AV_LOG_ERROR,
558                    "non linear quant is only available for mpeg2\n");
559             return -1;
560         }
561 #endif
562         if (avctx->qmax > 12) {
563             av_log(avctx, AV_LOG_ERROR,
564                    "non linear quant only supports qmax <= 12 currently\n");
565             return -1;
566         }
567     }
568
569     if (s->avctx->thread_count > 1         &&
570         s->codec_id != CODEC_ID_MPEG4      &&
571         s->codec_id != CODEC_ID_MPEG1VIDEO &&
572         s->codec_id != CODEC_ID_MPEG2VIDEO &&
573         (s->codec_id != CODEC_ID_H263P ||
574          !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))) {
575         av_log(avctx, AV_LOG_ERROR,
576                "multi threaded encoding not supported by codec\n");
577         return -1;
578     }
579
580     if (s->avctx->thread_count < 1) {
581         av_log(avctx, AV_LOG_ERROR,
582                "automatic thread number detection not supported by codec,"
583                "patch welcome\n");
584         return -1;
585     }
586
587     if (s->avctx->thread_count > 1)
588         s->rtp_mode = 1;
589
590     if (!avctx->time_base.den || !avctx->time_base.num) {
591         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
592         return -1;
593     }
594
595     i = (INT_MAX / 2 + 128) >> 8;
596     if (avctx->me_threshold >= i) {
597         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
598                i - 1);
599         return -1;
600     }
601     if (avctx->mb_threshold >= i) {
602         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
603                i - 1);
604         return -1;
605     }
606
607     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
608         av_log(avctx, AV_LOG_INFO,
609                "notice: b_frame_strategy only affects the first pass\n");
610         avctx->b_frame_strategy = 0;
611     }
612
613     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
614     if (i > 1) {
615         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
616         avctx->time_base.den /= i;
617         avctx->time_base.num /= i;
618         //return -1;
619     }
620
621     if (s->mpeg_quant || s->codec_id == CODEC_ID_MPEG1VIDEO ||
622         s->codec_id == CODEC_ID_MPEG2VIDEO || s->codec_id == CODEC_ID_MJPEG) {
623         // (a + x * 3 / 8) / x
624         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
625         s->inter_quant_bias = 0;
626     } else {
627         s->intra_quant_bias = 0;
628         // (a - x / 4) / x
629         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
630     }
631
632     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
633         s->intra_quant_bias = avctx->intra_quant_bias;
634     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
635         s->inter_quant_bias = avctx->inter_quant_bias;
636
637     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
638                                   &chroma_v_shift);
639
640     if (avctx->codec_id == CODEC_ID_MPEG4 &&
641         s->avctx->time_base.den > (1 << 16) - 1) {
642         av_log(avctx, AV_LOG_ERROR,
643                "timebase %d/%d not supported by MPEG 4 standard, "
644                "the maximum admitted value for the timebase denominator "
645                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
646                (1 << 16) - 1);
647         return -1;
648     }
649     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
650
651     switch (avctx->codec->id) {
652     case CODEC_ID_MPEG1VIDEO:
653         s->out_format = FMT_MPEG1;
654         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
655         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
656         break;
657     case CODEC_ID_MPEG2VIDEO:
658         s->out_format = FMT_MPEG1;
659         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
660         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
661         s->rtp_mode   = 1;
662         break;
663     case CODEC_ID_LJPEG:
664     case CODEC_ID_MJPEG:
665         s->out_format = FMT_MJPEG;
666         s->intra_only = 1; /* force intra only for jpeg */
667         if (avctx->codec->id == CODEC_ID_LJPEG &&
668             avctx->pix_fmt   == PIX_FMT_BGRA) {
669             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
670             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
671             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
672         } else {
673             s->mjpeg_vsample[0] = 2;
674             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
675             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
676             s->mjpeg_hsample[0] = 2;
677             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
678             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
679         }
680         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
681             ff_mjpeg_encode_init(s) < 0)
682             return -1;
683         avctx->delay = 0;
684         s->low_delay = 1;
685         break;
686     case CODEC_ID_H261:
687         if (!CONFIG_H261_ENCODER)
688             return -1;
689         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
690             av_log(avctx, AV_LOG_ERROR,
691                    "The specified picture size of %dx%d is not valid for the "
692                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
693                     s->width, s->height);
694             return -1;
695         }
696         s->out_format = FMT_H261;
697         avctx->delay  = 0;
698         s->low_delay  = 1;
699         break;
700     case CODEC_ID_H263:
701         if (!CONFIG_H263_ENCODER)
702         return -1;
703         if (ff_match_2uint16(h263_format, FF_ARRAY_ELEMS(h263_format),
704                              s->width, s->height) == 8) {
705             av_log(avctx, AV_LOG_INFO,
706                    "The specified picture size of %dx%d is not valid for "
707                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
708                    "352x288, 704x576, and 1408x1152."
709                    "Try H.263+.\n", s->width, s->height);
710             return -1;
711         }
712         s->out_format = FMT_H263;
713         avctx->delay  = 0;
714         s->low_delay  = 1;
715         break;
716     case CODEC_ID_H263P:
717         s->out_format = FMT_H263;
718         s->h263_plus  = 1;
719         /* Fx */
720 #if FF_API_MPEGVIDEO_GLOBAL_OPTS
721         if (avctx->flags & CODEC_FLAG_H263P_UMV)
722             s->umvplus = 1;
723         if (avctx->flags & CODEC_FLAG_H263P_AIV)
724             s->alt_inter_vlc = 1;
725         if (avctx->flags & CODEC_FLAG_H263P_SLICE_STRUCT)
726             s->h263_slice_structured = 1;
727 #endif
728         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
729         s->modified_quant  = s->h263_aic;
730         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
731         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
732
733         /* /Fx */
734         /* These are just to be sure */
735         avctx->delay = 0;
736         s->low_delay = 1;
737         break;
738     case CODEC_ID_FLV1:
739         s->out_format      = FMT_H263;
740         s->h263_flv        = 2; /* format = 1; 11-bit codes */
741         s->unrestricted_mv = 1;
742         s->rtp_mode  = 0; /* don't allow GOB */
743         avctx->delay = 0;
744         s->low_delay = 1;
745         break;
746     case CODEC_ID_RV10:
747         s->out_format = FMT_H263;
748         avctx->delay  = 0;
749         s->low_delay  = 1;
750         break;
751     case CODEC_ID_RV20:
752         s->out_format      = FMT_H263;
753         avctx->delay       = 0;
754         s->low_delay       = 1;
755         s->modified_quant  = 1;
756         s->h263_aic        = 1;
757         s->h263_plus       = 1;
758         s->loop_filter     = 1;
759         s->unrestricted_mv = 0;
760         break;
761     case CODEC_ID_MPEG4:
762         s->out_format      = FMT_H263;
763         s->h263_pred       = 1;
764         s->unrestricted_mv = 1;
765         s->low_delay       = s->max_b_frames ? 0 : 1;
766         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
767         break;
768     case CODEC_ID_MSMPEG4V2:
769         s->out_format      = FMT_H263;
770         s->h263_pred       = 1;
771         s->unrestricted_mv = 1;
772         s->msmpeg4_version = 2;
773         avctx->delay       = 0;
774         s->low_delay       = 1;
775         break;
776     case CODEC_ID_MSMPEG4V3:
777         s->out_format        = FMT_H263;
778         s->h263_pred         = 1;
779         s->unrestricted_mv   = 1;
780         s->msmpeg4_version   = 3;
781         s->flipflop_rounding = 1;
782         avctx->delay         = 0;
783         s->low_delay         = 1;
784         break;
785     case CODEC_ID_WMV1:
786         s->out_format        = FMT_H263;
787         s->h263_pred         = 1;
788         s->unrestricted_mv   = 1;
789         s->msmpeg4_version   = 4;
790         s->flipflop_rounding = 1;
791         avctx->delay         = 0;
792         s->low_delay         = 1;
793         break;
794     case CODEC_ID_WMV2:
795         s->out_format        = FMT_H263;
796         s->h263_pred         = 1;
797         s->unrestricted_mv   = 1;
798         s->msmpeg4_version   = 5;
799         s->flipflop_rounding = 1;
800         avctx->delay         = 0;
801         s->low_delay         = 1;
802         break;
803     default:
804         return -1;
805     }
806
807     avctx->has_b_frames = !s->low_delay;
808
809     s->encoding = 1;
810
811     s->progressive_frame    =
812     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
813                                                 CODEC_FLAG_INTERLACED_ME  |
814                                                 CODEC_FLAG_ALT_SCAN));
815
816     /* init */
817     if (MPV_common_init(s) < 0)
818         return -1;
819
820     if (!s->dct_quantize)
821         s->dct_quantize = dct_quantize_c;
822     if (!s->denoise_dct)
823         s->denoise_dct  = denoise_dct_c;
824     s->fast_dct_quantize = s->dct_quantize;
825     if (avctx->trellis)
826         s->dct_quantize  = dct_quantize_trellis_c;
827
828     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
829         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
830
831     s->quant_precision = 5;
832
833     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
834     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
835
836     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
837         ff_h261_encode_init(s);
838     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
839         h263_encode_init(s);
840     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
841         ff_msmpeg4_encode_init(s);
842     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
843         && s->out_format == FMT_MPEG1)
844         ff_mpeg1_encode_init(s);
845
846     /* init q matrix */
847     for (i = 0; i < 64; i++) {
848         int j = s->dsp.idct_permutation[i];
849         if (CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4 &&
850             s->mpeg_quant) {
851             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
852             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
853         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
854             s->intra_matrix[j] =
855             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
856         } else {
857             /* mpeg1/2 */
858             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
859             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
860         }
861         if (s->avctx->intra_matrix)
862             s->intra_matrix[j] = s->avctx->intra_matrix[i];
863         if (s->avctx->inter_matrix)
864             s->inter_matrix[j] = s->avctx->inter_matrix[i];
865     }
866
867     /* precompute matrix */
868     /* for mjpeg, we do include qscale in the matrix */
869     if (s->out_format != FMT_MJPEG) {
870         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
871                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
872                           31, 1);
873         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
874                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
875                           31, 0);
876     }
877
878     if (ff_rate_control_init(s) < 0)
879         return -1;
880
881     return 0;
882 }
883
884 av_cold int MPV_encode_end(AVCodecContext *avctx)
885 {
886     MpegEncContext *s = avctx->priv_data;
887
888     ff_rate_control_uninit(s);
889
890     MPV_common_end(s);
891     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
892         s->out_format == FMT_MJPEG)
893         ff_mjpeg_encode_close(s);
894
895     av_freep(&avctx->extradata);
896
897     return 0;
898 }
899
/**
 * Sum of absolute errors between a 16x16 pixel block and a constant value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel of the block is compared against
 * @param stride offset between the starts of two consecutive rows
 * @return sum over all 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
913
914 static int get_intra_count(MpegEncContext *s, uint8_t *src,
915                            uint8_t *ref, int stride)
916 {
917     int x, y, w, h;
918     int acc = 0;
919
920     w = s->width  & ~15;
921     h = s->height & ~15;
922
923     for (y = 0; y < h; y += 16) {
924         for (x = 0; x < w; x += 16) {
925             int offset = x + y * stride;
926             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
927                                      16);
928             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
929             int sae  = get_sae(src + offset, mean, stride);
930
931             acc += sae + 500 < sad;
932         }
933     }
934     return acc;
935 }
936
937
938 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
939 {
940     AVFrame *pic = NULL;
941     int64_t pts;
942     int i;
943     const int encoding_delay = s->max_b_frames;
944     int direct = 1;
945
946     if (pic_arg) {
947         pts = pic_arg->pts;
948         pic_arg->display_picture_number = s->input_picture_number++;
949
950         if (pts != AV_NOPTS_VALUE) {
951             if (s->user_specified_pts != AV_NOPTS_VALUE) {
952                 int64_t time = pts;
953                 int64_t last = s->user_specified_pts;
954
955                 if (time <= last) {
956                     av_log(s->avctx, AV_LOG_ERROR,
957                            "Error, Invalid timestamp=%"PRId64", "
958                            "last=%"PRId64"\n", pts, s->user_specified_pts);
959                     return -1;
960                 }
961             }
962             s->user_specified_pts = pts;
963         } else {
964             if (s->user_specified_pts != AV_NOPTS_VALUE) {
965                 s->user_specified_pts =
966                 pts = s->user_specified_pts + 1;
967                 av_log(s->avctx, AV_LOG_INFO,
968                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
969                        pts);
970             } else {
971                 pts = pic_arg->display_picture_number;
972             }
973         }
974     }
975
976   if (pic_arg) {
977     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
978         direct = 0;
979     if (pic_arg->linesize[0] != s->linesize)
980         direct = 0;
981     if (pic_arg->linesize[1] != s->uvlinesize)
982         direct = 0;
983     if (pic_arg->linesize[2] != s->uvlinesize)
984         direct = 0;
985
986     //av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0],
987     //       pic_arg->linesize[1], s->linesize, s->uvlinesize);
988
989     if (direct) {
990         i = ff_find_unused_picture(s, 1);
991         if (i < 0)
992             return i;
993
994         pic = (AVFrame *) &s->picture[i];
995         pic->reference = 3;
996
997         for (i = 0; i < 4; i++) {
998             pic->data[i]     = pic_arg->data[i];
999             pic->linesize[i] = pic_arg->linesize[i];
1000         }
1001         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1002             return -1;
1003         }
1004     } else {
1005         i = ff_find_unused_picture(s, 0);
1006         if (i < 0)
1007             return i;
1008
1009         pic = (AVFrame *) &s->picture[i];
1010         pic->reference = 3;
1011
1012         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1013             return -1;
1014         }
1015
1016         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1017             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1018             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1019             // empty
1020         } else {
1021             int h_chroma_shift, v_chroma_shift;
1022             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1023                                           &v_chroma_shift);
1024
1025             for (i = 0; i < 3; i++) {
1026                 int src_stride = pic_arg->linesize[i];
1027                 int dst_stride = i ? s->uvlinesize : s->linesize;
1028                 int h_shift = i ? h_chroma_shift : 0;
1029                 int v_shift = i ? v_chroma_shift : 0;
1030                 int w = s->width  >> h_shift;
1031                 int h = s->height >> v_shift;
1032                 uint8_t *src = pic_arg->data[i];
1033                 uint8_t *dst = pic->data[i];
1034
1035                 if (!s->avctx->rc_buffer_size)
1036                     dst += INPLACE_OFFSET;
1037
1038                 if (src_stride == dst_stride)
1039                     memcpy(dst, src, src_stride * h);
1040                 else {
1041                     while (h--) {
1042                         memcpy(dst, src, w);
1043                         dst += dst_stride;
1044                         src += src_stride;
1045                     }
1046                 }
1047             }
1048         }
1049     }
1050     copy_picture_attributes(s, pic, pic_arg);
1051     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1052   }
1053
1054     /* shift buffer entries */
1055     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1056         s->input_picture[i - 1] = s->input_picture[i];
1057
1058     s->input_picture[encoding_delay] = (Picture*) pic;
1059
1060     return 0;
1061 }
1062
1063 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1064 {
1065     int x, y, plane;
1066     int score = 0;
1067     int64_t score64 = 0;
1068
1069     for (plane = 0; plane < 3; plane++) {
1070         const int stride = p->f.linesize[plane];
1071         const int bw = plane ? 1 : 2;
1072         for (y = 0; y < s->mb_height * bw; y++) {
1073             for (x = 0; x < s->mb_width * bw; x++) {
1074                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1075                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1076                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1077                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1078
1079                 switch (s->avctx->frame_skip_exp) {
1080                 case 0: score    =  FFMAX(score, v);          break;
1081                 case 1: score   += FFABS(v);                  break;
1082                 case 2: score   += v * v;                     break;
1083                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1084                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1085                 }
1086             }
1087         }
1088     }
1089
1090     if (score)
1091         score64 = score;
1092
1093     if (score64 < s->avctx->frame_skip_threshold)
1094         return 1;
1095     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1096         return 1;
1097     return 0;
1098 }
1099
1100 static int estimate_best_b_count(MpegEncContext *s)
1101 {
1102     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1103     AVCodecContext *c = avcodec_alloc_context3(NULL);
1104     AVFrame input[FF_MAX_B_FRAMES + 2];
1105     const int scale = s->avctx->brd_scale;
1106     int i, j, out_size, p_lambda, b_lambda, lambda2;
1107     int outbuf_size  = s->width * s->height; // FIXME
1108     uint8_t *outbuf  = av_malloc(outbuf_size);
1109     int64_t best_rd  = INT64_MAX;
1110     int best_b_count = -1;
1111
1112     assert(scale >= 0 && scale <= 3);
1113
1114     //emms_c();
1115     //s->next_picture_ptr->quality;
1116     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1117     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1118     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1119     if (!b_lambda) // FIXME we should do this somewhere else
1120         b_lambda = p_lambda;
1121     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1122                FF_LAMBDA_SHIFT;
1123
1124     c->width        = s->width  >> scale;
1125     c->height       = s->height >> scale;
1126     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1127                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1128     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1129     c->mb_decision  = s->avctx->mb_decision;
1130     c->me_cmp       = s->avctx->me_cmp;
1131     c->mb_cmp       = s->avctx->mb_cmp;
1132     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1133     c->pix_fmt      = PIX_FMT_YUV420P;
1134     c->time_base    = s->avctx->time_base;
1135     c->max_b_frames = s->max_b_frames;
1136
1137     if (avcodec_open2(c, codec, NULL) < 0)
1138         return -1;
1139
1140     for (i = 0; i < s->max_b_frames + 2; i++) {
1141         int ysize = c->width * c->height;
1142         int csize = (c->width / 2) * (c->height / 2);
1143         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1144                                                 s->next_picture_ptr;
1145
1146         avcodec_get_frame_defaults(&input[i]);
1147         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1148         input[i].data[1]     = input[i].data[0] + ysize;
1149         input[i].data[2]     = input[i].data[1] + csize;
1150         input[i].linesize[0] = c->width;
1151         input[i].linesize[1] =
1152         input[i].linesize[2] = c->width / 2;
1153
1154         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1155             pre_input = *pre_input_ptr;
1156
1157             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1158                 pre_input.f.data[0] += INPLACE_OFFSET;
1159                 pre_input.f.data[1] += INPLACE_OFFSET;
1160                 pre_input.f.data[2] += INPLACE_OFFSET;
1161             }
1162
1163             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1164                                  pre_input.f.data[0], pre_input.f.linesize[0],
1165                                  c->width,      c->height);
1166             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1167                                  pre_input.f.data[1], pre_input.f.linesize[1],
1168                                  c->width >> 1, c->height >> 1);
1169             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1170                                  pre_input.f.data[2], pre_input.f.linesize[2],
1171                                  c->width >> 1, c->height >> 1);
1172         }
1173     }
1174
1175     for (j = 0; j < s->max_b_frames + 1; j++) {
1176         int64_t rd = 0;
1177
1178         if (!s->input_picture[j])
1179             break;
1180
1181         c->error[0] = c->error[1] = c->error[2] = 0;
1182
1183         input[0].pict_type = AV_PICTURE_TYPE_I;
1184         input[0].quality   = 1 * FF_QP2LAMBDA;
1185         out_size           = avcodec_encode_video(c, outbuf,
1186                                                   outbuf_size, &input[0]);
1187         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1188
1189         for (i = 0; i < s->max_b_frames + 1; i++) {
1190             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1191
1192             input[i + 1].pict_type = is_p ?
1193                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1194             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1195             out_size = avcodec_encode_video(c, outbuf, outbuf_size,
1196                                             &input[i + 1]);
1197             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1198         }
1199
1200         /* get the delayed frames */
1201         while (out_size) {
1202             out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
1203             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1204         }
1205
1206         rd += c->error[0] + c->error[1] + c->error[2];
1207
1208         if (rd < best_rd) {
1209             best_rd = rd;
1210             best_b_count = j;
1211         }
1212     }
1213
1214     av_freep(&outbuf);
1215     avcodec_close(c);
1216     av_freep(&c);
1217
1218     for (i = 0; i < s->max_b_frames + 2; i++) {
1219         av_freep(&input[i].data[0]);
1220     }
1221
1222     return best_b_count;
1223 }
1224
1225 static int select_input_picture(MpegEncContext *s)
1226 {
1227     int i;
1228
1229     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1230         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1231     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1232
1233     /* set next picture type & ordering */
1234     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1235         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1236             s->next_picture_ptr == NULL || s->intra_only) {
1237             s->reordered_input_picture[0] = s->input_picture[0];
1238             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1239             s->reordered_input_picture[0]->f.coded_picture_number =
1240                 s->coded_picture_number++;
1241         } else {
1242             int b_frames;
1243
1244             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1245                 if (s->picture_in_gop_number < s->gop_size &&
1246                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1247                     // FIXME check that te gop check above is +-1 correct
1248                     //av_log(NULL, AV_LOG_DEBUG, "skip %p %"PRId64"\n",
1249                     //       s->input_picture[0]->f.data[0],
1250                     //       s->input_picture[0]->pts);
1251
1252                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1253                         for (i = 0; i < 4; i++)
1254                             s->input_picture[0]->f.data[i] = NULL;
1255                         s->input_picture[0]->f.type = 0;
1256                     } else {
1257                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1258                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1259
1260                         s->avctx->release_buffer(s->avctx,
1261                                                  (AVFrame *) s->input_picture[0]);
1262                     }
1263
1264                     emms_c();
1265                     ff_vbv_update(s, 0);
1266
1267                     goto no_output_pic;
1268                 }
1269             }
1270
1271             if (s->flags & CODEC_FLAG_PASS2) {
1272                 for (i = 0; i < s->max_b_frames + 1; i++) {
1273                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1274
1275                     if (pict_num >= s->rc_context.num_entries)
1276                         break;
1277                     if (!s->input_picture[i]) {
1278                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1279                         break;
1280                     }
1281
1282                     s->input_picture[i]->f.pict_type =
1283                         s->rc_context.entry[pict_num].new_pict_type;
1284                 }
1285             }
1286
1287             if (s->avctx->b_frame_strategy == 0) {
1288                 b_frames = s->max_b_frames;
1289                 while (b_frames && !s->input_picture[b_frames])
1290                     b_frames--;
1291             } else if (s->avctx->b_frame_strategy == 1) {
1292                 for (i = 1; i < s->max_b_frames + 1; i++) {
1293                     if (s->input_picture[i] &&
1294                         s->input_picture[i]->b_frame_score == 0) {
1295                         s->input_picture[i]->b_frame_score =
1296                             get_intra_count(s,
1297                                             s->input_picture[i    ]->f.data[0],
1298                                             s->input_picture[i - 1]->f.data[0],
1299                                             s->linesize) + 1;
1300                     }
1301                 }
1302                 for (i = 0; i < s->max_b_frames + 1; i++) {
1303                     if (s->input_picture[i] == NULL ||
1304                         s->input_picture[i]->b_frame_score - 1 >
1305                             s->mb_num / s->avctx->b_sensitivity)
1306                         break;
1307                 }
1308
1309                 b_frames = FFMAX(0, i - 1);
1310
1311                 /* reset scores */
1312                 for (i = 0; i < b_frames + 1; i++) {
1313                     s->input_picture[i]->b_frame_score = 0;
1314                 }
1315             } else if (s->avctx->b_frame_strategy == 2) {
1316                 b_frames = estimate_best_b_count(s);
1317             } else {
1318                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1319                 b_frames = 0;
1320             }
1321
1322             emms_c();
1323             //static int b_count = 0;
1324             //b_count += b_frames;
1325             //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
1326
1327             for (i = b_frames - 1; i >= 0; i--) {
1328                 int type = s->input_picture[i]->f.pict_type;
1329                 if (type && type != AV_PICTURE_TYPE_B)
1330                     b_frames = i;
1331             }
1332             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1333                 b_frames == s->max_b_frames) {
1334                 av_log(s->avctx, AV_LOG_ERROR,
1335                        "warning, too many b frames in a row\n");
1336             }
1337
1338             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1339                 if ((s->flags2 & CODEC_FLAG2_STRICT_GOP) &&
1340                     s->gop_size > s->picture_in_gop_number) {
1341                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1342                 } else {
1343                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1344                         b_frames = 0;
1345                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1346                 }
1347             }
1348
1349             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1350                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1351                 b_frames--;
1352
1353             s->reordered_input_picture[0] = s->input_picture[b_frames];
1354             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1355                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1356             s->reordered_input_picture[0]->f.coded_picture_number =
1357                 s->coded_picture_number++;
1358             for (i = 0; i < b_frames; i++) {
1359                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1360                 s->reordered_input_picture[i + 1]->f.pict_type =
1361                     AV_PICTURE_TYPE_B;
1362                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1363                     s->coded_picture_number++;
1364             }
1365         }
1366     }
1367 no_output_pic:
1368     if (s->reordered_input_picture[0]) {
1369         s->reordered_input_picture[0]->f.reference =
1370            s->reordered_input_picture[0]->f.pict_type !=
1371                AV_PICTURE_TYPE_B ? 3 : 0;
1372
1373         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1374
1375         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1376             s->avctx->rc_buffer_size) {
1377             // input is a shared pix, so we can't modifiy it -> alloc a new
1378             // one & ensure that the shared one is reuseable
1379
1380             Picture *pic;
1381             int i = ff_find_unused_picture(s, 0);
1382             if (i < 0)
1383                 return i;
1384             pic = &s->picture[i];
1385
1386             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1387             if (ff_alloc_picture(s, pic, 0) < 0) {
1388                 return -1;
1389             }
1390
1391             /* mark us unused / free shared pic */
1392             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1393                 s->avctx->release_buffer(s->avctx,
1394                                          (AVFrame *) s->reordered_input_picture[0]);
1395             for (i = 0; i < 4; i++)
1396                 s->reordered_input_picture[0]->f.data[i] = NULL;
1397             s->reordered_input_picture[0]->f.type = 0;
1398
1399             copy_picture_attributes(s, (AVFrame *) pic,
1400                                     (AVFrame *) s->reordered_input_picture[0]);
1401
1402             s->current_picture_ptr = pic;
1403         } else {
1404             // input is not a shared pix -> reuse buffer for current_pix
1405
1406             assert(s->reordered_input_picture[0]->f.type ==
1407                        FF_BUFFER_TYPE_USER ||
1408                    s->reordered_input_picture[0]->f.type ==
1409                        FF_BUFFER_TYPE_INTERNAL);
1410
1411             s->current_picture_ptr = s->reordered_input_picture[0];
1412             for (i = 0; i < 4; i++) {
1413                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1414             }
1415         }
1416         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1417
1418         s->picture_number = s->new_picture.f.display_picture_number;
1419         //printf("dpn:%d\n", s->picture_number);
1420     } else {
1421         memset(&s->new_picture, 0, sizeof(Picture));
1422     }
1423     return 0;
1424 }
1425
1426 int MPV_encode_picture(AVCodecContext *avctx,
1427                        unsigned char *buf, int buf_size, void *data)
1428 {
1429     MpegEncContext *s = avctx->priv_data;
1430     AVFrame *pic_arg  = data;
1431     int i, stuffing_count;
1432     int context_count = s->slice_context_count;
1433
1434     for (i = 0; i < context_count; i++) {
1435         int start_y = s->thread_context[i]->start_mb_y;
1436         int   end_y = s->thread_context[i]->  end_mb_y;
1437         int h       = s->mb_height;
1438         uint8_t *start = buf + (size_t)(((int64_t) buf_size) * start_y / h);
1439         uint8_t *end   = buf + (size_t)(((int64_t) buf_size) *   end_y / h);
1440
1441         init_put_bits(&s->thread_context[i]->pb, start, end - start);
1442     }
1443
1444     s->picture_in_gop_number++;
1445
1446     if (load_input_picture(s, pic_arg) < 0)
1447         return -1;
1448
1449     if (select_input_picture(s) < 0) {
1450         return -1;
1451     }
1452
1453     /* output? */
1454     if (s->new_picture.f.data[0]) {
1455         s->pict_type = s->new_picture.f.pict_type;
1456         //emms_c();
1457         //printf("qs:%f %f %d\n", s->new_picture.quality,
1458         //       s->current_picture.quality, s->qscale);
1459         MPV_frame_start(s, avctx);
1460 vbv_retry:
1461         if (encode_picture(s, s->picture_number) < 0)
1462             return -1;
1463
1464         avctx->header_bits = s->header_bits;
1465         avctx->mv_bits     = s->mv_bits;
1466         avctx->misc_bits   = s->misc_bits;
1467         avctx->i_tex_bits  = s->i_tex_bits;
1468         avctx->p_tex_bits  = s->p_tex_bits;
1469         avctx->i_count     = s->i_count;
1470         // FIXME f/b_count in avctx
1471         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1472         avctx->skip_count  = s->skip_count;
1473
1474         MPV_frame_end(s);
1475
1476         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1477             ff_mjpeg_encode_picture_trailer(s);
1478
1479         if (avctx->rc_buffer_size) {
1480             RateControlContext *rcc = &s->rc_context;
1481             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1482
1483             if (put_bits_count(&s->pb) > max_size &&
1484                 s->lambda < s->avctx->lmax) {
1485                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1486                                        (s->qscale + 1) / s->qscale);
1487                 if (s->adaptive_quant) {
1488                     int i;
1489                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1490                         s->lambda_table[i] =
1491                             FFMAX(s->lambda_table[i] + 1,
1492                                   s->lambda_table[i] * (s->qscale + 1) /
1493                                   s->qscale);
1494                 }
1495                 s->mb_skipped = 0;        // done in MPV_frame_start()
1496                 // done in encode_picture() so we must undo it
1497                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1498                     if (s->flipflop_rounding          ||
1499                         s->codec_id == CODEC_ID_H263P ||
1500                         s->codec_id == CODEC_ID_MPEG4)
1501                         s->no_rounding ^= 1;
1502                 }
1503                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1504                     s->time_base       = s->last_time_base;
1505                     s->last_non_b_time = s->time - s->pp_time;
1506                 }
1507                 //av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda);
1508                 for (i = 0; i < context_count; i++) {
1509                     PutBitContext *pb = &s->thread_context[i]->pb;
1510                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1511                 }
1512                 goto vbv_retry;
1513             }
1514
1515             assert(s->avctx->rc_max_rate);
1516         }
1517
1518         if (s->flags & CODEC_FLAG_PASS1)
1519             ff_write_pass1_stats(s);
1520
1521         for (i = 0; i < 4; i++) {
1522             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1523             avctx->error[i] += s->current_picture_ptr->f.error[i];
1524         }
1525
1526         if (s->flags & CODEC_FLAG_PASS1)
1527             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1528                    avctx->i_tex_bits + avctx->p_tex_bits ==
1529                        put_bits_count(&s->pb));
1530         flush_put_bits(&s->pb);
1531         s->frame_bits  = put_bits_count(&s->pb);
1532
1533         stuffing_count = ff_vbv_update(s, s->frame_bits);
1534         if (stuffing_count) {
1535             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1536                     stuffing_count + 50) {
1537                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1538                 return -1;
1539             }
1540
1541             switch (s->codec_id) {
1542             case CODEC_ID_MPEG1VIDEO:
1543             case CODEC_ID_MPEG2VIDEO:
1544                 while (stuffing_count--) {
1545                     put_bits(&s->pb, 8, 0);
1546                 }
1547             break;
1548             case CODEC_ID_MPEG4:
1549                 put_bits(&s->pb, 16, 0);
1550                 put_bits(&s->pb, 16, 0x1C3);
1551                 stuffing_count -= 4;
1552                 while (stuffing_count--) {
1553                     put_bits(&s->pb, 8, 0xFF);
1554                 }
1555             break;
1556             default:
1557                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1558             }
1559             flush_put_bits(&s->pb);
1560             s->frame_bits  = put_bits_count(&s->pb);
1561         }
1562
1563         /* update mpeg1/2 vbv_delay for CBR */
1564         if (s->avctx->rc_max_rate                          &&
1565             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1566             s->out_format == FMT_MPEG1                     &&
1567             90000LL * (avctx->rc_buffer_size - 1) <=
1568                 s->avctx->rc_max_rate * 0xFFFFLL) {
1569             int vbv_delay, min_delay;
1570             double inbits  = s->avctx->rc_max_rate *
1571                              av_q2d(s->avctx->time_base);
1572             int    minbits = s->frame_bits - 8 *
1573                              (s->vbv_delay_ptr - s->pb.buf - 1);
1574             double bits    = s->rc_context.buffer_index + minbits - inbits;
1575
1576             if (bits < 0)
1577                 av_log(s->avctx, AV_LOG_ERROR,
1578                        "Internal error, negative bits\n");
1579
1580             assert(s->repeat_first_field == 0);
1581
1582             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1583             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1584                         s->avctx->rc_max_rate;
1585
1586             vbv_delay = FFMAX(vbv_delay, min_delay);
1587
1588             assert(vbv_delay < 0xFFFF);
1589
1590             s->vbv_delay_ptr[0] &= 0xF8;
1591             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1592             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1593             s->vbv_delay_ptr[2] &= 0x07;
1594             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1595             avctx->vbv_delay     = vbv_delay * 300;
1596         }
1597         s->total_bits     += s->frame_bits;
1598         avctx->frame_bits  = s->frame_bits;
1599     } else {
1600         assert((put_bits_ptr(&s->pb) == s->pb.buf));
1601         s->frame_bits = 0;
1602     }
1603     assert((s->frame_bits & 7) == 0);
1604
1605     return s->frame_bits / 8;
1606 }
1607
1608 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1609                                                 int n, int threshold)
1610 {
1611     static const char tab[64] = {
1612         3, 2, 2, 1, 1, 1, 1, 1,
1613         1, 1, 1, 1, 1, 1, 1, 1,
1614         1, 1, 1, 1, 1, 1, 1, 1,
1615         0, 0, 0, 0, 0, 0, 0, 0,
1616         0, 0, 0, 0, 0, 0, 0, 0,
1617         0, 0, 0, 0, 0, 0, 0, 0,
1618         0, 0, 0, 0, 0, 0, 0, 0,
1619         0, 0, 0, 0, 0, 0, 0, 0
1620     };
1621     int score = 0;
1622     int run = 0;
1623     int i;
1624     DCTELEM *block = s->block[n];
1625     const int last_index = s->block_last_index[n];
1626     int skip_dc;
1627
1628     if (threshold < 0) {
1629         skip_dc = 0;
1630         threshold = -threshold;
1631     } else
1632         skip_dc = 1;
1633
1634     /* Are all we could set to zero already zero? */
1635     if (last_index <= skip_dc - 1)
1636         return;
1637
1638     for (i = 0; i <= last_index; i++) {
1639         const int j = s->intra_scantable.permutated[i];
1640         const int level = FFABS(block[j]);
1641         if (level == 1) {
1642             if (skip_dc && i == 0)
1643                 continue;
1644             score += tab[run];
1645             run = 0;
1646         } else if (level > 1) {
1647             return;
1648         } else {
1649             run++;
1650         }
1651     }
1652     if (score >= threshold)
1653         return;
1654     for (i = skip_dc; i <= last_index; i++) {
1655         const int j = s->intra_scantable.permutated[i];
1656         block[j] = 0;
1657     }
1658     if (block[0])
1659         s->block_last_index[n] = 0;
1660     else
1661         s->block_last_index[n] = -1;
1662 }
1663
1664 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1665                                int last_index)
1666 {
1667     int i;
1668     const int maxlevel = s->max_qcoeff;
1669     const int minlevel = s->min_qcoeff;
1670     int overflow = 0;
1671
1672     if (s->mb_intra) {
1673         i = 1; // skip clipping of intra dc
1674     } else
1675         i = 0;
1676
1677     for (; i <= last_index; i++) {
1678         const int j = s->intra_scantable.permutated[i];
1679         int level = block[j];
1680
1681         if (level > maxlevel) {
1682             level = maxlevel;
1683             overflow++;
1684         } else if (level < minlevel) {
1685             level = minlevel;
1686             overflow++;
1687         }
1688
1689         block[j] = level;
1690     }
1691
1692     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1693         av_log(s->avctx, AV_LOG_INFO,
1694                "warning, clipping %d dct coefficients to %d..%d\n",
1695                overflow, minlevel, maxlevel);
1696 }
1697
/**
 * Fill an 8x8 weight table from the local activity of an 8x8 pixel block.
 *
 * For every pixel the sum and sum of squares over its neighbourhood (the
 * 3x3 window clipped to the block) are accumulated, and the weight is
 * 36 * ff_sqrt(count * sqr - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the 8x8 block
 * @param stride distance between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int idx;

    // FIXME optimize
    for (idx = 0; idx < 64; idx++) {
        const int col     = idx & 7;
        const int row     = idx >> 3;
        const int left    = FFMAX(col - 1, 0);
        const int right   = FFMIN(8, col + 2);   /* exclusive */
        const int top     = FFMAX(row - 1, 0);
        const int bottom  = FFMIN(8, row + 2);   /* exclusive */
        const int samples = (right - left) * (bottom - top);
        int total   = 0;
        int squares = 0;
        int xx, yy;

        for (yy = top; yy < bottom; yy++) {
            const uint8_t *line = ptr + yy * stride;

            for (xx = left; xx < right; xx++) {
                const int pixel = line[xx];

                total   += pixel;
                squares += pixel * pixel;
            }
        }

        weight[idx] = (36 * ff_sqrt(samples * squares - total * total)) /
                      samples;
    }
}
1721
     /**
      * Encode the macroblock at (s->mb_x, s->mb_y).
      *
      * Steps, in order: select the quantizer, locate the source pixels
      * (through an edge-emulated copy when the macroblock crosses the picture
      * border), load either the source pixels (intra) or the difference to
      * the motion-compensated prediction (inter) into s->block[], decide on
      * interlaced DCT, run DCT + quantization (optionally followed by noise
      * shaping refinement and single coefficient elimination) and finally
      * call the codec specific macroblock writer.
      *
      * @param s               encoder context
      * @param motion_x        passed unchanged to the codec specific writer
      * @param motion_y        passed unchanged to the codec specific writer
      * @param mb_block_height number of chroma rows per macroblock
      *                        (encode_mb() passes 8 for CHROMA_420, else 16)
      * @param mb_block_count  number of 8x8 blocks per macroblock
      *                        (encode_mb() passes 6 for CHROMA_420, else 8)
      */
1722 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1723                                                 int motion_x, int motion_y,
1724                                                 int mb_block_height,
1725                                                 int mb_block_count)
1726 {
1727     int16_t weight[8][64];
1728     DCTELEM orig[8][64];
1729     const int mb_x = s->mb_x;
1730     const int mb_y = s->mb_y;
1731     int i;
1732     int skip_dct[8];
1733     int dct_offset = s->linesize * 8; // default for progressive frames
1734     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1735     int wrap_y, wrap_c;
1736
         /* every block starts out with the context-wide skipdct setting */
1737     for (i = 0; i < mb_block_count; i++)
1738         skip_dct[i] = s->skipdct;
1739
         /* per-macroblock quantizer selection */
1740     if (s->adaptive_quant) {
1741         const int last_qp = s->qscale;
1742         const int mb_xy = mb_x + mb_y * s->mb_stride;
1743
1744         s->lambda = s->lambda_table[mb_xy];
1745         update_qscale(s);
1746
1747         if (!(s->flags & CODEC_FLAG_QP_RD)) {
1748             s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
1749             s->dquant = s->qscale - last_qp;
1750
1751             if (s->out_format == FMT_H263) {
                     /* H.263 style output: quantizer change limited to +-2 */
1752                 s->dquant = av_clip(s->dquant, -2, 2);
1753
1754                 if (s->codec_id == CODEC_ID_MPEG4) {
1755                     if (!s->mb_intra) {
                             /* inter MB in a B-frame: an odd dquant or
                              * direct mode forces dquant to 0 */
1756                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1757                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1758                                 s->dquant = 0;
1759                         }
                             /* 8x8 motion vectors: no quantizer change */
1760                         if (s->mv_type == MV_TYPE_8X8)
1761                             s->dquant = 0;
1762                     }
1763                 }
1764             }
1765         }
1766         ff_set_qscale(s, last_qp + s->dquant);
1767     } else if (s->flags & CODEC_FLAG_QP_RD)
1768         ff_set_qscale(s, s->qscale + s->dquant);
1769
         /* source pixels of this macroblock in the picture being encoded */
1770     wrap_y = s->linesize;
1771     wrap_c = s->uvlinesize;
1772     ptr_y  = s->new_picture.f.data[0] +
1773              (mb_y * 16 * wrap_y)              + mb_x * 16;
1774     ptr_cb = s->new_picture.f.data[1] +
1775              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1776     ptr_cr = s->new_picture.f.data[2] +
1777              (mb_y * mb_block_height * wrap_c) + mb_x * 8;
1778
         /* The macroblock reaches past the right or bottom picture border:
          * work on copies produced by emulated_edge_mc() in edge_emu_buffer.
          * NOTE(review): the chroma calls use mb_y * 8 and s->height >> 1
          * even when mb_block_height is 16 -- confirm this is correct for
          * formats other than 4:2:0. */
1779     if (mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) {
1780         uint8_t *ebuf = s->edge_emu_buffer + 32;
1781         s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1782                                 mb_y * 16, s->width, s->height);
1783         ptr_y = ebuf;
1784         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
1785                                 mb_block_height, mb_x * 8, mb_y * 8,
1786                                 s->width >> 1, s->height >> 1);
1787         ptr_cb = ebuf + 18 * wrap_y;
1788         s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
1789                                 mb_block_height, mb_x * 8, mb_y * 8,
1790                                 s->width >> 1, s->height >> 1);
1791         ptr_cr = ebuf + 18 * wrap_y + 8;
1792     }
1793
1794     if (s->mb_intra) {
             /* intra: the blocks are filled with the source pixels */
1795         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1796             int progressive_score, interlaced_score;
1797
                 /* Frame vs. field DCT decision. The progressive score is
                  * lowered by 400, so field DCT is only chosen when it is
                  * better by more than that margin. */
1798             s->interlaced_dct = 0;
1799             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1800                                                     NULL, wrap_y, 8) +
1801                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1802                                                     NULL, wrap_y, 8) - 400;
1803
1804             if (progressive_score > 0) {
1805                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1806                                                        NULL, wrap_y * 2, 8) +
1807                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1808                                                        NULL, wrap_y * 2, 8);
1809                 if (progressive_score > interlaced_score) {
1810                     s->interlaced_dct = 1;
1811
                         /* field mode: the lower blocks start one line
                          * further down and rows are read every 2nd line */
1812                     dct_offset = wrap_y;
1813                     wrap_y <<= 1;
1814                     if (s->chroma_format == CHROMA_422)
1815                         wrap_c <<= 1;
1816                 }
1817             }
1818         }
1819
1820         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1821         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1822         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1823         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1824
             /* NOTE(review): only blocks 4 and 5 are marked as skipped in
              * gray mode; with 8 blocks per macroblock, blocks 6 and 7 keep
              * their s->skipdct value -- confirm this is intended. */
1825         if (s->flags & CODEC_FLAG_GRAY) {
1826             skip_dct[4] = 1;
1827             skip_dct[5] = 1;
1828         } else {
1829             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1830             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1831             if (!s->chroma_y_shift) { /* 422 */
1832                 s->dsp.get_pixels(s->block[6],
1833                                   ptr_cb + (dct_offset >> 1), wrap_c);
1834                 s->dsp.get_pixels(s->block[7],
1835                                   ptr_cr + (dct_offset >> 1), wrap_c);
1836             }
1837         }
1838     } else {
             /* inter: build the prediction in s->dest[] and load the
              * difference between source and prediction into the blocks */
1839         op_pixels_func (*op_pix)[4];
1840         qpel_mc_func (*op_qpix)[16];
1841         uint8_t *dest_y, *dest_cb, *dest_cr;
1842
1843         dest_y  = s->dest[0];
1844         dest_cb = s->dest[1];
1845         dest_cr = s->dest[2];
1846
1847         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1848             op_pix  = s->dsp.put_pixels_tab;
1849             op_qpix = s->dsp.put_qpel_pixels_tab;
1850         } else {
1851             op_pix  = s->dsp.put_no_rnd_pixels_tab;
1852             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1853         }
1854
1855         if (s->mv_dir & MV_DIR_FORWARD) {
1856             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data,
1857                        op_pix, op_qpix);
                 /* a following backward prediction uses the avg functions */
1858             op_pix  = s->dsp.avg_pixels_tab;
1859             op_qpix = s->dsp.avg_qpel_pixels_tab;
1860         }
1861         if (s->mv_dir & MV_DIR_BACKWARD) {
1862             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data,
1863                        op_pix, op_qpix);
1864         }
1865
1866         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1867             int progressive_score, interlaced_score;
1868
                 /* same frame/field decision as in the intra case, but
                  * scored on prediction vs. source; FF_CMP_VSSE lowers the
                  * progressive score by a further 400 */
1869             s->interlaced_dct = 0;
1870             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1871                                                     ptr_y,              wrap_y,
1872                                                     8) +
1873                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1874                                                     ptr_y + wrap_y * 8, wrap_y,
1875                                                     8) - 400;
1876
1877             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1878                 progressive_score -= 400;
1879
1880             if (progressive_score > 0) {
1881                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1882                                                        ptr_y,
1883                                                        wrap_y * 2, 8) +
1884                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1885                                                        ptr_y + wrap_y,
1886                                                        wrap_y * 2, 8);
1887
1888                 if (progressive_score > interlaced_score) {
1889                     s->interlaced_dct = 1;
1890
1891                     dct_offset = wrap_y;
1892                     wrap_y <<= 1;
1893                     if (s->chroma_format == CHROMA_422)
1894                         wrap_c <<= 1;
1895                 }
1896             }
1897         }
1898
1899         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1900         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1901         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1902                            dest_y + dct_offset, wrap_y);
1903         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1904                            dest_y + dct_offset + 8, wrap_y);
1905
1906         if (s->flags & CODEC_FLAG_GRAY) {
1907             skip_dct[4] = 1;
1908             skip_dct[5] = 1;
1909         } else {
1910             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1911             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1912             if (!s->chroma_y_shift) { /* 422 */
1913                 s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
1914                                    dest_cb + (dct_offset >> 1), wrap_c);
1915                 s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
1916                                    dest_cr + (dct_offset >> 1), wrap_c);
1917             }
1918         }
1919         /* pre quantization */
             /* When the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2, every block whose SAD against the prediction is
              * below 20 * qscale is marked to skip DCT/quantization. */
1920         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1921                 2 * s->qscale * s->qscale) {
1922             // FIXME optimize
1923             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1924                               wrap_y, 8) < 20 * s->qscale)
1925                 skip_dct[0] = 1;
1926             if (s->dsp.sad[1](NULL, ptr_y + 8,
1927                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1928                 skip_dct[1] = 1;
1929             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1930                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1931                 skip_dct[2] = 1;
1932             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1933                               dest_y + dct_offset + 8,
1934                               wrap_y, 8) < 20 * s->qscale)
1935                 skip_dct[3] = 1;
1936             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1937                               wrap_c, 8) < 20 * s->qscale)
1938                 skip_dct[4] = 1;
1939             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1940                               wrap_c, 8) < 20 * s->qscale)
1941                 skip_dct[5] = 1;
1942             if (!s->chroma_y_shift) { /* 422 */
1943                 if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
1944                                   dest_cb + (dct_offset >> 1),
1945                                   wrap_c, 8) < 20 * s->qscale)
1946                     skip_dct[6] = 1;
1947                 if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
1948                                   dest_cr + (dct_offset >> 1),
1949                                   wrap_c, 8) < 20 * s->qscale)
1950                     skip_dct[7] = 1;
1951             }
1952         }
1953     }
1954
         /* Noise shaping: compute a weight table from the source pixels for
          * every block that will be transformed and keep a copy of the
          * untransformed blocks in orig[] for dct_quantize_refine(). */
1955     if (s->avctx->quantizer_noise_shaping) {
1956         if (!skip_dct[0])
1957             get_visual_weight(weight[0], ptr_y                 , wrap_y);
1958         if (!skip_dct[1])
1959             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
1960         if (!skip_dct[2])
1961             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
1962         if (!skip_dct[3])
1963             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
1964         if (!skip_dct[4])
1965             get_visual_weight(weight[4], ptr_cb                , wrap_c);
1966         if (!skip_dct[5])
1967             get_visual_weight(weight[5], ptr_cr                , wrap_c);
1968         if (!s->chroma_y_shift) { /* 422 */
1969             if (!skip_dct[6])
1970                 get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
1971                                   wrap_c);
1972             if (!skip_dct[7])
1973                 get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
1974                                   wrap_c);
1975         }
1976         memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
1977     }
1978
1979     /* DCT & quantize */
1980     assert(s->out_format != FMT_MJPEG || s->qscale == 8);
1981     {
1982         for (i = 0; i < mb_block_count; i++) {
1983             if (!skip_dct[i]) {
1984                 int overflow;
1985                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
1986                 // FIXME we could decide to change to quantizer instead of
1987                 // clipping
1988                 // JS: I don't think that would be a good idea it could lower
1989                 //     quality instead of improve it. Just INTRADC clipping
1990                 //     deserves changes in quantizer
1991                 if (overflow)
1992                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
1993             } else
                     /* skipped block: marked as having no coefficients */
1994                 s->block_last_index[i] = -1;
1995         }
1996         if (s->avctx->quantizer_noise_shaping) {
1997             for (i = 0; i < mb_block_count; i++) {
1998                 if (!skip_dct[i]) {
1999                     s->block_last_index[i] =
2000                         dct_quantize_refine(s, s->block[i], weight[i],
2001                                             orig[i], i, s->qscale);
2002                 }
2003             }
2004         }
2005
             /* single coefficient elimination, inter macroblocks only */
2006         if (s->luma_elim_threshold && !s->mb_intra)
2007             for (i = 0; i < 4; i++)
2008                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2009         if (s->chroma_elim_threshold && !s->mb_intra)
2010             for (i = 4; i < mb_block_count; i++)
2011                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2012
             /* with CBP_RD, blocks without coefficients get a large fixed
              * coded_score */
2013         if (s->flags & CODEC_FLAG_CBP_RD) {
2014             for (i = 0; i < mb_block_count; i++) {
2015                 if (s->block_last_index[i] == -1)
2016                     s->coded_score[i] = INT_MAX / 256;
2017             }
2018         }
2019     }
2020
         /* gray-only intra macroblock: chroma blocks 4 and 5 are reduced to
          * a single first coefficient of 1024 / c_dc_scale (rounded) */
2021     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2022         s->block_last_index[4] =
2023         s->block_last_index[5] = 0;
2024         s->block[4][0] =
2025         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2026     }
2027
2028     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by scanning backwards for the last
          * non-zero coefficient in scan order */
2029     if (s->alternate_scan && s->dct_quantize != dct_quantize_c) {
2030         for (i = 0; i < mb_block_count; i++) {
2031             int j;
2032             if (s->block_last_index[i] > 0) {
2033                 for (j = 63; j > 0; j--) {
2034                     if (s->block[i][s->intra_scantable.permutated[j]])
2035                         break;
2036                 }
2037                 s->block_last_index[i] = j;
2038             }
2039         }
2040     }
2041
2042     /* huffman encode */
         /* Each call is guarded by its CONFIG_* constant; an unhandled
          * codec id ends in assert(0). */
2043     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2044     case CODEC_ID_MPEG1VIDEO:
2045     case CODEC_ID_MPEG2VIDEO:
2046         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2047             mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2048         break;
2049     case CODEC_ID_MPEG4:
2050         if (CONFIG_MPEG4_ENCODER)
2051             mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2052         break;
2053     case CODEC_ID_MSMPEG4V2:
2054     case CODEC_ID_MSMPEG4V3:
2055     case CODEC_ID_WMV1:
2056         if (CONFIG_MSMPEG4_ENCODER)
2057             msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2058         break;
2059     case CODEC_ID_WMV2:
2060         if (CONFIG_WMV2_ENCODER)
2061             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2062         break;
2063     case CODEC_ID_H261:
2064         if (CONFIG_H261_ENCODER)
2065             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2066         break;
2067     case CODEC_ID_H263:
2068     case CODEC_ID_H263P:
2069     case CODEC_ID_FLV1:
2070     case CODEC_ID_RV10:
2071     case CODEC_ID_RV20:
2072         if (CONFIG_H263_ENCODER)
2073             h263_encode_mb(s, s->block, motion_x, motion_y);
2074         break;
2075     case CODEC_ID_MJPEG:
2076         if (CONFIG_MJPEG_ENCODER)
2077             ff_mjpeg_encode_mb(s, s->block);
2078         break;
2079     default:
2080         assert(0);
2081     }
2082 }
2083
2084 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2085 {
2086     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2087     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2088 }
2089
2090 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2091     int i;
2092
2093     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2094
2095     /* mpeg1 */
2096     d->mb_skip_run= s->mb_skip_run;
2097     for(i=0; i<3; i++)
2098         d->last_dc[i] = s->last_dc[i];
2099
2100     /* statistics */
2101     d->mv_bits= s->mv_bits;
2102     d->i_tex_bits= s->i_tex_bits;
2103     d->p_tex_bits= s->p_tex_bits;
2104     d->i_count= s->i_count;
2105     d->f_count= s->f_count;
2106     d->b_count= s->b_count;
2107     d->skip_count= s->skip_count;
2108     d->misc_bits= s->misc_bits;
2109     d->last_bits= 0;
2110
2111     d->mb_skipped= 0;
2112     d->qscale= s->qscale;
2113     d->dquant= s->dquant;
2114
2115     d->esc3_level_length= s->esc3_level_length;
2116 }
2117
2118 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2119     int i;
2120
2121     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2122     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2123
2124     /* mpeg1 */
2125     d->mb_skip_run= s->mb_skip_run;
2126     for(i=0; i<3; i++)
2127         d->last_dc[i] = s->last_dc[i];
2128
2129     /* statistics */
2130     d->mv_bits= s->mv_bits;
2131     d->i_tex_bits= s->i_tex_bits;
2132     d->p_tex_bits= s->p_tex_bits;
2133     d->i_count= s->i_count;
2134     d->f_count= s->f_count;
2135     d->b_count= s->b_count;
2136     d->skip_count= s->skip_count;
2137     d->misc_bits= s->misc_bits;
2138
2139     d->mb_intra= s->mb_intra;
2140     d->mb_skipped= s->mb_skipped;
2141     d->mv_type= s->mv_type;
2142     d->mv_dir= s->mv_dir;
2143     d->pb= s->pb;
2144     if(s->data_partitioning){
2145         d->pb2= s->pb2;
2146         d->tex_pb= s->tex_pb;
2147     }
2148     d->block= s->block;
2149     for(i=0; i<8; i++)
2150         d->block_last_index[i]= s->block_last_index[i];
2151     d->interlaced_dct= s->interlaced_dct;
2152     d->qscale= s->qscale;
2153
2154     d->esc3_level_length= s->esc3_level_length;
2155 }
2156
2157 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2158                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2159                            int *dmin, int *next_block, int motion_x, int motion_y)
2160 {
2161     int score;
2162     uint8_t *dest_backup[3];
2163
2164     copy_context_before_encode(s, backup, type);
2165
2166     s->block= s->blocks[*next_block];
2167     s->pb= pb[*next_block];
2168     if(s->data_partitioning){
2169         s->pb2   = pb2   [*next_block];
2170         s->tex_pb= tex_pb[*next_block];
2171     }
2172
2173     if(*next_block){
2174         memcpy(dest_backup, s->dest, sizeof(s->dest));
2175         s->dest[0] = s->rd_scratchpad;
2176         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2177         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2178         assert(s->linesize >= 32); //FIXME
2179     }
2180
2181     encode_mb(s, motion_x, motion_y);
2182
2183     score= put_bits_count(&s->pb);
2184     if(s->data_partitioning){
2185         score+= put_bits_count(&s->pb2);
2186         score+= put_bits_count(&s->tex_pb);
2187     }
2188
2189     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2190         MPV_decode_mb(s, s->block);
2191
2192         score *= s->lambda2;
2193         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2194     }
2195
2196     if(*next_block){
2197         memcpy(s->dest, dest_backup, sizeof(s->dest));
2198     }
2199
2200     if(score<*dmin){
2201         *dmin= score;
2202         *next_block^=1;
2203
2204         copy_context_after_encode(best, s, type);
2205     }
2206 }
2207
2208 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2209     uint32_t *sq = ff_squareTbl + 256;
2210     int acc=0;
2211     int x,y;
2212
2213     if(w==16 && h==16)
2214         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2215     else if(w==8 && h==8)
2216         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2217
2218     for(y=0; y<h; y++){
2219         for(x=0; x<w; x++){
2220             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2221         }
2222     }
2223
2224     assert(acc>=0);
2225
2226     return acc;
2227 }
2228
2229 static int sse_mb(MpegEncContext *s){
2230     int w= 16;
2231     int h= 16;
2232
2233     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2234     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2235
2236     if(w==16 && h==16)
2237       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2238         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2239                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2240                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2241       }else{
2242         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2243                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2244                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2245       }
2246     else
2247         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2248                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2249                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2250 }
2251
2252 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2253     MpegEncContext *s= *(void**)arg;
2254
2255
2256     s->me.pre_pass=1;
2257     s->me.dia_size= s->avctx->pre_dia_size;
2258     s->first_slice_line=1;
2259     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2260         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2261             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2262         }
2263         s->first_slice_line=0;
2264     }
2265
2266     s->me.pre_pass=0;
2267
2268     return 0;
2269 }
2270
2271 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2272     MpegEncContext *s= *(void**)arg;
2273
2274     ff_check_alignment();
2275
2276     s->me.dia_size= s->avctx->dia_size;
2277     s->first_slice_line=1;
2278     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2279         s->mb_x=0; //for block init below
2280         ff_init_block_index(s);
2281         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2282             s->block_index[0]+=2;
2283             s->block_index[1]+=2;
2284             s->block_index[2]+=2;
2285             s->block_index[3]+=2;
2286
2287             /* compute motion vector & mb_type and store in context */
2288             if(s->pict_type==AV_PICTURE_TYPE_B)
2289                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2290             else
2291                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2292         }
2293         s->first_slice_line=0;
2294     }
2295     return 0;
2296 }
2297
2298 static int mb_var_thread(AVCodecContext *c, void *arg){
2299     MpegEncContext *s= *(void**)arg;
2300     int mb_x, mb_y;
2301
2302     ff_check_alignment();
2303
2304     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2305         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2306             int xx = mb_x * 16;
2307             int yy = mb_y * 16;
2308             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2309             int varc;
2310             int sum = s->dsp.pix_sum(pix, s->linesize);
2311
2312             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2313
2314             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2315             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2316             s->me.mb_var_sum_temp    += varc;
2317         }
2318     }
2319     return 0;
2320 }
2321
2322 static void write_slice_end(MpegEncContext *s){
2323     if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4){
2324         if(s->partitioned_frame){
2325             ff_mpeg4_merge_partitions(s);
2326         }
2327
2328         ff_mpeg4_stuffing(&s->pb);
2329     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2330         ff_mjpeg_encode_stuffing(&s->pb);
2331     }
2332
2333     avpriv_align_put_bits(&s->pb);
2334     flush_put_bits(&s->pb);
2335
2336     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2337         s->misc_bits+= get_bits_diff(s);
2338 }
2339
2340 static int encode_thread(AVCodecContext *c, void *arg){
2341     MpegEncContext *s= *(void**)arg;
2342     int mb_x, mb_y, pdif = 0;
2343     int chr_h= 16>>s->chroma_y_shift;
2344     int i, j;
2345     MpegEncContext best_s, backup_s;
2346     uint8_t bit_buf[2][MAX_MB_BYTES];
2347     uint8_t bit_buf2[2][MAX_MB_BYTES];
2348     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2349     PutBitContext pb[2], pb2[2], tex_pb[2];
2350 //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y);
2351
2352     ff_check_alignment();
2353
2354     for(i=0; i<2; i++){
2355         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2356         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2357         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2358     }
2359
2360     s->last_bits= put_bits_count(&s->pb);
2361     s->mv_bits=0;
2362     s->misc_bits=0;
2363     s->i_tex_bits=0;
2364     s->p_tex_bits=0;
2365     s->i_count=0;
2366     s->f_count=0;
2367     s->b_count=0;
2368     s->skip_count=0;
2369
2370     for(i=0; i<3; i++){
2371         /* init last dc values */
2372         /* note: quant matrix value (8) is implied here */
2373         s->last_dc[i] = 128 << s->intra_dc_precision;
2374
2375         s->current_picture.f.error[i] = 0;
2376     }
2377     s->mb_skip_run = 0;
2378     memset(s->last_mv, 0, sizeof(s->last_mv));
2379
2380     s->last_mv_dir = 0;
2381
2382     switch(s->codec_id){
2383     case CODEC_ID_H263:
2384     case CODEC_ID_H263P:
2385     case CODEC_ID_FLV1:
2386         if (CONFIG_H263_ENCODER)
2387             s->gob_index = ff_h263_get_gob_height(s);
2388         break;
2389     case CODEC_ID_MPEG4:
2390         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2391             ff_mpeg4_init_partitions(s);
2392         break;
2393     }
2394
2395     s->resync_mb_x=0;
2396     s->resync_mb_y=0;
2397     s->first_slice_line = 1;
2398     s->ptr_lastgob = s->pb.buf;
2399     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2400 //    printf("row %d at %X\n", s->mb_y, (int)s);
2401         s->mb_x=0;
2402         s->mb_y= mb_y;
2403
2404         ff_set_qscale(s, s->qscale);
2405         ff_init_block_index(s);
2406
2407         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2408             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2409             int mb_type= s->mb_type[xy];
2410 //            int d;
2411             int dmin= INT_MAX;
2412             int dir;
2413
2414             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2415                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2416                 return -1;
2417             }
2418             if(s->data_partitioning){
2419                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2420                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2421                     av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2422                     return -1;
2423                 }
2424             }
2425
2426             s->mb_x = mb_x;
2427             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2428             ff_update_block_index(s);
2429
2430             if(CONFIG_H261_ENCODER && s->codec_id == CODEC_ID_H261){
2431                 ff_h261_reorder_mb_index(s);
2432                 xy= s->mb_y*s->mb_stride + s->mb_x;
2433                 mb_type= s->mb_type[xy];
2434             }
2435
2436             /* write gob / video packet header  */
2437             if(s->rtp_mode){
2438                 int current_packet_size, is_gob_start;
2439
2440                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2441
2442                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2443
2444                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2445
2446                 switch(s->codec_id){
2447                 case CODEC_ID_H263:
2448                 case CODEC_ID_H263P:
2449                     if(!s->h263_slice_structured)
2450                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2451                     break;
2452                 case CODEC_ID_MPEG2VIDEO:
2453                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2454                 case CODEC_ID_MPEG1VIDEO:
2455                     if(s->mb_skip_run) is_gob_start=0;
2456                     break;
2457                 }
2458
2459                 if(is_gob_start){
2460                     if(s->start_mb_y != mb_y || mb_x!=0){
2461                         write_slice_end(s);
2462
2463                         if(CONFIG_MPEG4_ENCODER && s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame){
2464                             ff_mpeg4_init_partitions(s);
2465                         }
2466                     }
2467
2468                     assert((put_bits_count(&s->pb)&7) == 0);
2469                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2470
2471                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2472                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2473                         int d= 100 / s->avctx->error_rate;
2474                         if(r % d == 0){
2475                             current_packet_size=0;
2476                             s->pb.buf_ptr= s->ptr_lastgob;
2477                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2478                         }
2479                     }
2480
2481                     if (s->avctx->rtp_callback){
2482                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2483                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2484                     }
2485
2486                     switch(s->codec_id){
2487                     case CODEC_ID_MPEG4:
2488                         if (CONFIG_MPEG4_ENCODER) {
2489                             ff_mpeg4_encode_video_packet_header(s);
2490                             ff_mpeg4_clean_buffers(s);
2491                         }
2492                     break;
2493                     case CODEC_ID_MPEG1VIDEO:
2494                     case CODEC_ID_MPEG2VIDEO:
2495                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2496                             ff_mpeg1_encode_slice_header(s);
2497                             ff_mpeg1_clean_buffers(s);
2498                         }
2499                     break;
2500                     case CODEC_ID_H263:
2501                     case CODEC_ID_H263P:
2502                         if (CONFIG_H263_ENCODER)
2503                             h263_encode_gob_header(s, mb_y);
2504                     break;
2505                     }
2506
2507                     if(s->flags&CODEC_FLAG_PASS1){
2508                         int bits= put_bits_count(&s->pb);
2509                         s->misc_bits+= bits - s->last_bits;
2510                         s->last_bits= bits;
2511                     }
2512
2513                     s->ptr_lastgob += current_packet_size;
2514                     s->first_slice_line=1;
2515                     s->resync_mb_x=mb_x;
2516                     s->resync_mb_y=mb_y;
2517                 }
2518             }
2519
2520             if(  (s->resync_mb_x   == s->mb_x)
2521                && s->resync_mb_y+1 == s->mb_y){
2522                 s->first_slice_line=0;
2523             }
2524
2525             s->mb_skipped=0;
2526             s->dquant=0; //only for QP_RD
2527
2528             if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD
2529                 int next_block=0;
2530                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2531
2532                 copy_context_before_encode(&backup_s, s, -1);
2533                 backup_s.pb= s->pb;
2534                 best_s.data_partitioning= s->data_partitioning;
2535                 best_s.partitioned_frame= s->partitioned_frame;
2536                 if(s->data_partitioning){
2537                     backup_s.pb2= s->pb2;
2538                     backup_s.tex_pb= s->tex_pb;
2539                 }
2540
2541                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2542                     s->mv_dir = MV_DIR_FORWARD;
2543                     s->mv_type = MV_TYPE_16X16;
2544                     s->mb_intra= 0;
2545                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2546                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2547                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2548                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2549                 }
2550                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2551                     s->mv_dir = MV_DIR_FORWARD;
2552                     s->mv_type = MV_TYPE_FIELD;
2553                     s->mb_intra= 0;
2554                     for(i=0; i<2; i++){
2555                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2556                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2557                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2558                     }
2559                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2560                                  &dmin, &next_block, 0, 0);
2561                 }
2562                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2563                     s->mv_dir = MV_DIR_FORWARD;
2564                     s->mv_type = MV_TYPE_16X16;
2565                     s->mb_intra= 0;
2566                     s->mv[0][0][0] = 0;
2567                     s->mv[0][0][1] = 0;
2568                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2569                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2570                 }
2571                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2572                     s->mv_dir = MV_DIR_FORWARD;
2573                     s->mv_type = MV_TYPE_8X8;
2574                     s->mb_intra= 0;
2575                     for(i=0; i<4; i++){
2576                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2577                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2578                     }
2579                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2580                                  &dmin, &next_block, 0, 0);
2581                 }
2582                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2583                     s->mv_dir = MV_DIR_FORWARD;
2584                     s->mv_type = MV_TYPE_16X16;
2585                     s->mb_intra= 0;
2586                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2587                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2588                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2589                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2590                 }
2591                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2592                     s->mv_dir = MV_DIR_BACKWARD;
2593                     s->mv_type = MV_TYPE_16X16;
2594                     s->mb_intra= 0;
2595                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2596                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2597                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2598                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2599                 }
2600                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2601                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2602                     s->mv_type = MV_TYPE_16X16;
2603                     s->mb_intra= 0;
2604                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2605                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2606                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2607                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2608                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2609                                  &dmin, &next_block, 0, 0);
2610                 }
2611                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2612                     s->mv_dir = MV_DIR_FORWARD;
2613                     s->mv_type = MV_TYPE_FIELD;
2614                     s->mb_intra= 0;
2615                     for(i=0; i<2; i++){
2616                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2617                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2618                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2619                     }
2620                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2621                                  &dmin, &next_block, 0, 0);
2622                 }
2623                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2624                     s->mv_dir = MV_DIR_BACKWARD;
2625                     s->mv_type = MV_TYPE_FIELD;
2626                     s->mb_intra= 0;
2627                     for(i=0; i<2; i++){
2628                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2629                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2630                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2631                     }
2632                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2633                                  &dmin, &next_block, 0, 0);
2634                 }
2635                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2636                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2637                     s->mv_type = MV_TYPE_FIELD;
2638                     s->mb_intra= 0;
2639                     for(dir=0; dir<2; dir++){
2640                         for(i=0; i<2; i++){
2641                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2642                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2643                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2644                         }
2645                     }
2646                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2647                                  &dmin, &next_block, 0, 0);
2648                 }
2649                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2650                     s->mv_dir = 0;
2651                     s->mv_type = MV_TYPE_16X16;
2652                     s->mb_intra= 1;
2653                     s->mv[0][0][0] = 0;
2654                     s->mv[0][0][1] = 0;
2655                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2656                                  &dmin, &next_block, 0, 0);
2657                     if(s->h263_pred || s->h263_aic){
2658                         if(best_s.mb_intra)
2659                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2660                         else
2661                             ff_clean_intra_table_entries(s); //old mode?
2662                     }
2663                 }
2664
2665                 if((s->flags & CODEC_FLAG_QP_RD) && dmin < INT_MAX){
2666                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2667                         const int last_qp= backup_s.qscale;
2668                         int qpi, qp, dc[6];
2669                         DCTELEM ac[6][16];
2670                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2671                         static const int dquant_tab[4]={-1,1,-2,2};
2672
2673                         assert(backup_s.dquant == 0);
2674
2675                         //FIXME intra
2676                         s->mv_dir= best_s.mv_dir;
2677                         s->mv_type = MV_TYPE_16X16;
2678                         s->mb_intra= best_s.mb_intra;
2679                         s->mv[0][0][0] = best_s.mv[0][0][0];
2680                         s->mv[0][0][1] = best_s.mv[0][0][1];
2681                         s->mv[1][0][0] = best_s.mv[1][0][0];
2682                         s->mv[1][0][1] = best_s.mv[1][0][1];
2683
2684                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2685                         for(; qpi<4; qpi++){
2686                             int dquant= dquant_tab[qpi];
2687                             qp= last_qp + dquant;
2688                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2689                                 continue;
2690                             backup_s.dquant= dquant;
2691                             if(s->mb_intra && s->dc_val[0]){
2692                                 for(i=0; i<6; i++){
2693                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2694                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2695                                 }
2696                             }
2697
2698                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2699                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2700                             if(best_s.qscale != qp){
2701                                 if(s->mb_intra && s->dc_val[0]){
2702                                     for(i=0; i<6; i++){
2703                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2704                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2705                                     }
2706                                 }
2707                             }
2708                         }
2709                     }
2710                 }
2711                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2712                     int mx= s->b_direct_mv_table[xy][0];
2713                     int my= s->b_direct_mv_table[xy][1];
2714
2715                     backup_s.dquant = 0;
2716                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2717                     s->mb_intra= 0;
2718                     ff_mpeg4_set_direct_mv(s, mx, my);
2719                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2720                                  &dmin, &next_block, mx, my);
2721                 }
2722                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2723                     backup_s.dquant = 0;
2724                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2725                     s->mb_intra= 0;
2726                     ff_mpeg4_set_direct_mv(s, 0, 0);
2727                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2728                                  &dmin, &next_block, 0, 0);
2729                 }
2730                 if(!best_s.mb_intra && s->flags2&CODEC_FLAG2_SKIP_RD){
2731                     int coded=0;
2732                     for(i=0; i<6; i++)
2733                         coded |= s->block_last_index[i];
2734                     if(coded){
2735                         int mx,my;
2736                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2737                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2738                             mx=my=0; //FIXME find the one we actually used
2739                             ff_mpeg4_set_direct_mv(s, mx, my);
2740                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2741                             mx= s->mv[1][0][0];
2742                             my= s->mv[1][0][1];
2743                         }else{
2744                             mx= s->mv[0][0][0];
2745                             my= s->mv[0][0][1];
2746                         }
2747
2748                         s->mv_dir= best_s.mv_dir;
2749                         s->mv_type = best_s.mv_type;
2750                         s->mb_intra= 0;
2751 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2752                         s->mv[0][0][1] = best_s.mv[0][0][1];
2753                         s->mv[1][0][0] = best_s.mv[1][0][0];
2754                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2755                         backup_s.dquant= 0;
2756                         s->skipdct=1;
2757                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2758                                         &dmin, &next_block, mx, my);
2759                         s->skipdct=0;
2760                     }
2761                 }
2762
2763                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2764
2765                 copy_context_after_encode(s, &best_s, -1);
2766
2767                 pb_bits_count= put_bits_count(&s->pb);
2768                 flush_put_bits(&s->pb);
2769                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2770                 s->pb= backup_s.pb;
2771
2772                 if(s->data_partitioning){
2773                     pb2_bits_count= put_bits_count(&s->pb2);
2774                     flush_put_bits(&s->pb2);
2775                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2776                     s->pb2= backup_s.pb2;
2777
2778                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2779                     flush_put_bits(&s->tex_pb);
2780                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2781                     s->tex_pb= backup_s.tex_pb;
2782                 }
2783                 s->last_bits= put_bits_count(&s->pb);
2784
2785                 if (CONFIG_H263_ENCODER &&
2786                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2787                     ff_h263_update_motion_val(s);
2788
2789                 if(next_block==0){ //FIXME 16 vs linesize16
2790                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2791                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2792                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2793                 }
2794
2795                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2796                     MPV_decode_mb(s, s->block);
2797             } else {
2798                 int motion_x = 0, motion_y = 0;
2799                 s->mv_type=MV_TYPE_16X16;
2800                 // only one MB-Type possible
2801
2802                 switch(mb_type){
2803                 case CANDIDATE_MB_TYPE_INTRA:
2804                     s->mv_dir = 0;
2805                     s->mb_intra= 1;
2806                     motion_x= s->mv[0][0][0] = 0;
2807                     motion_y= s->mv[0][0][1] = 0;
2808                     break;
2809                 case CANDIDATE_MB_TYPE_INTER:
2810                     s->mv_dir = MV_DIR_FORWARD;
2811                     s->mb_intra= 0;
2812                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2813                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2814                     break;
2815                 case CANDIDATE_MB_TYPE_INTER_I:
2816                     s->mv_dir = MV_DIR_FORWARD;
2817                     s->mv_type = MV_TYPE_FIELD;
2818                     s->mb_intra= 0;
2819                     for(i=0; i<2; i++){
2820                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2821                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2822                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2823                     }
2824                     break;
2825                 case CANDIDATE_MB_TYPE_INTER4V:
2826                     s->mv_dir = MV_DIR_FORWARD;
2827                     s->mv_type = MV_TYPE_8X8;
2828                     s->mb_intra= 0;
2829                     for(i=0; i<4; i++){
2830                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2831                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2832                     }
2833                     break;
2834                 case CANDIDATE_MB_TYPE_DIRECT:
2835                     if (CONFIG_MPEG4_ENCODER) {
2836                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2837                         s->mb_intra= 0;
2838                         motion_x=s->b_direct_mv_table[xy][0];
2839                         motion_y=s->b_direct_mv_table[xy][1];
2840                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2841                     }
2842                     break;
2843                 case CANDIDATE_MB_TYPE_DIRECT0:
2844                     if (CONFIG_MPEG4_ENCODER) {
2845                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2846                         s->mb_intra= 0;
2847                         ff_mpeg4_set_direct_mv(s, 0, 0);
2848                     }
2849                     break;
2850                 case CANDIDATE_MB_TYPE_BIDIR:
2851                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2852                     s->mb_intra= 0;
2853                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2854                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2855                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2856                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2857                     break;
2858                 case CANDIDATE_MB_TYPE_BACKWARD:
2859                     s->mv_dir = MV_DIR_BACKWARD;
2860                     s->mb_intra= 0;
2861                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2862                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2863                     break;
2864                 case CANDIDATE_MB_TYPE_FORWARD:
2865                     s->mv_dir = MV_DIR_FORWARD;
2866                     s->mb_intra= 0;
2867                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2868                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2869 //                    printf(" %d %d ", motion_x, motion_y);
2870                     break;
2871                 case CANDIDATE_MB_TYPE_FORWARD_I:
2872                     s->mv_dir = MV_DIR_FORWARD;
2873                     s->mv_type = MV_TYPE_FIELD;
2874                     s->mb_intra= 0;
2875                     for(i=0; i<2; i++){
2876                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2877                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2878                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2879                     }
2880                     break;
2881                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2882                     s->mv_dir = MV_DIR_BACKWARD;
2883                     s->mv_type = MV_TYPE_FIELD;
2884                     s->mb_intra= 0;
2885                     for(i=0; i<2; i++){
2886                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2887                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2888                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2889                     }
2890                     break;
2891                 case CANDIDATE_MB_TYPE_BIDIR_I:
2892                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2893                     s->mv_type = MV_TYPE_FIELD;
2894                     s->mb_intra= 0;
2895                     for(dir=0; dir<2; dir++){
2896                         for(i=0; i<2; i++){
2897                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2898                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2899                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2900                         }
2901                     }
2902                     break;
2903                 default:
2904                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
2905                 }
2906
2907                 encode_mb(s, motion_x, motion_y);
2908
2909                 // RAL: Update last macroblock type
2910                 s->last_mv_dir = s->mv_dir;
2911
2912                 if (CONFIG_H263_ENCODER &&
2913                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2914                     ff_h263_update_motion_val(s);
2915
2916                 MPV_decode_mb(s, s->block);
2917             }
2918
2919             /* clean the MV table in IPS frames for direct mode in B frames */
2920             if(s->mb_intra /* && I,P,S_TYPE */){
2921                 s->p_mv_table[xy][0]=0;
2922                 s->p_mv_table[xy][1]=0;
2923             }
2924
2925             if(s->flags&CODEC_FLAG_PSNR){
2926                 int w= 16;
2927                 int h= 16;
2928
2929                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2930                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2931
2932                 s->current_picture.f.error[0] += sse(
2933                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
2934                     s->dest[0], w, h, s->linesize);
2935                 s->current_picture.f.error[1] += sse(
2936                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2937                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2938                 s->current_picture.f.error[2] += sse(
2939                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
2940                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
2941             }
2942             if(s->loop_filter){
2943                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
2944                     ff_h263_loop_filter(s);
2945             }
2946 //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb));
2947         }
2948     }
2949
2950     //not beautiful here but we must write it before flushing so it has to be here
2951     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
2952         msmpeg4_encode_ext_header(s);
2953
2954     write_slice_end(s);
2955
2956     /* Send the last GOB if RTP */
2957     if (s->avctx->rtp_callback) {
2958         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
2959         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
2960         /* Call the RTP callback to send the last GOB */
2961         emms_c();
2962         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
2963     }
2964
2965     return 0;
2966 }
2967
2968 #define MERGE(field) dst->field += src->field; src->field=0
2969 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
2970     MERGE(me.scene_change_score);
2971     MERGE(me.mc_mb_var_sum_temp);
2972     MERGE(me.mb_var_sum_temp);
2973 }
2974
2975 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
2976     int i;
2977
2978     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
2979     MERGE(dct_count[1]);
2980     MERGE(mv_bits);
2981     MERGE(i_tex_bits);
2982     MERGE(p_tex_bits);
2983     MERGE(i_count);
2984     MERGE(f_count);
2985     MERGE(b_count);
2986     MERGE(skip_count);
2987     MERGE(misc_bits);
2988     MERGE(error_count);
2989     MERGE(padding_bug_score);
2990     MERGE(current_picture.f.error[0]);
2991     MERGE(current_picture.f.error[1]);
2992     MERGE(current_picture.f.error[2]);
2993
2994     if(dst->avctx->noise_reduction){
2995         for(i=0; i<64; i++){
2996             MERGE(dct_error_sum[0][i]);
2997             MERGE(dct_error_sum[1][i]);
2998         }
2999     }
3000
3001     assert(put_bits_count(&src->pb) % 8 ==0);
3002     assert(put_bits_count(&dst->pb) % 8 ==0);
3003     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3004     flush_put_bits(&dst->pb);
3005 }
3006
3007 static int estimate_qp(MpegEncContext *s, int dry_run){
3008     if (s->next_lambda){
3009         s->current_picture_ptr->f.quality =
3010         s->current_picture.f.quality = s->next_lambda;
3011         if(!dry_run) s->next_lambda= 0;
3012     } else if (!s->fixed_qscale) {
3013         s->current_picture_ptr->f.quality =
3014         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3015         if (s->current_picture.f.quality < 0)
3016             return -1;
3017     }
3018
3019     if(s->adaptive_quant){
3020         switch(s->codec_id){
3021         case CODEC_ID_MPEG4:
3022             if (CONFIG_MPEG4_ENCODER)
3023                 ff_clean_mpeg4_qscales(s);
3024             break;
3025         case CODEC_ID_H263:
3026         case CODEC_ID_H263P:
3027         case CODEC_ID_FLV1:
3028             if (CONFIG_H263_ENCODER)
3029                 ff_clean_h263_qscales(s);
3030             break;
3031         default:
3032             ff_init_qscale_tab(s);
3033         }
3034
3035         s->lambda= s->lambda_table[0];
3036         //FIXME broken
3037     }else
3038         s->lambda = s->current_picture.f.quality;
3039 //printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
3040     update_qscale(s);
3041     return 0;
3042 }
3043
3044 /* must be called before writing the header */
3045 static void set_frame_distances(MpegEncContext * s){
3046     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3047     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3048
3049     if(s->pict_type==AV_PICTURE_TYPE_B){
3050         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3051         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3052     }else{
3053         s->pp_time= s->time - s->last_non_b_time;
3054         s->last_non_b_time= s->time;
3055         assert(s->picture_number==0 || s->pp_time > 0);
3056     }
3057 }
3058
/**
 * Encode the current picture into s->pb.
 *
 * Steps, in order: set up timing and rounding, pick an initial lambda, run
 * motion estimation over all slice contexts (or mark every macroblock intra
 * for I-frames), optionally turn a P-frame into an I-frame on scene change,
 * choose f_code/b_code and fix over-long vectors, estimate the final qscale,
 * write the picture header and finally run encode_thread over all slice
 * contexts and merge their results.
 *
 * @param s              encoder context
 * @param picture_number stored in s->picture_number and passed to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3059 static int encode_picture(MpegEncContext *s, int picture_number)
3060 {
3061     int i;
3062     int bits;
3063     int context_count = s->slice_context_count;
3064
3065     s->picture_number = picture_number;
3066
3067     /* Reset the average MB variance */
3068     s->me.mb_var_sum_temp    =
3069     s->me.mc_mb_var_sum_temp = 0;
3070
3071     /* we need to initialize some time vars before we can encode b-frames */
3072     // RAL: Condition added for MPEG1VIDEO
3073     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3074         set_frame_distances(s);
3075     if(CONFIG_MPEG4_ENCODER && s->codec_id == CODEC_ID_MPEG4)
3076         ff_set_mpeg4_time(s);
3077
3078     s->me.scene_change_score=0;
3079
3080 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3081
    /* I-frames reset no_rounding (1 for msmpeg4 version >= 3, else 0); other
     * non-B frames toggle it when flipflop rounding, H.263+ or MPEG-4 is used. */
3082     if(s->pict_type==AV_PICTURE_TYPE_I){
3083         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3084         else                        s->no_rounding=0;
3085     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3086         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
3087             s->no_rounding ^= 1;
3088     }
3089
    /* Initial lambda for motion estimation: with CODEC_FLAG_PASS2 a dry-run
     * QP estimate is made; otherwise, unless CODEC_FLAG_QSCALE is set, the
     * last lambda recorded for the matching picture type is reused. */
3090     if(s->flags & CODEC_FLAG_PASS2){
3091         if (estimate_qp(s,1) < 0)
3092             return -1;
3093         ff_get_2pass_fcode(s);
3094     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3095         if(s->pict_type==AV_PICTURE_TYPE_B)
3096             s->lambda= s->last_lambda_for[s->pict_type];
3097         else
3098             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3099         update_qscale(s);
3100     }
3101
3102     s->mb_intra=0; //for the rate distortion & bit compare functions
    /* Bring the additional slice contexts (index 1 and up) in sync with s. */
3103     for(i=1; i<context_count; i++){
3104         ff_update_duplicate_context(s->thread_context[i], s);
3105     }
3106
3107     if(ff_init_me(s)<0)
3108         return -1;
3109
3110     /* Estimate motion for every MB */
3111     if(s->pict_type != AV_PICTURE_TYPE_I){
        /* Scale lambda/lambda2 by me_penalty_compensation/256 (rounded). */
3112         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3113         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3114         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3115             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3116                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3117             }
3118         }
3119
3120         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3121     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3122         /* I-Frame */
3123         for(i=0; i<s->mb_stride*s->mb_height; i++)
3124             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3125
3126         if(!s->fixed_qscale){
3127             /* finding spatial complexity for I-frame rate control */
3128             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3129         }
3130     }
    /* Collect the motion-estimation statistics of the other contexts into s. */
3131     for(i=1; i<context_count; i++){
3132         merge_context_after_me(s, s->thread_context[i]);
3133     }
3134     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3135     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3136     emms_c();
3137
    /* Scene change: a P-frame whose score exceeds the threshold is re-typed
     * as an I-frame and every macroblock becomes an intra candidate. */
3138     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3139         s->pict_type= AV_PICTURE_TYPE_I;
3140         for(i=0; i<s->mb_stride*s->mb_height; i++)
3141             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3142 //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3143     }
3144
    /* Unless umvplus is set: pick f_code/b_code from the estimated vectors
     * and let ff_fix_long_mvs() deal with vectors too long for that code. */
3145     if(!s->umvplus){
3146         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3147             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3148
3149             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3150                 int a,b;
3151                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3152                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3153                 s->f_code= FFMAX3(s->f_code, a, b);
3154             }
3155
3156             ff_fix_long_p_mvs(s);
3157             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3158             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3159                 int j;
3160                 for(i=0; i<2; i++){
3161                     for(j=0; j<2; j++)
3162                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3163                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3164                 }
3165             }
3166         }
3167
3168         if(s->pict_type==AV_PICTURE_TYPE_B){
3169             int a, b;
3170
            /* f_code covers the forward tables, b_code the backward ones. */
3171             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3172             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3173             s->f_code = FFMAX(a, b);
3174
3175             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3176             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3177             s->b_code = FFMAX(a, b);
3178
3179             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3180             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3181             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3182             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3183             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3184                 int dir, j;
3185                 for(dir=0; dir<2; dir++){
3186                     for(i=0; i<2; i++){
3187                         for(j=0; j<2; j++){
3188                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3189                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3190                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3191                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3192                         }
3193                     }
3194                 }
3195             }
3196         }
3197     }
3198
    /* Final (non dry-run) quantizer decision for this picture. */
3199     if (estimate_qp(s, 0) < 0)
3200         return -1;
3201
3202     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3203         s->qscale= 3; //reduce clipping problems
3204
3205     if (s->out_format == FMT_MJPEG) {
3206         /* for mjpeg, we do include qscale in the matrix */
3207         for(i=1;i<64;i++){
3208             int j= s->dsp.idct_permutation[i];
3209
3210             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3211         }
3212         s->y_dc_scale_table=
3213         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3214         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3215         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3216                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
        /* qscale is already folded into the matrix above, so a fixed value is used from here on. */
3217         s->qscale= 8;
3218     }
3219
3220     //FIXME var duplication
3221     s->current_picture_ptr->f.key_frame =
3222     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3223     s->current_picture_ptr->f.pict_type =
3224     s->current_picture.f.pict_type = s->pict_type;
3225
3226     if (s->current_picture.f.key_frame)
3227         s->picture_in_gop_number=0;
3228
    /* Write the picture header for the output format; header_bits is the
     * number of bits this adds to s->pb. */
3229     s->last_bits= put_bits_count(&s->pb);
3230     switch(s->out_format) {
3231     case FMT_MJPEG:
3232         if (CONFIG_MJPEG_ENCODER)
3233             ff_mjpeg_encode_picture_header(s);
3234         break;
3235     case FMT_H261:
3236         if (CONFIG_H261_ENCODER)
3237             ff_h261_encode_picture_header(s, picture_number);
3238         break;
3239     case FMT_H263:
3240         if (CONFIG_WMV2_ENCODER && s->codec_id == CODEC_ID_WMV2)
3241             ff_wmv2_encode_picture_header(s, picture_number);
3242         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3243             msmpeg4_encode_picture_header(s, picture_number);
3244         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3245             mpeg4_encode_picture_header(s, picture_number);
3246         else if (CONFIG_RV10_ENCODER && s->codec_id == CODEC_ID_RV10)
3247             rv10_encode_picture_header(s, picture_number);
3248         else if (CONFIG_RV20_ENCODER && s->codec_id == CODEC_ID_RV20)
3249             rv20_encode_picture_header(s, picture_number);
3250         else if (CONFIG_FLV_ENCODER && s->codec_id == CODEC_ID_FLV1)
3251             ff_flv_encode_picture_header(s, picture_number);
3252         else if (CONFIG_H263_ENCODER)
3253             h263_encode_picture_header(s, picture_number);
3254         break;
3255     case FMT_MPEG1:
3256         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3257             mpeg1_encode_picture_header(s, picture_number);
3258         break;
3259     case FMT_H264:
3260         break;
3261     default:
3262         assert(0);
3263     }
3264     bits= put_bits_count(&s->pb);
3265     s->header_bits= bits - s->last_bits;
3266
    /* Update the other contexts from s, encode all slices, then merge the
     * statistics and bitstreams of contexts 1..context_count-1 back into s. */
3267     for(i=1; i<context_count; i++){
3268         update_duplicate_context_after_me(s->thread_context[i], s);
3269     }
3270     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3271     for(i=1; i<context_count; i++){
3272         merge_context_after_encode(s, s->thread_context[i]);
3273     }
3274     emms_c();
3275     return 0;
3276 }
3277
3278 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3279     const int intra= s->mb_intra;
3280     int i;
3281
3282     s->dct_count[intra]++;
3283
3284     for(i=0; i<64; i++){
3285         int level= block[i];
3286
3287         if(level){
3288             if(level>0){
3289                 s->dct_error_sum[intra][i] += level;
3290                 level -= s->dct_offset[intra][i];
3291                 if(level<0) level=0;
3292             }else{
3293                 s->dct_error_sum[intra][i] -= level;
3294                 level += s->dct_offset[intra][i];
3295                 if(level>0) level=0;
3296             }
3297             block[i]= level;
3298         }
3299     }
3300 }
3301
/**
 * Forward DCT followed by rate-distortion optimised ("trellis") quantization
 * of one block.
 *
 * After the fdct (and denoising when s->dct_error_sum is set) every scan
 * position gets up to two candidate quantized levels. A dynamic-programming
 * pass over the scan positions then selects the run/level sequence with the
 * lowest distortion + lambda * bits, using the AC VLC length tables, and the
 * winning levels are written back into block[].
 *
 * @param s        encoder context
 * @param block    64 values; transformed in place by s->dsp.fdct and
 *                 overwritten with the chosen quantized levels
 * @param n        block number: for intra blocks n < 4 selects y_dc_scale,
 *                 otherwise c_dc_scale; also indexes s->coded_score[]
 * @param qscale   quantizer scale, selects the row of q_intra/q_inter_matrix
 * @param overflow set to 1 when the OR of the candidate magnitudes exceeds
 *                 s->max_qcoeff (overflow might have happened), else 0
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded position (e.g. -1 for inter blocks) means nothing is coded
 */
3302 static int dct_quantize_trellis_c(MpegEncContext *s,
3303                                   DCTELEM *block, int n,
3304                                   int qscale, int *overflow){
3305     const int *qmat;
3306     const uint8_t *scantable= s->intra_scantable.scantable;
3307     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3308     int max=0;
3309     unsigned int threshold1, threshold2;
3310     int bias=0;
3311     int run_tab[65];
3312     int level_tab[65];
3313     int score_tab[65];
3314     int survivor[65];
3315     int survivor_count;
3316     int last_run=0;
3317     int last_level=0;
3318     int last_score= 0;
3319     int last_i;
3320     int coeff[2][64];
3321     int coeff_count[64];
3322     int qmul, qadd, start_i, last_non_zero, i, dc;
3323     const int esc_length= s->ac_esc_length;
3324     uint8_t * length;
3325     uint8_t * last_length;
3326     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3327
3328     s->dsp.fdct (block);
3329
3330     if(s->dct_error_sum)
3331         s->denoise_dct(s, block);
3332     qmul= qscale*16;
3333     qadd= ((qscale-1)|1)*8;
3334
    /* Intra blocks quantize the DC coefficient separately here and start the
     * trellis at position 1; inter blocks start at position 0. */
3335     if (s->mb_intra) {
3336         int q;
3337         if (!s->h263_aic) {
3338             if (n < 4)
3339                 q = s->y_dc_scale;
3340             else
3341                 q = s->c_dc_scale;
3342             q = q << 3;
3343         } else{
3344             /* For AIC we skip quant/dequant of INTRADC */
3345             q = 1 << 3;
3346             qadd=0;
3347         }
3348
3349         /* note: block[0] is assumed to be positive */
3350         block[0] = (block[0] + (q >> 1)) / q;
3351         start_i = 1;
3352         last_non_zero = 0;
3353         qmat = s->q_intra_matrix[qscale];
3354         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3355             bias= 1<<(QMAT_SHIFT-1);
3356         length     = s->intra_ac_vlc_length;
3357         last_length= s->intra_ac_vlc_last_length;
3358     } else {
3359         start_i = 0;
3360         last_non_zero = -1;
3361         qmat = s->q_inter_matrix[qscale];
3362         length     = s->inter_ac_vlc_length;
3363         last_length= s->inter_ac_vlc_last_length;
3364     }
3365     last_i= start_i;
3366
    /* (unsigned)(level+threshold1) > threshold2 tests in a single compare
     * whether level lies outside [-threshold1, threshold1]. */
3367     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3368     threshold2= (threshold1<<1);
3369
    /* Scan backwards for the last position that passes the threshold. */
3370     for(i=63; i>=start_i; i--) {
3371         const int j = scantable[i];
3372         int level = block[j] * qmat[j];
3373
3374         if(((unsigned)(level+threshold1))>threshold2){
3375             last_non_zero = i;
3376             break;
3377         }
3378     }
3379
    /* Build the candidates: positions above the threshold get the rounded
     * level and the next smaller magnitude (only one candidate when the
     * level is 1); positions below it get the single candidate +1 or -1. */
3380     for(i=start_i; i<=last_non_zero; i++) {
3381         const int j = scantable[i];
3382         int level = block[j] * qmat[j];
3383
3384 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3385 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3386         if(((unsigned)(level+threshold1))>threshold2){
3387             if(level>0){
3388                 level= (bias + level)>>QMAT_SHIFT;
3389                 coeff[0][i]= level;
3390                 coeff[1][i]= level-1;
3391 //                coeff[2][k]= level-2;
3392             }else{
3393                 level= (bias - level)>>QMAT_SHIFT;
3394                 coeff[0][i]= -level;
3395                 coeff[1][i]= -level+1;
3396 //                coeff[2][k]= -level+2;
3397             }
3398             coeff_count[i]= FFMIN(level, 2);
3399             assert(coeff_count[i]);
3400             max |=level;
3401         }else{
            /* NOTE(review): yields -1 or +1 from the sign of level, assuming
             * a 32-bit int and arithmetic right shift -- confirm. */
3402             coeff[0][i]= (level>>31)|1;
3403             coeff_count[i]= 1;
3404         }
3405     }
3406
3407     *overflow= s->max_qcoeff < max; //overflow might have happened
3408
3409     if(last_non_zero < start_i){
3410         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3411         return last_non_zero;
3412     }
3413
    /* Dynamic programming state: score_tab[i+1] is the best score of a path
     * whose last coded coefficient sits at position i; survivor[] holds the
     * positions after which a new run may still start. */
3414     score_tab[start_i]= 0;
3415     survivor[0]= start_i;
3416     survivor_count= 1;
3417
3418     for(i=start_i; i<=last_non_zero; i++){
3419         int level_index, j, zero_distortion;
3420         int dct_coeff= FFABS(block[ scantable[i] ]);
3421         int best_score=256*256*256*120;
3422
        /* These fdct variants leave a scale factor in their output, which is
         * undone here with ff_inv_aanscales before measuring distortion. */
3423         if (   s->dsp.fdct == fdct_ifast
3424 #ifndef FAAN_POSTSCALE
3425             || s->dsp.fdct == ff_faandct
3426 #endif
3427            )
3428             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3429         zero_distortion= dct_coeff*dct_coeff;
3430
3431         for(level_index=0; level_index < coeff_count[i]; level_index++){
3432             int distortion;
3433             int level= coeff[level_index][i];
3434             const int alevel= FFABS(level);
3435             int unquant_coeff;
3436
3437             assert(level);
3438
            /* Reconstruct the value this candidate would dequantize to. */
3439             if(s->out_format == FMT_H263){
3440                 unquant_coeff= alevel*qmul + qadd;
3441             }else{ //MPEG1
3442                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3443                 if(s->mb_intra){
3444                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3445                         unquant_coeff =   (unquant_coeff - 1) | 1;
3446                 }else{
3447                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3448                         unquant_coeff =   (unquant_coeff - 1) | 1;
3449                 }
3450                 unquant_coeff<<= 3;
3451             }
3452
            /* Distortion relative to coding a zero at this position. */
3453             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
            /* Levels are biased by 64 to index the VLC length tables; levels
             * outside [-64, 63] are charged the escape length instead. */
3454             level+=64;
3455             if((level&(~127)) == 0){
3456                 for(j=survivor_count-1; j>=0; j--){
3457                     int run= i - survivor[j];
3458                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3459                     score += score_tab[i-run];
3460
3461                     if(score < best_score){
3462                         best_score= score;
3463                         run_tab[i+1]= run;
3464                         level_tab[i+1]= level-64;
3465                     }
3466                 }
3467
                /* For FMT_H263 the final coefficient uses its own length
                 * table, so the best ending is tracked during this pass. */
3468                 if(s->out_format == FMT_H263){
3469                     for(j=survivor_count-1; j>=0; j--){
3470                         int run= i - survivor[j];
3471                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3472                         score += score_tab[i-run];
3473                         if(score < last_score){
3474                             last_score= score;
3475                             last_run= run;
3476                             last_level= level-64;
3477                             last_i= i+1;
3478                         }
3479                     }
3480                 }
3481             }else{
3482                 distortion += esc_length*lambda;
3483                 for(j=survivor_count-1; j>=0; j--){
3484                     int run= i - survivor[j];
3485                     int score= distortion + score_tab[i-run];
3486
3487                     if(score < best_score){
3488                         best_score= score;
3489                         run_tab[i+1]= run;
3490                         level_tab[i+1]= level-64;
3491                     }
3492                 }
3493
3494                 if(s->out_format == FMT_H263){
3495                   for(j=survivor_count-1; j>=0; j--){
3496                         int run= i - survivor[j];
3497                         int score= distortion + score_tab[i-run];
3498                         if(score < last_score){
3499                             last_score= score;
3500                             last_run= run;
3501                             last_level= level-64;
3502                             last_i= i+1;
3503                         }
3504                     }
3505                 }
3506             }
3507         }
3508
3509         score_tab[i+1]= best_score;
3510
        /* Drop survivors whose score is worse than the new best (with a
         * margin of lambda when last_non_zero > 27). */
3511         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3512         if(last_non_zero <= 27){
3513             for(; survivor_count; survivor_count--){
3514                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3515                     break;
3516             }
3517         }else{
3518             for(; survivor_count; survivor_count--){
3519                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3520                     break;
3521             }
3522         }
3523
3524         survivor[ survivor_count++ ]= i+1;
3525     }
3526
    /* Formats other than FMT_H263 choose the end position afterwards;
     * lambda*2 is added for every non-zero end position (presumably an
     * approximate end-of-block cost -- see the FIXME). */
3527     if(s->out_format != FMT_H263){
3528         last_score= 256*256*256*120;
3529         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3530             int score= score_tab[i];
3531             if(i) score += lambda*2; //FIXME exacter?
3532
3533             if(score < last_score){
3534                 last_score= score;
3535                 last_i= i;
3536                 last_level= level_tab[i];
3537                 last_run= run_tab[i];
3538             }
3539         }
3540     }
3541
3542     s->coded_score[n] = last_score;
3543
    /* Remember |block[0]| before the coefficients are cleared; the chosen
     * levels are written back below. */
3544     dc= FFABS(block[0]);
3545     last_non_zero= last_i - 1;
3546     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3547
3548     if(last_non_zero < start_i)
3549         return last_non_zero;
3550
    /* Special case: the only coded coefficient is at position 0 of an inter
     * block. Its level is decided again here against coding nothing at all. */
3551     if(last_non_zero == 0 && start_i == 0){
3552         int best_level= 0;
3553         int best_score= dc * dc;
3554
3555         for(i=0; i<coeff_count[0]; i++){
3556             int level= coeff[i][0];
3557             int alevel= FFABS(level);
3558             int unquant_coeff, score, distortion;
3559
3560             if(s->out_format == FMT_H263){
3561                     unquant_coeff= (alevel*qmul + qadd)>>3;
3562             }else{ //MPEG1
3563                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3564                     unquant_coeff =   (unquant_coeff - 1) | 1;
3565             }
3566             unquant_coeff = (unquant_coeff + 4) >> 3;
3567             unquant_coeff<<= 3 + 3;
3568
3569             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3570             level+=64;
3571             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3572             else                    score= distortion + esc_length*lambda;
3573
3574             if(score < best_score){
3575                 best_score= score;
3576                 best_level= level - 64;
3577             }
3578         }
3579         block[0]= best_level;
3580         s->coded_score[n] = best_score - dc*dc;
3581         if(best_level == 0) return -1;
3582         else                return last_non_zero;
3583     }
3584
    /* Backtrack: store the last level, then follow run_tab[]/level_tab[]
     * towards the start, writing each chosen level at its permuted position. */
3585     i= last_i;
3586     assert(last_level);
3587
3588     block[ perm_scantable[last_non_zero] ]= last_level;
3589     i -= last_run + 1;
3590
3591     for(; i>start_i; i -= run_tab[i] + 1){
3592         block[ perm_scantable[i-1] ]= level_tab[i];
3593     }
3594
3595     return last_non_zero;
3596 }
3597
/* Uncomment to compile in the code guarded by #ifdef REFINE_STATS. */
3598 //#define REFINE_STATS 1
/* Table filled by build_basis(); dct_quantize_refine() builds it on first
 * use, i.e. while basis[0][0] is still 0. */
3599 static int16_t basis[64][64];
3600
3601 static void build_basis(uint8_t *perm){
3602     int i, j, x, y;
3603     emms_c();
3604     for(i=0; i<8; i++){
3605         for(j=0; j<8; j++){
3606             for(y=0; y<8; y++){
3607                 for(x=0; x<8; x++){
3608                     double s= 0.25*(1<<BASIS_SHIFT);
3609                     int index= 8*i + j;
3610                     int perm_index= perm[index];
3611                     if(i==0) s*= sqrt(0.5);
3612                     if(j==0) s*= sqrt(0.5);
3613                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3614                 }
3615             }
3616         }
3617     }
3618 }
3619
3620 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3621                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3622                         int n, int qscale){
3623     int16_t rem[64];
3624     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3625     const uint8_t *scantable= s->intra_scantable.scantable;
3626     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3627 //    unsigned int threshold1, threshold2;
3628 //    int bias=0;
3629     int run_tab[65];
3630     int prev_run=0;
3631     int prev_level=0;
3632     int qmul, qadd, start_i, last_non_zero, i, dc;
3633     uint8_t * length;
3634     uint8_t * last_length;
3635     int lambda;
3636     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3637 #ifdef REFINE_STATS
3638 static int count=0;
3639 static int after_last=0;
3640 static int to_zero=0;
3641 static int from_zero=0;
3642 static int raise=0;
3643 static int lower=0;
3644 static int messed_sign=0;
3645 #endif
3646
3647     if(basis[0][0] == 0)
3648         build_basis(s->dsp.idct_permutation);
3649
3650     qmul= qscale*2;
3651     qadd= (qscale-1)|1;
3652     if (s->mb_intra) {
3653         if (!s->h263_aic) {
3654             if (n < 4)
3655                 q = s->y_dc_scale;
3656             else
3657                 q = s->c_dc_scale;
3658         } else{
3659             /* For AIC we skip quant/dequant of INTRADC */
3660             q = 1;
3661             qadd=0;
3662         }
3663         q <<= RECON_SHIFT-3;
3664         /* note: block[0] is assumed to be positive */
3665         dc= block[0]*q;
3666 //        block[0] = (block[0] + (q >> 1)) / q;
3667         start_i = 1;
3668 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3669 //            bias= 1<<(QMAT_SHIFT-1);
3670         length     = s->intra_ac_vlc_length;
3671         last_length= s->intra_ac_vlc_last_length;
3672     } else {
3673         dc= 0;
3674         start_i = 0;
3675         length     = s->inter_ac_vlc_length;
3676         last_length= s->inter_ac_vlc_last_length;
3677     }
3678     last_non_zero = s->block_last_index[n];
3679
3680 #ifdef REFINE_STATS
3681 {START_TIMER
3682 #endif
3683     dc += (1<<(RECON_SHIFT-1));
3684     for(i=0; i<64; i++){
3685         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3686     }
3687 #ifdef REFINE_STATS
3688 STOP_TIMER("memset rem[]")}
3689 #endif
3690     sum=0;
3691     for(i=0; i<64; i++){
3692         int one= 36;
3693         int qns=4;
3694         int w;
3695
3696         w= FFABS(weight[i]) + qns*one;
3697         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3698
3699         weight[i] = w;
3700 //        w=weight[i] = (63*qns + (w/2)) / w;
3701
3702         assert(w>0);
3703         assert(w<(1<<6));
3704         sum += w*w;
3705     }
3706     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3707 #ifdef REFINE_STATS
3708 {START_TIMER
3709 #endif
3710     run=0;
3711     rle_index=0;
3712     for(i=start_i; i<=last_non_zero; i++){
3713         int j= perm_scantable[i];
3714         const int level= block[j];
3715         int coeff;
3716
3717         if(level){
3718             if(level<0) coeff= qmul*level - qadd;
3719             else        coeff= qmul*level + qadd;
3720             run_tab[rle_index++]=run;
3721             run=0;
3722
3723             s->dsp.add_8x8basis(rem, basis[j], coeff);
3724         }else{
3725             run++;
3726         }
3727     }
3728 #ifdef REFINE_STATS
3729 if(last_non_zero>0){
3730 STOP_TIMER("init rem[]")
3731 }
3732 }
3733
3734 {START_TIMER
3735 #endif
3736     for(;;){
3737         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3738         int best_coeff=0;
3739         int best_change=0;
3740         int run2, best_unquant_change=0, analyze_gradient;
3741 #ifdef REFINE_STATS
3742 {START_TIMER
3743 #endif
3744         analyze_gradient = last_non_zero > 2 || s->avctx->quantizer_noise_shaping >= 3;
3745
3746         if(analyze_gradient){
3747 #ifdef REFINE_STATS
3748 {START_TIMER
3749 #endif
3750             for(i=0; i<64; i++){
3751                 int w= weight[i];
3752
3753                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3754             }
3755 #ifdef REFINE_STATS
3756 STOP_TIMER("rem*w*w")}
3757 {START_TIMER
3758 #endif
3759             s->dsp.fdct(d1);
3760 #ifdef REFINE_STATS
3761 STOP_TIMER("dct")}
3762 #endif
3763         }
3764
3765         if(start_i){
3766             const int level= block[0];
3767             int change, old_coeff;
3768
3769             assert(s->mb_intra);
3770
3771             old_coeff= q*level;
3772
3773             for(change=-1; change<=1; change+=2){
3774                 int new_level= level + change;
3775                 int score, new_coeff;
3776
3777                 new_coeff= q*new_level;
3778                 if(new_coeff >= 2048 || new_coeff < 0)
3779                     continue;
3780
3781                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3782                 if(score<best_score){
3783                     best_score= score;
3784                     best_coeff= 0;
3785                     best_change= change;
3786                     best_unquant_change= new_coeff - old_coeff;
3787                 }
3788             }
3789         }
3790
3791         run=0;
3792         rle_index=0;
3793         run2= run_tab[rle_index++];
3794         prev_level=0;
3795         prev_run=0;
3796
3797         for(i=start_i; i<64; i++){
3798             int j= perm_scantable[i];
3799             const int level= block[j];
3800             int change, old_coeff;
3801
3802             if(s->avctx->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3803                 break;
3804
3805             if(level){
3806                 if(level<0) old_coeff= qmul*level - qadd;
3807                 else        old_coeff= qmul*level + qadd;
3808                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3809             }else{
3810                 old_coeff=0;
3811                 run2--;
3812                 assert(run2>=0 || i >= last_non_zero );
3813             }
3814
3815             for(change=-1; change<=1; change+=2){
3816                 int new_level= level + change;
3817                 int score, new_coeff, unquant_change;
3818
3819                 score=0;
3820                 if(s->avctx->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3821                    continue;
3822
3823                 if(new_level){
3824                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3825                     else            new_coeff= qmul*new_level + qadd;
3826                     if(new_coeff >= 2048 || new_coeff <= -2048)
3827                         continue;
3828                     //FIXME check for overflow
3829
3830                     if(level){
3831                         if(level < 63 && level > -63){
3832                             if(i < last_non_zero)
3833                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3834                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3835                             else
3836                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3837                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3838                         }
3839                     }else{
3840                         assert(FFABS(new_level)==1);
3841
3842                         if(analyze_gradient){
3843                             int g= d1[ scantable[i] ];
3844                             if(g && (g^new_level) >= 0)
3845                                 continue;
3846                         }
3847
3848                         if(i < last_non_zero){
3849                             int next_i= i + run2 + 1;
3850                             int next_level= block[ perm_scantable[next_i] ] + 64;
3851
3852                             if(next_level&(~127))
3853                                 next_level= 0;
3854
3855                             if(next_i < last_non_zero)
3856                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3857                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3858                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3859                             else
3860                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3861                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3862                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3863                         }else{
3864                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3865                             if(prev_level){
3866                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3867                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3868                             }
3869                         }
3870                     }
3871                 }else{
3872                     new_coeff=0;
3873                     assert(FFABS(level)==1);
3874
3875                     if(i < last_non_zero){
3876                         int next_i= i + run2 + 1;
3877                         int next_level= block[ perm_scantable[next_i] ] + 64;
3878
3879                         if(next_level&(~127))
3880                             next_level= 0;
3881
3882                         if(next_i < last_non_zero)
3883                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3884                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
3885                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3886                         else
3887                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
3888                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3889                                      - length[UNI_AC_ENC_INDEX(run, 65)];
3890                     }else{
3891                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
3892                         if(prev_level){
3893                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3894                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3895                         }
3896                     }
3897                 }
3898
3899                 score *= lambda;
3900
3901                 unquant_change= new_coeff - old_coeff;
3902                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
3903
3904                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
3905                 if(score<best_score){
3906                     best_score= score;
3907                     best_coeff= i;
3908                     best_change= change;
3909                     best_unquant_change= unquant_change;
3910                 }
3911             }
3912             if(level){
3913                 prev_level= level + 64;
3914                 if(prev_level&(~127))
3915                     prev_level= 0;
3916                 prev_run= run;
3917                 run=0;
3918             }else{
3919                 run++;
3920             }
3921         }
3922 #ifdef REFINE_STATS
3923 STOP_TIMER("iterative step")}
3924 #endif
3925
3926         if(best_change){
3927             int j= perm_scantable[ best_coeff ];
3928
3929             block[j] += best_change;
3930
3931             if(best_coeff > last_non_zero){
3932                 last_non_zero= best_coeff;
3933                 assert(block[j]);
3934 #ifdef REFINE_STATS
3935 after_last++;
3936 #endif
3937             }else{
3938 #ifdef REFINE_STATS
3939 if(block[j]){
3940     if(block[j] - best_change){
3941         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
3942             raise++;
3943         }else{
3944             lower++;
3945         }
3946     }else{
3947         from_zero++;
3948     }
3949 }else{
3950     to_zero++;
3951 }
3952 #endif
3953                 for(; last_non_zero>=start_i; last_non_zero--){
3954                     if(block[perm_scantable[last_non_zero]])
3955                         break;
3956                 }
3957             }
3958 #ifdef REFINE_STATS
3959 count++;
3960 if(256*256*256*64 % count == 0){
3961     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
3962 }
3963 #endif
3964             run=0;
3965             rle_index=0;
3966             for(i=start_i; i<=last_non_zero; i++){
3967                 int j= perm_scantable[i];
3968                 const int level= block[j];
3969
3970                  if(level){
3971                      run_tab[rle_index++]=run;
3972                      run=0;
3973                  }else{
3974                      run++;
3975                  }
3976             }
3977
3978             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
3979         }else{
3980             break;
3981         }
3982     }
3983 #ifdef REFINE_STATS
3984 if(last_non_zero>0){
3985 STOP_TIMER("iterative search")
3986 }
3987 }
3988 #endif
3989
3990     return last_non_zero;
3991 }
3992
3993 int dct_quantize_c(MpegEncContext *s,
3994                         DCTELEM *block, int n,
3995                         int qscale, int *overflow)
3996 {
3997     int i, j, level, last_non_zero, q, start_i;
3998     const int *qmat;
3999     const uint8_t *scantable= s->intra_scantable.scantable;
4000     int bias;
4001     int max=0;
4002     unsigned int threshold1, threshold2;
4003
4004     s->dsp.fdct (block);
4005
4006     if(s->dct_error_sum)
4007         s->denoise_dct(s, block);
4008
4009     if (s->mb_intra) {
4010         if (!s->h263_aic) {
4011             if (n < 4)
4012                 q = s->y_dc_scale;
4013             else
4014                 q = s->c_dc_scale;
4015             q = q << 3;
4016         } else
4017             /* For AIC we skip quant/dequant of INTRADC */
4018             q = 1 << 3;
4019
4020         /* note: block[0] is assumed to be positive */
4021         block[0] = (block[0] + (q >> 1)) / q;
4022         start_i = 1;
4023         last_non_zero = 0;
4024         qmat = s->q_intra_matrix[qscale];
4025         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4026     } else {
4027         start_i = 0;
4028         last_non_zero = -1;
4029         qmat = s->q_inter_matrix[qscale];
4030         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4031     }
4032     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4033     threshold2= (threshold1<<1);
4034     for(i=63;i>=start_i;i--) {
4035         j = scantable[i];
4036         level = block[j] * qmat[j];
4037
4038         if(((unsigned)(level+threshold1))>threshold2){
4039             last_non_zero = i;
4040             break;
4041         }else{
4042             block[j]=0;
4043         }
4044     }
4045     for(i=start_i; i<=last_non_zero; i++) {
4046         j = scantable[i];
4047         level = block[j] * qmat[j];
4048
4049 //        if(   bias+level >= (1<<QMAT_SHIFT)
4050 //           || bias-level >= (1<<QMAT_SHIFT)){
4051         if(((unsigned)(level+threshold1))>threshold2){
4052             if(level>0){
4053                 level= (bias + level)>>QMAT_SHIFT;
4054                 block[j]= level;
4055             }else{
4056                 level= (bias - level)>>QMAT_SHIFT;
4057                 block[j]= -level;
4058             }
4059             max |=level;
4060         }else{
4061             block[j]=0;
4062         }
4063     }
4064     *overflow= s->max_qcoeff < max; //overflow might have happened
4065
4066     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4067     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4068         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4069
4070     return last_non_zero;
4071 }
4072
/* Byte offset of field x inside MpegEncContext, for use in AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Flags shared by the encoder options: video parameter + encoding parameter.
 * The expansion is parenthesized so that it stays a single operand when the
 * macro is used inside a larger expression (e.g. `VE & flag`). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4075 static const AVOption h263_options[] = {
4076     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4077     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4078     { NULL },
4079 };
4080
4081 static const AVClass h263_class = {
4082     .class_name = "H.263 encoder",
4083     .item_name  = av_default_item_name,
4084     .option     = h263_options,
4085     .version    = LIBAVUTIL_VERSION_INT,
4086 };
4087
4088 AVCodec ff_h263_encoder = {
4089     .name           = "h263",
4090     .type           = AVMEDIA_TYPE_VIDEO,
4091     .id             = CODEC_ID_H263,
4092     .priv_data_size = sizeof(MpegEncContext),
4093     .init           = MPV_encode_init,
4094     .encode         = MPV_encode_picture,
4095     .close          = MPV_encode_end,
4096     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4097     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4098     .priv_class     = &h263_class,
4099 };
4100
4101 static const AVOption h263p_options[] = {
4102     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4103     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4104     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE },
4105     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { 0 }, 0, 1, VE},
4106     { NULL },
4107 };
4108 static const AVClass h263p_class = {
4109     .class_name = "H.263p encoder",
4110     .item_name  = av_default_item_name,
4111     .option     = h263p_options,
4112     .version    = LIBAVUTIL_VERSION_INT,
4113 };
4114
4115 AVCodec ff_h263p_encoder = {
4116     .name           = "h263p",
4117     .type           = AVMEDIA_TYPE_VIDEO,
4118     .id             = CODEC_ID_H263P,
4119     .priv_data_size = sizeof(MpegEncContext),
4120     .init           = MPV_encode_init,
4121     .encode         = MPV_encode_picture,
4122     .close          = MPV_encode_end,
4123     .capabilities = CODEC_CAP_SLICE_THREADS,
4124     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4125     .long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4126     .priv_class     = &h263p_class,
4127 };
4128
4129 AVCodec ff_msmpeg4v2_encoder = {
4130     .name           = "msmpeg4v2",
4131     .type           = AVMEDIA_TYPE_VIDEO,
4132     .id             = CODEC_ID_MSMPEG4V2,
4133     .priv_data_size = sizeof(MpegEncContext),
4134     .init           = MPV_encode_init,
4135     .encode         = MPV_encode_picture,
4136     .close          = MPV_encode_end,
4137     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4138     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4139 };
4140
4141 AVCodec ff_msmpeg4v3_encoder = {
4142     .name           = "msmpeg4",
4143     .type           = AVMEDIA_TYPE_VIDEO,
4144     .id             = CODEC_ID_MSMPEG4V3,
4145     .priv_data_size = sizeof(MpegEncContext),
4146     .init           = MPV_encode_init,
4147     .encode         = MPV_encode_picture,
4148     .close          = MPV_encode_end,
4149     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4150     .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4151 };
4152
4153 AVCodec ff_wmv1_encoder = {
4154     .name           = "wmv1",
4155     .type           = AVMEDIA_TYPE_VIDEO,
4156     .id             = CODEC_ID_WMV1,
4157     .priv_data_size = sizeof(MpegEncContext),
4158     .init           = MPV_encode_init,
4159     .encode         = MPV_encode_picture,
4160     .close          = MPV_encode_end,
4161     .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
4162     .long_name= NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4163 };