]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit 'e87f5e4e5f2e2e36b0b7826d708cda7049877af0'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include <stdint.h>
31
32 #include "libavutil/internal.h"
33 #include "libavutil/intmath.h"
34 #include "libavutil/mathematics.h"
35 #include "libavutil/pixdesc.h"
36 #include "libavutil/opt.h"
37 #include "libavutil/timer.h"
38 #include "avcodec.h"
39 #include "dct.h"
40 #include "idctdsp.h"
41 #include "mpeg12.h"
42 #include "mpegvideo.h"
43 #include "h261.h"
44 #include "h263.h"
45 #include "mjpegenc_common.h"
46 #include "mathops.h"
47 #include "mpegutils.h"
48 #include "mjpegenc.h"
49 #include "msmpeg4.h"
50 #include "pixblockdsp.h"
51 #include "qpeldsp.h"
52 #include "faandct.h"
53 #include "thread.h"
54 #include "aandcttab.h"
55 #include "flv.h"
56 #include "mpeg4video.h"
57 #include "internal.h"
58 #include "bytestream.h"
59 #include <limits.h>
60 #include "sp5x.h"
61
/* Quantiser bias values are fixed point with this many fractional bits
 * (e.g. 3 << (QUANT_BIAS_SHIFT - 3) stands for 3/8). */
#define QUANT_BIAS_SHIFT 8

/* Fixed-point scale, in bits, of the 16-bit multipliers that
 * ff_convert_matrix() stores in qmat16[][0][]. */
#define QMAT_SHIFT_MMX 16
/* Fixed-point scale, in bits, of the int multipliers that
 * ff_convert_matrix() stores in qmat[][]. */
#define QMAT_SHIFT 21

/* Forward declarations of file-local helpers; denoise_dct_c() and
 * dct_quantize_trellis_c() are referenced by ff_dct_encode_init() below. */
static int encode_picture(MpegEncContext *s, int picture_number);
static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
static int sse_mb(MpegEncContext *s);
static void denoise_dct_c(MpegEncContext *s, int16_t *block);
static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);

/* Shared default tables; mpv_encode_defaults() installs them as
 * s->me.mv_penalty and s->fcode_tab. */
static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
static uint8_t default_fcode_tab[MAX_MV * 2 + 1];

/* Option table made up of the FF_MPV_COMMON_OPTS entries, terminated by a
 * NULL entry. */
const AVOption ff_mpv_generic_options[] = {
    FF_MPV_COMMON_OPTS
    { NULL },
};
80
81 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
82                        uint16_t (*qmat16)[2][64],
83                        const uint16_t *quant_matrix,
84                        int bias, int qmin, int qmax, int intra)
85 {
86     FDCTDSPContext *fdsp = &s->fdsp;
87     int qscale;
88     int shift = 0;
89
90     for (qscale = qmin; qscale <= qmax; qscale++) {
91         int i;
92         if (fdsp->fdct == ff_jpeg_fdct_islow_8  ||
93             fdsp->fdct == ff_jpeg_fdct_islow_10 ||
94             fdsp->fdct == ff_faandct) {
95             for (i = 0; i < 64; i++) {
96                 const int j = s->idsp.idct_permutation[i];
97                 /* 16 <= qscale * quant_matrix[i] <= 7905
98                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
99                  *             19952 <=              x  <= 249205026
100                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
101                  *           3444240 >= (1 << 36) / (x) >= 275 */
102
103                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
104                                         (qscale * quant_matrix[j]));
105             }
106         } else if (fdsp->fdct == ff_fdct_ifast) {
107             for (i = 0; i < 64; i++) {
108                 const int j = s->idsp.idct_permutation[i];
109                 /* 16 <= qscale * quant_matrix[i] <= 7905
110                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
111                  *             19952 <=              x  <= 249205026
112                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
113                  *           3444240 >= (1 << 36) / (x) >= 275 */
114
115                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
116                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
117             }
118         } else {
119             for (i = 0; i < 64; i++) {
120                 const int j = s->idsp.idct_permutation[i];
121                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
122                  * Assume x = qscale * quant_matrix[i]
123                  * So             16 <=              x  <= 7905
124                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
125                  * so          32768 >= (1 << 19) / (x) >= 67 */
126                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
127                                         (qscale * quant_matrix[j]));
128                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
129                 //                    (qscale * quant_matrix[i]);
130                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
131                                        (qscale * quant_matrix[j]);
132
133                 if (qmat16[qscale][0][i] == 0 ||
134                     qmat16[qscale][0][i] == 128 * 256)
135                     qmat16[qscale][0][i] = 128 * 256 - 1;
136                 qmat16[qscale][1][i] =
137                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
138                                 qmat16[qscale][0][i]);
139             }
140         }
141
142         for (i = intra; i < 64; i++) {
143             int64_t max = 8191;
144             if (fdsp->fdct == ff_fdct_ifast) {
145                 max = (8191LL * ff_aanscales[i]) >> 14;
146             }
147             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
148                 shift++;
149             }
150         }
151     }
152     if (shift) {
153         av_log(NULL, AV_LOG_INFO,
154                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
155                QMAT_SHIFT - shift);
156     }
157 }
158
159 static inline void update_qscale(MpegEncContext *s)
160 {
161     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
162                 (FF_LAMBDA_SHIFT + 7);
163     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
164
165     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
166                  FF_LAMBDA_SHIFT;
167 }
168
169 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
170 {
171     int i;
172
173     if (matrix) {
174         put_bits(pb, 1, 1);
175         for (i = 0; i < 64; i++) {
176             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
177         }
178     } else
179         put_bits(pb, 1, 0);
180 }
181
182 /**
183  * init s->current_picture.qscale_table from s->lambda_table
184  */
185 void ff_init_qscale_tab(MpegEncContext *s)
186 {
187     int8_t * const qscale_table = s->current_picture.qscale_table;
188     int i;
189
190     for (i = 0; i < s->mb_num; i++) {
191         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
192         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
193         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
194                                                   s->avctx->qmax);
195     }
196 }
197
198 static void update_duplicate_context_after_me(MpegEncContext *dst,
199                                               MpegEncContext *src)
200 {
201 #define COPY(a) dst->a= src->a
202     COPY(pict_type);
203     COPY(current_picture);
204     COPY(f_code);
205     COPY(b_code);
206     COPY(qscale);
207     COPY(lambda);
208     COPY(lambda2);
209     COPY(picture_in_gop_number);
210     COPY(gop_picture_number);
211     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
212     COPY(progressive_frame);    // FIXME don't set in encode_header
213     COPY(partitioned_frame);    // FIXME don't set in encode_header
214 #undef COPY
215 }
216
217 /**
218  * Set the given MpegEncContext to defaults for encoding.
219  * the changed fields will not depend upon the prior state of the MpegEncContext.
220  */
221 static void mpv_encode_defaults(MpegEncContext *s)
222 {
223     int i;
224     ff_mpv_common_defaults(s);
225
226     for (i = -16; i < 16; i++) {
227         default_fcode_tab[i + MAX_MV] = 1;
228     }
229     s->me.mv_penalty = default_mv_penalty;
230     s->fcode_tab     = default_fcode_tab;
231
232     s->input_picture_number  = 0;
233     s->picture_in_gop_number = 0;
234 }
235
236 av_cold int ff_dct_encode_init(MpegEncContext *s) {
237     if (ARCH_X86)
238         ff_dct_encode_init_x86(s);
239
240     if (CONFIG_H263_ENCODER)
241         ff_h263dsp_init(&s->h263dsp);
242     if (!s->dct_quantize)
243         s->dct_quantize = ff_dct_quantize_c;
244     if (!s->denoise_dct)
245         s->denoise_dct  = denoise_dct_c;
246     s->fast_dct_quantize = s->dct_quantize;
247     if (s->avctx->trellis)
248         s->dct_quantize  = dct_quantize_trellis_c;
249
250     return 0;
251 }
252
253 /* init video encoder */
254 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
255 {
256     MpegEncContext *s = avctx->priv_data;
257     int i, ret, format_supported;
258
259     mpv_encode_defaults(s);
260
261     switch (avctx->codec_id) {
262     case AV_CODEC_ID_MPEG2VIDEO:
263         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
264             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
265             av_log(avctx, AV_LOG_ERROR,
266                    "only YUV420 and YUV422 are supported\n");
267             return -1;
268         }
269         break;
270     case AV_CODEC_ID_MJPEG:
271     case AV_CODEC_ID_AMV:
272         format_supported = 0;
273         /* JPEG color space */
274         if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
275             avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
276             avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
277             (avctx->color_range == AVCOL_RANGE_JPEG &&
278              (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
279               avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
280               avctx->pix_fmt == AV_PIX_FMT_YUV444P)))
281             format_supported = 1;
282         /* MPEG color space */
283         else if (avctx->strict_std_compliance <= FF_COMPLIANCE_UNOFFICIAL &&
284                  (avctx->pix_fmt == AV_PIX_FMT_YUV420P ||
285                   avctx->pix_fmt == AV_PIX_FMT_YUV422P ||
286                   avctx->pix_fmt == AV_PIX_FMT_YUV444P))
287             format_supported = 1;
288
289         if (!format_supported) {
290             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
291             return -1;
292         }
293         break;
294     default:
295         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
296             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
297             return -1;
298         }
299     }
300
301     switch (avctx->pix_fmt) {
302     case AV_PIX_FMT_YUVJ444P:
303     case AV_PIX_FMT_YUV444P:
304         s->chroma_format = CHROMA_444;
305         break;
306     case AV_PIX_FMT_YUVJ422P:
307     case AV_PIX_FMT_YUV422P:
308         s->chroma_format = CHROMA_422;
309         break;
310     case AV_PIX_FMT_YUVJ420P:
311     case AV_PIX_FMT_YUV420P:
312     default:
313         s->chroma_format = CHROMA_420;
314         break;
315     }
316
317     s->bit_rate = avctx->bit_rate;
318     s->width    = avctx->width;
319     s->height   = avctx->height;
320     if (avctx->gop_size > 600 &&
321         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
322         av_log(avctx, AV_LOG_WARNING,
323                "keyframe interval too large!, reducing it from %d to %d\n",
324                avctx->gop_size, 600);
325         avctx->gop_size = 600;
326     }
327     s->gop_size     = avctx->gop_size;
328     s->avctx        = avctx;
329     s->flags        = avctx->flags;
330     s->flags2       = avctx->flags2;
331     if (avctx->max_b_frames > MAX_B_FRAMES) {
332         av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
333                "is %d.\n", MAX_B_FRAMES);
334         avctx->max_b_frames = MAX_B_FRAMES;
335     }
336     s->max_b_frames = avctx->max_b_frames;
337     s->codec_id     = avctx->codec->id;
338     s->strict_std_compliance = avctx->strict_std_compliance;
339     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
340     s->mpeg_quant         = avctx->mpeg_quant;
341     s->rtp_mode           = !!avctx->rtp_payload_size;
342     s->intra_dc_precision = avctx->intra_dc_precision;
343
344     // workaround some differences between how applications specify dc precission
345     if (s->intra_dc_precision < 0) {
346         s->intra_dc_precision += 8;
347     } else if (s->intra_dc_precision >= 8)
348         s->intra_dc_precision -= 8;
349
350     if (s->intra_dc_precision < 0) {
351         av_log(avctx, AV_LOG_ERROR,
352                 "intra dc precision must be positive, note some applications use"
353                 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
354         return AVERROR(EINVAL);
355     }
356
357     if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
358         av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
359         return AVERROR(EINVAL);
360     }
361     s->user_specified_pts = AV_NOPTS_VALUE;
362
363     if (s->gop_size <= 1) {
364         s->intra_only = 1;
365         s->gop_size   = 12;
366     } else {
367         s->intra_only = 0;
368     }
369
370     s->me_method = avctx->me_method;
371
372     /* Fixed QSCALE */
373     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
374
375     s->adaptive_quant = (s->avctx->lumi_masking ||
376                          s->avctx->dark_masking ||
377                          s->avctx->temporal_cplx_masking ||
378                          s->avctx->spatial_cplx_masking  ||
379                          s->avctx->p_masking      ||
380                          s->avctx->border_masking ||
381                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
382                         !s->fixed_qscale;
383
384     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
385
386     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
387         switch(avctx->codec_id) {
388         case AV_CODEC_ID_MPEG1VIDEO:
389         case AV_CODEC_ID_MPEG2VIDEO:
390             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
391             break;
392         case AV_CODEC_ID_MPEG4:
393         case AV_CODEC_ID_MSMPEG4V1:
394         case AV_CODEC_ID_MSMPEG4V2:
395         case AV_CODEC_ID_MSMPEG4V3:
396             if       (avctx->rc_max_rate >= 15000000) {
397                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
398             } else if(avctx->rc_max_rate >=  2000000) {
399                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
400             } else if(avctx->rc_max_rate >=   384000) {
401                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
402             } else
403                 avctx->rc_buffer_size = 40;
404             avctx->rc_buffer_size *= 16384;
405             break;
406         }
407         if (avctx->rc_buffer_size) {
408             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
409         }
410     }
411
412     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
413         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
414         return -1;
415     }
416
417     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
418         av_log(avctx, AV_LOG_INFO,
419                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
420     }
421
422     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
423         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
424         return -1;
425     }
426
427     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
428         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
429         return -1;
430     }
431
432     if (avctx->rc_max_rate &&
433         avctx->rc_max_rate == avctx->bit_rate &&
434         avctx->rc_max_rate != avctx->rc_min_rate) {
435         av_log(avctx, AV_LOG_INFO,
436                "impossible bitrate constraints, this will fail\n");
437     }
438
439     if (avctx->rc_buffer_size &&
440         avctx->bit_rate * (int64_t)avctx->time_base.num >
441             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
442         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
443         return -1;
444     }
445
446     if (!s->fixed_qscale &&
447         avctx->bit_rate * av_q2d(avctx->time_base) >
448             avctx->bit_rate_tolerance) {
449         av_log(avctx, AV_LOG_WARNING,
450                "bitrate tolerance %d too small for bitrate %d, overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
451         avctx->bit_rate_tolerance = 5 * avctx->bit_rate * av_q2d(avctx->time_base);
452     }
453
454     if (s->avctx->rc_max_rate &&
455         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
456         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
457          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
458         90000LL * (avctx->rc_buffer_size - 1) >
459             s->avctx->rc_max_rate * 0xFFFFLL) {
460         av_log(avctx, AV_LOG_INFO,
461                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
462                "specified vbv buffer is too large for the given bitrate!\n");
463     }
464
465     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
466         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
467         s->codec_id != AV_CODEC_ID_FLV1) {
468         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
469         return -1;
470     }
471
472     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
473         av_log(avctx, AV_LOG_ERROR,
474                "OBMC is only supported with simple mb decision\n");
475         return -1;
476     }
477
478     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
479         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
480         return -1;
481     }
482
483     if (s->max_b_frames                    &&
484         s->codec_id != AV_CODEC_ID_MPEG4      &&
485         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
486         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
487         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
488         return -1;
489     }
490     if (s->max_b_frames < 0) {
491         av_log(avctx, AV_LOG_ERROR,
492                "max b frames must be 0 or positive for mpegvideo based encoders\n");
493         return -1;
494     }
495
496     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
497          s->codec_id == AV_CODEC_ID_H263  ||
498          s->codec_id == AV_CODEC_ID_H263P) &&
499         (avctx->sample_aspect_ratio.num > 255 ||
500          avctx->sample_aspect_ratio.den > 255)) {
501         av_log(avctx, AV_LOG_WARNING,
502                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
503                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
504         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
505                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
506     }
507
508     if ((s->codec_id == AV_CODEC_ID_H263  ||
509          s->codec_id == AV_CODEC_ID_H263P) &&
510         (avctx->width  > 2048 ||
511          avctx->height > 1152 )) {
512         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
513         return -1;
514     }
515     if ((s->codec_id == AV_CODEC_ID_H263  ||
516          s->codec_id == AV_CODEC_ID_H263P) &&
517         ((avctx->width &3) ||
518          (avctx->height&3) )) {
519         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
520         return -1;
521     }
522
523     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
524         (avctx->width  > 4095 ||
525          avctx->height > 4095 )) {
526         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
527         return -1;
528     }
529
530     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
531         (avctx->width  > 16383 ||
532          avctx->height > 16383 )) {
533         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
534         return -1;
535     }
536
537     if (s->codec_id == AV_CODEC_ID_RV10 &&
538         (avctx->width &15 ||
539          avctx->height&15 )) {
540         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
541         return AVERROR(EINVAL);
542     }
543
544     if (s->codec_id == AV_CODEC_ID_RV20 &&
545         (avctx->width &3 ||
546          avctx->height&3 )) {
547         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
548         return AVERROR(EINVAL);
549     }
550
551     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
552          s->codec_id == AV_CODEC_ID_WMV2) &&
553          avctx->width & 1) {
554          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
555          return -1;
556     }
557
558     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
559         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
560         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
561         return -1;
562     }
563
564     // FIXME mpeg2 uses that too
565     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
566                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
567         av_log(avctx, AV_LOG_ERROR,
568                "mpeg2 style quantization not supported by codec\n");
569         return -1;
570     }
571
572     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
573         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
574         return -1;
575     }
576
577     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
578         s->avctx->mb_decision != FF_MB_DECISION_RD) {
579         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
580         return -1;
581     }
582
583     if (s->avctx->scenechange_threshold < 1000000000 &&
584         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
585         av_log(avctx, AV_LOG_ERROR,
586                "closed gop with scene change detection are not supported yet, "
587                "set threshold to 1000000000\n");
588         return -1;
589     }
590
591     if (s->flags & CODEC_FLAG_LOW_DELAY) {
592         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
593             av_log(avctx, AV_LOG_ERROR,
594                   "low delay forcing is only available for mpeg2\n");
595             return -1;
596         }
597         if (s->max_b_frames != 0) {
598             av_log(avctx, AV_LOG_ERROR,
599                    "b frames cannot be used with low delay\n");
600             return -1;
601         }
602     }
603
604     if (s->q_scale_type == 1) {
605         if (avctx->qmax > 12) {
606             av_log(avctx, AV_LOG_ERROR,
607                    "non linear quant only supports qmax <= 12 currently\n");
608             return -1;
609         }
610     }
611
612     if (s->avctx->thread_count > 1         &&
613         s->codec_id != AV_CODEC_ID_MPEG4      &&
614         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
615         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
616         s->codec_id != AV_CODEC_ID_MJPEG      &&
617         (s->codec_id != AV_CODEC_ID_H263P)) {
618         av_log(avctx, AV_LOG_ERROR,
619                "multi threaded encoding not supported by codec\n");
620         return -1;
621     }
622
623     if (s->avctx->thread_count < 1) {
624         av_log(avctx, AV_LOG_ERROR,
625                "automatic thread number detection not supported by codec, "
626                "patch welcome\n");
627         return -1;
628     }
629
630     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
631         s->rtp_mode = 1;
632
633     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
634         s->h263_slice_structured = 1;
635
636     if (!avctx->time_base.den || !avctx->time_base.num) {
637         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
638         return -1;
639     }
640
641     i = (INT_MAX / 2 + 128) >> 8;
642     if (avctx->mb_threshold >= i) {
643         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
644                i - 1);
645         return -1;
646     }
647
648     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
649         av_log(avctx, AV_LOG_INFO,
650                "notice: b_frame_strategy only affects the first pass\n");
651         avctx->b_frame_strategy = 0;
652     }
653
654     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
655     if (i > 1) {
656         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
657         avctx->time_base.den /= i;
658         avctx->time_base.num /= i;
659         //return -1;
660     }
661
662     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
663         // (a + x * 3 / 8) / x
664         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
665         s->inter_quant_bias = 0;
666     } else {
667         s->intra_quant_bias = 0;
668         // (a - x / 4) / x
669         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
670     }
671
672     if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
673         av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
674         return AVERROR(EINVAL);
675     }
676
677     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
678         s->intra_quant_bias = avctx->intra_quant_bias;
679     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
680         s->inter_quant_bias = avctx->inter_quant_bias;
681
682     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
683
684     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
685         s->avctx->time_base.den > (1 << 16) - 1) {
686         av_log(avctx, AV_LOG_ERROR,
687                "timebase %d/%d not supported by MPEG 4 standard, "
688                "the maximum admitted value for the timebase denominator "
689                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
690                (1 << 16) - 1);
691         return -1;
692     }
693     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
694
695     switch (avctx->codec->id) {
696     case AV_CODEC_ID_MPEG1VIDEO:
697         s->out_format = FMT_MPEG1;
698         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
699         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
700         break;
701     case AV_CODEC_ID_MPEG2VIDEO:
702         s->out_format = FMT_MPEG1;
703         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
704         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
705         s->rtp_mode   = 1;
706         break;
707     case AV_CODEC_ID_MJPEG:
708     case AV_CODEC_ID_AMV:
709         s->out_format = FMT_MJPEG;
710         s->intra_only = 1; /* force intra only for jpeg */
711         if (!CONFIG_MJPEG_ENCODER ||
712             ff_mjpeg_encode_init(s) < 0)
713             return -1;
714         avctx->delay = 0;
715         s->low_delay = 1;
716         break;
717     case AV_CODEC_ID_H261:
718         if (!CONFIG_H261_ENCODER)
719             return -1;
720         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
721             av_log(avctx, AV_LOG_ERROR,
722                    "The specified picture size of %dx%d is not valid for the "
723                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
724                     s->width, s->height);
725             return -1;
726         }
727         s->out_format = FMT_H261;
728         avctx->delay  = 0;
729         s->low_delay  = 1;
730         break;
731     case AV_CODEC_ID_H263:
732         if (!CONFIG_H263_ENCODER)
733             return -1;
734         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
735                              s->width, s->height) == 8) {
736             av_log(avctx, AV_LOG_ERROR,
737                    "The specified picture size of %dx%d is not valid for "
738                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
739                    "352x288, 704x576, and 1408x1152. "
740                    "Try H.263+.\n", s->width, s->height);
741             return -1;
742         }
743         s->out_format = FMT_H263;
744         avctx->delay  = 0;
745         s->low_delay  = 1;
746         break;
747     case AV_CODEC_ID_H263P:
748         s->out_format = FMT_H263;
749         s->h263_plus  = 1;
750         /* Fx */
751         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
752         s->modified_quant  = s->h263_aic;
753         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
754         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
755
756         /* /Fx */
757         /* These are just to be sure */
758         avctx->delay = 0;
759         s->low_delay = 1;
760         break;
761     case AV_CODEC_ID_FLV1:
762         s->out_format      = FMT_H263;
763         s->h263_flv        = 2; /* format = 1; 11-bit codes */
764         s->unrestricted_mv = 1;
765         s->rtp_mode  = 0; /* don't allow GOB */
766         avctx->delay = 0;
767         s->low_delay = 1;
768         break;
769     case AV_CODEC_ID_RV10:
770         s->out_format = FMT_H263;
771         avctx->delay  = 0;
772         s->low_delay  = 1;
773         break;
774     case AV_CODEC_ID_RV20:
775         s->out_format      = FMT_H263;
776         avctx->delay       = 0;
777         s->low_delay       = 1;
778         s->modified_quant  = 1;
779         s->h263_aic        = 1;
780         s->h263_plus       = 1;
781         s->loop_filter     = 1;
782         s->unrestricted_mv = 0;
783         break;
784     case AV_CODEC_ID_MPEG4:
785         s->out_format      = FMT_H263;
786         s->h263_pred       = 1;
787         s->unrestricted_mv = 1;
788         s->low_delay       = s->max_b_frames ? 0 : 1;
789         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
790         break;
791     case AV_CODEC_ID_MSMPEG4V2:
792         s->out_format      = FMT_H263;
793         s->h263_pred       = 1;
794         s->unrestricted_mv = 1;
795         s->msmpeg4_version = 2;
796         avctx->delay       = 0;
797         s->low_delay       = 1;
798         break;
799     case AV_CODEC_ID_MSMPEG4V3:
800         s->out_format        = FMT_H263;
801         s->h263_pred         = 1;
802         s->unrestricted_mv   = 1;
803         s->msmpeg4_version   = 3;
804         s->flipflop_rounding = 1;
805         avctx->delay         = 0;
806         s->low_delay         = 1;
807         break;
808     case AV_CODEC_ID_WMV1:
809         s->out_format        = FMT_H263;
810         s->h263_pred         = 1;
811         s->unrestricted_mv   = 1;
812         s->msmpeg4_version   = 4;
813         s->flipflop_rounding = 1;
814         avctx->delay         = 0;
815         s->low_delay         = 1;
816         break;
817     case AV_CODEC_ID_WMV2:
818         s->out_format        = FMT_H263;
819         s->h263_pred         = 1;
820         s->unrestricted_mv   = 1;
821         s->msmpeg4_version   = 5;
822         s->flipflop_rounding = 1;
823         avctx->delay         = 0;
824         s->low_delay         = 1;
825         break;
826     default:
827         return -1;
828     }
829
830     avctx->has_b_frames = !s->low_delay;
831
832     s->encoding = 1;
833
834     s->progressive_frame    =
835     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
836                                                 CODEC_FLAG_INTERLACED_ME) ||
837                                 s->alternate_scan);
838
839     /* init */
840     ff_mpv_idct_init(s);
841     if (ff_mpv_common_init(s) < 0)
842         return -1;
843
844     ff_fdctdsp_init(&s->fdsp, avctx);
845     ff_me_cmp_init(&s->mecc, avctx);
846     ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
847     ff_pixblockdsp_init(&s->pdsp, avctx);
848     ff_qpeldsp_init(&s->qdsp);
849
850     s->avctx->coded_frame = s->current_picture.f;
851
852     if (s->msmpeg4_version) {
853         FF_ALLOCZ_OR_GOTO(s->avctx, s->ac_stats,
854                           2 * 2 * (MAX_LEVEL + 1) *
855                           (MAX_RUN + 1) * 2 * sizeof(int), fail);
856     }
857     FF_ALLOCZ_OR_GOTO(s->avctx, s->avctx->stats_out, 256, fail);
858
859     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix,   64 * 32 * sizeof(int), fail);
860     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix, 64 * 32 * sizeof(int), fail);
861     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix,   64 * 32 * sizeof(int), fail);
862     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
863     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_chroma_intra_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
864     FF_ALLOCZ_OR_GOTO(s->avctx, s->q_inter_matrix16, 64 * 32 * 2 * sizeof(uint16_t), fail);
865     FF_ALLOCZ_OR_GOTO(s->avctx, s->input_picture,
866                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
867     FF_ALLOCZ_OR_GOTO(s->avctx, s->reordered_input_picture,
868                       MAX_PICTURE_COUNT * sizeof(Picture *), fail);
869
870     if (s->avctx->noise_reduction) {
871         FF_ALLOCZ_OR_GOTO(s->avctx, s->dct_offset,
872                           2 * 64 * sizeof(uint16_t), fail);
873     }
874
875     ff_dct_encode_init(s);
876
877     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
878         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
879
880     s->quant_precision = 5;
881
882     ff_set_cmp(&s->mecc, s->mecc.ildct_cmp,      s->avctx->ildct_cmp);
883     ff_set_cmp(&s->mecc, s->mecc.frame_skip_cmp, s->avctx->frame_skip_cmp);
884
885     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
886         ff_h261_encode_init(s);
887     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
888         ff_h263_encode_init(s);
889     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
890         ff_msmpeg4_encode_init(s);
891     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
892         && s->out_format == FMT_MPEG1)
893         ff_mpeg1_encode_init(s);
894
895     /* init q matrix */
896     for (i = 0; i < 64; i++) {
897         int j = s->idsp.idct_permutation[i];
898         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
899             s->mpeg_quant) {
900             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
901             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
902         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
903             s->intra_matrix[j] =
904             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
905         } else {
906             /* mpeg1/2 */
907             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
908             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
909         }
910         if (s->avctx->intra_matrix)
911             s->intra_matrix[j] = s->avctx->intra_matrix[i];
912         if (s->avctx->inter_matrix)
913             s->inter_matrix[j] = s->avctx->inter_matrix[i];
914     }
915
916     /* precompute matrix */
917     /* for mjpeg, we do include qscale in the matrix */
918     if (s->out_format != FMT_MJPEG) {
919         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
920                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
921                           31, 1);
922         ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
923                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
924                           31, 0);
925     }
926
927     if (ff_rate_control_init(s) < 0)
928         return -1;
929
930 #if FF_API_ERROR_RATE
931     FF_DISABLE_DEPRECATION_WARNINGS
932     if (avctx->error_rate)
933         s->error_rate = avctx->error_rate;
934     FF_ENABLE_DEPRECATION_WARNINGS;
935 #endif
936
937 #if FF_API_NORMALIZE_AQP
938     FF_DISABLE_DEPRECATION_WARNINGS
939     if (avctx->flags & CODEC_FLAG_NORMALIZE_AQP)
940         s->mpv_flags |= FF_MPV_FLAG_NAQ;
941     FF_ENABLE_DEPRECATION_WARNINGS;
942 #endif
943
944 #if FF_API_MV0
945     FF_DISABLE_DEPRECATION_WARNINGS
946     if (avctx->flags & CODEC_FLAG_MV0)
947         s->mpv_flags |= FF_MPV_FLAG_MV0;
948     FF_ENABLE_DEPRECATION_WARNINGS
949 #endif
950
951     if (avctx->b_frame_strategy == 2) {
952         for (i = 0; i < s->max_b_frames + 2; i++) {
953             s->tmp_frames[i] = av_frame_alloc();
954             if (!s->tmp_frames[i])
955                 return AVERROR(ENOMEM);
956
957             s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
958             s->tmp_frames[i]->width  = s->width  >> avctx->brd_scale;
959             s->tmp_frames[i]->height = s->height >> avctx->brd_scale;
960
961             ret = av_frame_get_buffer(s->tmp_frames[i], 32);
962             if (ret < 0)
963                 return ret;
964         }
965     }
966
967     return 0;
968 fail:
969     ff_mpv_encode_end(avctx);
970     return AVERROR_UNKNOWN;
971 }
972
973 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
974 {
975     MpegEncContext *s = avctx->priv_data;
976     int i;
977
978     ff_rate_control_uninit(s);
979
980     ff_mpv_common_end(s);
981     if (CONFIG_MJPEG_ENCODER &&
982         s->out_format == FMT_MJPEG)
983         ff_mjpeg_encode_close(s);
984
985     av_freep(&avctx->extradata);
986
987     for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
988         av_frame_free(&s->tmp_frames[i]);
989
990     ff_free_picture_tables(&s->new_picture);
991     ff_mpeg_unref_picture(s, &s->new_picture);
992
993     av_freep(&s->avctx->stats_out);
994     av_freep(&s->ac_stats);
995
996     if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
997     if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
998     s->q_chroma_intra_matrix=   NULL;
999     s->q_chroma_intra_matrix16= NULL;
1000     av_freep(&s->q_intra_matrix);
1001     av_freep(&s->q_inter_matrix);
1002     av_freep(&s->q_intra_matrix16);
1003     av_freep(&s->q_inter_matrix16);
1004     av_freep(&s->input_picture);
1005     av_freep(&s->reordered_input_picture);
1006     av_freep(&s->dct_offset);
1007
1008     return 0;
1009 }
1010
/**
 * Sum of absolute errors of a 16x16 pixel block against a constant value.
 *
 * @param src    top-left pixel of the block; only read, never written
 * @param ref    value every pixel is compared against
 * @param stride distance in bytes between vertically adjacent pixels
 * @return sum over the 256 pixels of |pixel - ref|
 */
static int get_sae(const uint8_t *src, int ref, int stride)
{
    int x, y;
    int acc = 0;

    for (y = 0; y < 16; y++) {
        const uint8_t *row = src + y * stride;

        for (x = 0; x < 16; x++) {
            const int diff = row[x] - ref;

            acc += diff >= 0 ? diff : -diff;
        }
    }

    return acc;
}
1024
1025 static int get_intra_count(MpegEncContext *s, uint8_t *src,
1026                            uint8_t *ref, int stride)
1027 {
1028     int x, y, w, h;
1029     int acc = 0;
1030
1031     w = s->width  & ~15;
1032     h = s->height & ~15;
1033
1034     for (y = 0; y < h; y += 16) {
1035         for (x = 0; x < w; x += 16) {
1036             int offset = x + y * stride;
1037             int sad  = s->mecc.sad[0](NULL, src + offset, ref + offset,
1038                                       stride, 16);
1039             int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1040             int sae  = get_sae(src + offset, mean, stride);
1041
1042             acc += sae + 500 < sad;
1043         }
1044     }
1045     return acc;
1046 }
1047
1048
1049 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1050 {
1051     Picture *pic = NULL;
1052     int64_t pts;
1053     int i, display_picture_number = 0, ret;
1054     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
1055                                                  (s->low_delay ? 0 : 1);
1056     int direct = 1;
1057
1058     if (pic_arg) {
1059         pts = pic_arg->pts;
1060         display_picture_number = s->input_picture_number++;
1061
1062         if (pts != AV_NOPTS_VALUE) {
1063             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1064                 int64_t last = s->user_specified_pts;
1065
1066                 if (pts <= last) {
1067                     av_log(s->avctx, AV_LOG_ERROR,
1068                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1069                            pts, last);
1070                     return AVERROR(EINVAL);
1071                 }
1072
1073                 if (!s->low_delay && display_picture_number == 1)
1074                     s->dts_delta = pts - last;
1075             }
1076             s->user_specified_pts = pts;
1077         } else {
1078             if (s->user_specified_pts != AV_NOPTS_VALUE) {
1079                 s->user_specified_pts =
1080                 pts = s->user_specified_pts + 1;
1081                 av_log(s->avctx, AV_LOG_INFO,
1082                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1083                        pts);
1084             } else {
1085                 pts = display_picture_number;
1086             }
1087         }
1088     }
1089
1090     if (pic_arg) {
1091         if (!pic_arg->buf[0])
1092             direct = 0;
1093         if (pic_arg->linesize[0] != s->linesize)
1094             direct = 0;
1095         if (pic_arg->linesize[1] != s->uvlinesize)
1096             direct = 0;
1097         if (pic_arg->linesize[2] != s->uvlinesize)
1098             direct = 0;
1099         if ((s->width & 15) || (s->height & 15))
1100             direct = 0;
1101         if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1102             direct = 0;
1103         if (s->linesize & (STRIDE_ALIGN-1))
1104             direct = 0;
1105
1106         av_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1107                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1108
1109         if (direct) {
1110             i = ff_find_unused_picture(s, 1);
1111             if (i < 0)
1112                 return i;
1113
1114             pic = &s->picture[i];
1115             pic->reference = 3;
1116
1117             if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1118                 return ret;
1119             if (ff_alloc_picture(s, pic, 1) < 0) {
1120                 return -1;
1121             }
1122         } else {
1123             i = ff_find_unused_picture(s, 0);
1124             if (i < 0)
1125                 return i;
1126
1127             pic = &s->picture[i];
1128             pic->reference = 3;
1129
1130             if (ff_alloc_picture(s, pic, 0) < 0) {
1131                 return -1;
1132             }
1133
1134             if (pic->f->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1135                 pic->f->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1136                 pic->f->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1137                 // empty
1138             } else {
1139                 int h_chroma_shift, v_chroma_shift;
1140                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1141                                                  &h_chroma_shift,
1142                                                  &v_chroma_shift);
1143
1144                 for (i = 0; i < 3; i++) {
1145                     int src_stride = pic_arg->linesize[i];
1146                     int dst_stride = i ? s->uvlinesize : s->linesize;
1147                     int h_shift = i ? h_chroma_shift : 0;
1148                     int v_shift = i ? v_chroma_shift : 0;
1149                     int w = s->width  >> h_shift;
1150                     int h = s->height >> v_shift;
1151                     uint8_t *src = pic_arg->data[i];
1152                     uint8_t *dst = pic->f->data[i];
1153                     int vpad = 16;
1154
1155                     if (   s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1156                         && !s->progressive_sequence
1157                         && FFALIGN(s->height, 32) - s->height > 16)
1158                         vpad = 32;
1159
1160                     if (!s->avctx->rc_buffer_size)
1161                         dst += INPLACE_OFFSET;
1162
1163                     if (src_stride == dst_stride)
1164                         memcpy(dst, src, src_stride * h);
1165                     else {
1166                         int h2 = h;
1167                         uint8_t *dst2 = dst;
1168                         while (h2--) {
1169                             memcpy(dst2, src, w);
1170                             dst2 += dst_stride;
1171                             src += src_stride;
1172                         }
1173                     }
1174                     if ((s->width & 15) || (s->height & (vpad-1))) {
1175                         s->mpvencdsp.draw_edges(dst, dst_stride,
1176                                                 w, h,
1177                                                 16>>h_shift,
1178                                                 vpad>>v_shift,
1179                                                 EDGE_BOTTOM);
1180                     }
1181                 }
1182             }
1183         }
1184         ret = av_frame_copy_props(pic->f, pic_arg);
1185         if (ret < 0)
1186             return ret;
1187
1188         pic->f->display_picture_number = display_picture_number;
1189         pic->f->pts = pts; // we set this here to avoid modifiying pic_arg
1190     }
1191
1192     /* shift buffer entries */
1193     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1194         s->input_picture[i - 1] = s->input_picture[i];
1195
1196     s->input_picture[encoding_delay] = (Picture*) pic;
1197
1198     return 0;
1199 }
1200
1201 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1202 {
1203     int x, y, plane;
1204     int score = 0;
1205     int64_t score64 = 0;
1206
1207     for (plane = 0; plane < 3; plane++) {
1208         const int stride = p->f->linesize[plane];
1209         const int bw = plane ? 1 : 2;
1210         for (y = 0; y < s->mb_height * bw; y++) {
1211             for (x = 0; x < s->mb_width * bw; x++) {
1212                 int off = p->shared ? 0 : 16;
1213                 uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1214                 uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1215                 int v = s->mecc.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1216
1217                 switch (FFABS(s->avctx->frame_skip_exp)) {
1218                 case 0: score    =  FFMAX(score, v);          break;
1219                 case 1: score   += FFABS(v);                  break;
1220                 case 2: score64 += v * (int64_t)v;                       break;
1221                 case 3: score64 += FFABS(v * (int64_t)v * v);            break;
1222                 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v);  break;
1223                 }
1224             }
1225         }
1226     }
1227     emms_c();
1228
1229     if (score)
1230         score64 = score;
1231     if (s->avctx->frame_skip_exp < 0)
1232         score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1233                       -1.0/s->avctx->frame_skip_exp);
1234
1235     if (score64 < s->avctx->frame_skip_threshold)
1236         return 1;
1237     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1238         return 1;
1239     return 0;
1240 }
1241
1242 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1243 {
1244     AVPacket pkt = { 0 };
1245     int ret, got_output;
1246
1247     av_init_packet(&pkt);
1248     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1249     if (ret < 0)
1250         return ret;
1251
1252     ret = pkt.size;
1253     av_free_packet(&pkt);
1254     return ret;
1255 }
1256
1257 static int estimate_best_b_count(MpegEncContext *s)
1258 {
1259     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1260     AVCodecContext *c = avcodec_alloc_context3(NULL);
1261     const int scale = s->avctx->brd_scale;
1262     int i, j, out_size, p_lambda, b_lambda, lambda2;
1263     int64_t best_rd  = INT64_MAX;
1264     int best_b_count = -1;
1265
1266     av_assert0(scale >= 0 && scale <= 3);
1267
1268     //emms_c();
1269     //s->next_picture_ptr->quality;
1270     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1271     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1272     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1273     if (!b_lambda) // FIXME we should do this somewhere else
1274         b_lambda = p_lambda;
1275     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1276                FF_LAMBDA_SHIFT;
1277
1278     c->width        = s->width  >> scale;
1279     c->height       = s->height >> scale;
1280     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR;
1281     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1282     c->mb_decision  = s->avctx->mb_decision;
1283     c->me_cmp       = s->avctx->me_cmp;
1284     c->mb_cmp       = s->avctx->mb_cmp;
1285     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1286     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1287     c->time_base    = s->avctx->time_base;
1288     c->max_b_frames = s->max_b_frames;
1289
1290     if (avcodec_open2(c, codec, NULL) < 0)
1291         return -1;
1292
1293     for (i = 0; i < s->max_b_frames + 2; i++) {
1294         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1295                                                 s->next_picture_ptr;
1296         uint8_t *data[4];
1297
1298         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1299             pre_input = *pre_input_ptr;
1300             memcpy(data, pre_input_ptr->f->data, sizeof(data));
1301
1302             if (!pre_input.shared && i) {
1303                 data[0] += INPLACE_OFFSET;
1304                 data[1] += INPLACE_OFFSET;
1305                 data[2] += INPLACE_OFFSET;
1306             }
1307
1308             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1309                                        s->tmp_frames[i]->linesize[0],
1310                                        data[0],
1311                                        pre_input.f->linesize[0],
1312                                        c->width, c->height);
1313             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1314                                        s->tmp_frames[i]->linesize[1],
1315                                        data[1],
1316                                        pre_input.f->linesize[1],
1317                                        c->width >> 1, c->height >> 1);
1318             s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1319                                        s->tmp_frames[i]->linesize[2],
1320                                        data[2],
1321                                        pre_input.f->linesize[2],
1322                                        c->width >> 1, c->height >> 1);
1323         }
1324     }
1325
1326     for (j = 0; j < s->max_b_frames + 1; j++) {
1327         int64_t rd = 0;
1328
1329         if (!s->input_picture[j])
1330             break;
1331
1332         c->error[0] = c->error[1] = c->error[2] = 0;
1333
1334         s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1335         s->tmp_frames[0]->quality   = 1 * FF_QP2LAMBDA;
1336
1337         out_size = encode_frame(c, s->tmp_frames[0]);
1338
1339         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1340
1341         for (i = 0; i < s->max_b_frames + 1; i++) {
1342             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1343
1344             s->tmp_frames[i + 1]->pict_type = is_p ?
1345                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1346             s->tmp_frames[i + 1]->quality   = is_p ? p_lambda : b_lambda;
1347
1348             out_size = encode_frame(c, s->tmp_frames[i + 1]);
1349
1350             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1351         }
1352
1353         /* get the delayed frames */
1354         while (out_size) {
1355             out_size = encode_frame(c, NULL);
1356             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1357         }
1358
1359         rd += c->error[0] + c->error[1] + c->error[2];
1360
1361         if (rd < best_rd) {
1362             best_rd = rd;
1363             best_b_count = j;
1364         }
1365     }
1366
1367     avcodec_close(c);
1368     av_freep(&c);
1369
1370     return best_b_count;
1371 }
1372
1373 static int select_input_picture(MpegEncContext *s)
1374 {
1375     int i, ret;
1376
1377     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1378         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1379     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1380
1381     /* set next picture type & ordering */
1382     if (!s->reordered_input_picture[0] && s->input_picture[0]) {
1383         if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1384             if (s->picture_in_gop_number < s->gop_size &&
1385                 s->next_picture_ptr &&
1386                 skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1387                 // FIXME check that te gop check above is +-1 correct
1388                 av_frame_unref(s->input_picture[0]->f);
1389
1390                 ff_vbv_update(s, 0);
1391
1392                 goto no_output_pic;
1393             }
1394         }
1395
1396         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1397             !s->next_picture_ptr || s->intra_only) {
1398             s->reordered_input_picture[0] = s->input_picture[0];
1399             s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1400             s->reordered_input_picture[0]->f->coded_picture_number =
1401                 s->coded_picture_number++;
1402         } else {
1403             int b_frames;
1404
1405             if (s->flags & CODEC_FLAG_PASS2) {
1406                 for (i = 0; i < s->max_b_frames + 1; i++) {
1407                     int pict_num = s->input_picture[0]->f->display_picture_number + i;
1408
1409                     if (pict_num >= s->rc_context.num_entries)
1410                         break;
1411                     if (!s->input_picture[i]) {
1412                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1413                         break;
1414                     }
1415
1416                     s->input_picture[i]->f->pict_type =
1417                         s->rc_context.entry[pict_num].new_pict_type;
1418                 }
1419             }
1420
1421             if (s->avctx->b_frame_strategy == 0) {
1422                 b_frames = s->max_b_frames;
1423                 while (b_frames && !s->input_picture[b_frames])
1424                     b_frames--;
1425             } else if (s->avctx->b_frame_strategy == 1) {
1426                 for (i = 1; i < s->max_b_frames + 1; i++) {
1427                     if (s->input_picture[i] &&
1428                         s->input_picture[i]->b_frame_score == 0) {
1429                         s->input_picture[i]->b_frame_score =
1430                             get_intra_count(s,
1431                                             s->input_picture[i    ]->f->data[0],
1432                                             s->input_picture[i - 1]->f->data[0],
1433                                             s->linesize) + 1;
1434                     }
1435                 }
1436                 for (i = 0; i < s->max_b_frames + 1; i++) {
1437                     if (!s->input_picture[i] ||
1438                         s->input_picture[i]->b_frame_score - 1 >
1439                             s->mb_num / s->avctx->b_sensitivity)
1440                         break;
1441                 }
1442
1443                 b_frames = FFMAX(0, i - 1);
1444
1445                 /* reset scores */
1446                 for (i = 0; i < b_frames + 1; i++) {
1447                     s->input_picture[i]->b_frame_score = 0;
1448                 }
1449             } else if (s->avctx->b_frame_strategy == 2) {
1450                 b_frames = estimate_best_b_count(s);
1451             } else {
1452                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1453                 b_frames = 0;
1454             }
1455
1456             emms_c();
1457
1458             for (i = b_frames - 1; i >= 0; i--) {
1459                 int type = s->input_picture[i]->f->pict_type;
1460                 if (type && type != AV_PICTURE_TYPE_B)
1461                     b_frames = i;
1462             }
1463             if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1464                 b_frames == s->max_b_frames) {
1465                 av_log(s->avctx, AV_LOG_ERROR,
1466                        "warning, too many b frames in a row\n");
1467             }
1468
1469             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1470                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1471                     s->gop_size > s->picture_in_gop_number) {
1472                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1473                 } else {
1474                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1475                         b_frames = 0;
1476                     s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1477                 }
1478             }
1479
1480             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1481                 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1482                 b_frames--;
1483
1484             s->reordered_input_picture[0] = s->input_picture[b_frames];
1485             if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1486                 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1487             s->reordered_input_picture[0]->f->coded_picture_number =
1488                 s->coded_picture_number++;
1489             for (i = 0; i < b_frames; i++) {
1490                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1491                 s->reordered_input_picture[i + 1]->f->pict_type =
1492                     AV_PICTURE_TYPE_B;
1493                 s->reordered_input_picture[i + 1]->f->coded_picture_number =
1494                     s->coded_picture_number++;
1495             }
1496         }
1497     }
1498 no_output_pic:
1499     if (s->reordered_input_picture[0]) {
1500         s->reordered_input_picture[0]->reference =
1501            s->reordered_input_picture[0]->f->pict_type !=
1502                AV_PICTURE_TYPE_B ? 3 : 0;
1503
1504         ff_mpeg_unref_picture(s, &s->new_picture);
1505         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1506             return ret;
1507
1508         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1509             // input is a shared pix, so we can't modifiy it -> alloc a new
1510             // one & ensure that the shared one is reuseable
1511
1512             Picture *pic;
1513             int i = ff_find_unused_picture(s, 0);
1514             if (i < 0)
1515                 return i;
1516             pic = &s->picture[i];
1517
1518             pic->reference = s->reordered_input_picture[0]->reference;
1519             if (ff_alloc_picture(s, pic, 0) < 0) {
1520                 return -1;
1521             }
1522
1523             ret = av_frame_copy_props(pic->f, s->reordered_input_picture[0]->f);
1524             if (ret < 0)
1525                 return ret;
1526
1527             /* mark us unused / free shared pic */
1528             av_frame_unref(s->reordered_input_picture[0]->f);
1529             s->reordered_input_picture[0]->shared = 0;
1530
1531             s->current_picture_ptr = pic;
1532         } else {
1533             // input is not a shared pix -> reuse buffer for current_pix
1534             s->current_picture_ptr = s->reordered_input_picture[0];
1535             for (i = 0; i < 4; i++) {
1536                 s->new_picture.f->data[i] += INPLACE_OFFSET;
1537             }
1538         }
1539         ff_mpeg_unref_picture(s, &s->current_picture);
1540         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1541                                        s->current_picture_ptr)) < 0)
1542             return ret;
1543
1544         s->picture_number = s->new_picture.f->display_picture_number;
1545     } else {
1546         ff_mpeg_unref_picture(s, &s->new_picture);
1547     }
1548     return 0;
1549 }
1550
1551 static void frame_end(MpegEncContext *s)
1552 {
1553     if (s->unrestricted_mv &&
1554         s->current_picture.reference &&
1555         !s->intra_only) {
1556         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt);
1557         int hshift = desc->log2_chroma_w;
1558         int vshift = desc->log2_chroma_h;
1559         s->mpvencdsp.draw_edges(s->current_picture.f->data[0],
1560                                 s->current_picture.f->linesize[0],
1561                                 s->h_edge_pos, s->v_edge_pos,
1562                                 EDGE_WIDTH, EDGE_WIDTH,
1563                                 EDGE_TOP | EDGE_BOTTOM);
1564         s->mpvencdsp.draw_edges(s->current_picture.f->data[1],
1565                                 s->current_picture.f->linesize[1],
1566                                 s->h_edge_pos >> hshift,
1567                                 s->v_edge_pos >> vshift,
1568                                 EDGE_WIDTH >> hshift,
1569                                 EDGE_WIDTH >> vshift,
1570                                 EDGE_TOP | EDGE_BOTTOM);
1571         s->mpvencdsp.draw_edges(s->current_picture.f->data[2],
1572                                 s->current_picture.f->linesize[2],
1573                                 s->h_edge_pos >> hshift,
1574                                 s->v_edge_pos >> vshift,
1575                                 EDGE_WIDTH >> hshift,
1576                                 EDGE_WIDTH >> vshift,
1577                                 EDGE_TOP | EDGE_BOTTOM);
1578     }
1579
1580     emms_c();
1581
1582     s->last_pict_type                 = s->pict_type;
1583     s->last_lambda_for [s->pict_type] = s->current_picture_ptr->f->quality;
1584     if (s->pict_type!= AV_PICTURE_TYPE_B)
1585         s->last_non_b_pict_type = s->pict_type;
1586
1587     s->avctx->coded_frame = s->current_picture_ptr->f;
1588
1589 }
1590
1591 static void update_noise_reduction(MpegEncContext *s)
1592 {
1593     int intra, i;
1594
1595     for (intra = 0; intra < 2; intra++) {
1596         if (s->dct_count[intra] > (1 << 16)) {
1597             for (i = 0; i < 64; i++) {
1598                 s->dct_error_sum[intra][i] >>= 1;
1599             }
1600             s->dct_count[intra] >>= 1;
1601         }
1602
1603         for (i = 0; i < 64; i++) {
1604             s->dct_offset[intra][i] = (s->avctx->noise_reduction *
1605                                        s->dct_count[intra] +
1606                                        s->dct_error_sum[intra][i] / 2) /
1607                                       (s->dct_error_sum[intra][i] + 1);
1608         }
1609     }
1610 }
1611
1612 static int frame_start(MpegEncContext *s)
1613 {
1614     int ret;
1615
1616     /* mark & release old frames */
1617     if (s->pict_type != AV_PICTURE_TYPE_B && s->last_picture_ptr &&
1618         s->last_picture_ptr != s->next_picture_ptr &&
1619         s->last_picture_ptr->f->buf[0]) {
1620         ff_mpeg_unref_picture(s, s->last_picture_ptr);
1621     }
1622
1623     s->current_picture_ptr->f->pict_type = s->pict_type;
1624     s->current_picture_ptr->f->key_frame = s->pict_type == AV_PICTURE_TYPE_I;
1625
1626     ff_mpeg_unref_picture(s, &s->current_picture);
1627     if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1628                                    s->current_picture_ptr)) < 0)
1629         return ret;
1630
1631     if (s->pict_type != AV_PICTURE_TYPE_B) {
1632         s->last_picture_ptr = s->next_picture_ptr;
1633         if (!s->droppable)
1634             s->next_picture_ptr = s->current_picture_ptr;
1635     }
1636
1637     if (s->last_picture_ptr) {
1638         ff_mpeg_unref_picture(s, &s->last_picture);
1639         if (s->last_picture_ptr->f->buf[0] &&
1640             (ret = ff_mpeg_ref_picture(s, &s->last_picture,
1641                                        s->last_picture_ptr)) < 0)
1642             return ret;
1643     }
1644     if (s->next_picture_ptr) {
1645         ff_mpeg_unref_picture(s, &s->next_picture);
1646         if (s->next_picture_ptr->f->buf[0] &&
1647             (ret = ff_mpeg_ref_picture(s, &s->next_picture,
1648                                        s->next_picture_ptr)) < 0)
1649             return ret;
1650     }
1651
1652     if (s->picture_structure!= PICT_FRAME) {
1653         int i;
1654         for (i = 0; i < 4; i++) {
1655             if (s->picture_structure == PICT_BOTTOM_FIELD) {
1656                 s->current_picture.f->data[i] +=
1657                     s->current_picture.f->linesize[i];
1658             }
1659             s->current_picture.f->linesize[i] *= 2;
1660             s->last_picture.f->linesize[i]    *= 2;
1661             s->next_picture.f->linesize[i]    *= 2;
1662         }
1663     }
1664
1665     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
1666         s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1667         s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1668     } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
1669         s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1670         s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1671     } else {
1672         s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1673         s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1674     }
1675
1676     if (s->dct_error_sum) {
1677         av_assert2(s->avctx->noise_reduction && s->encoding);
1678         update_noise_reduction(s);
1679     }
1680
1681     return 0;
1682 }
1683
1684 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1685                           const AVFrame *pic_arg, int *got_packet)
1686 {
1687     MpegEncContext *s = avctx->priv_data;
1688     int i, stuffing_count, ret;
1689     int context_count = s->slice_context_count;
1690
1691     s->picture_in_gop_number++;
1692
1693     if (load_input_picture(s, pic_arg) < 0)
1694         return -1;
1695
1696     if (select_input_picture(s) < 0) {
1697         return -1;
1698     }
1699
1700     /* output? */
1701     if (s->new_picture.f->data[0]) {
1702         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1703             return ret;
1704         if (s->mb_info) {
1705             s->mb_info_ptr = av_packet_new_side_data(pkt,
1706                                  AV_PKT_DATA_H263_MB_INFO,
1707                                  s->mb_width*s->mb_height*12);
1708             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1709         }
1710
1711         for (i = 0; i < context_count; i++) {
1712             int start_y = s->thread_context[i]->start_mb_y;
1713             int   end_y = s->thread_context[i]->  end_mb_y;
1714             int h       = s->mb_height;
1715             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1716             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1717
1718             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1719         }
1720
1721         s->pict_type = s->new_picture.f->pict_type;
1722         //emms_c();
1723         ret = frame_start(s);
1724         if (ret < 0)
1725             return ret;
1726 vbv_retry:
1727         if (encode_picture(s, s->picture_number) < 0)
1728             return -1;
1729
1730         avctx->header_bits = s->header_bits;
1731         avctx->mv_bits     = s->mv_bits;
1732         avctx->misc_bits   = s->misc_bits;
1733         avctx->i_tex_bits  = s->i_tex_bits;
1734         avctx->p_tex_bits  = s->p_tex_bits;
1735         avctx->i_count     = s->i_count;
1736         // FIXME f/b_count in avctx
1737         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1738         avctx->skip_count  = s->skip_count;
1739
1740         frame_end(s);
1741
1742         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1743             ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1744
1745         if (avctx->rc_buffer_size) {
1746             RateControlContext *rcc = &s->rc_context;
1747             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1748
1749             if (put_bits_count(&s->pb) > max_size &&
1750                 s->lambda < s->avctx->lmax) {
1751                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1752                                        (s->qscale + 1) / s->qscale);
1753                 if (s->adaptive_quant) {
1754                     int i;
1755                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1756                         s->lambda_table[i] =
1757                             FFMAX(s->lambda_table[i] + 1,
1758                                   s->lambda_table[i] * (s->qscale + 1) /
1759                                   s->qscale);
1760                 }
1761                 s->mb_skipped = 0;        // done in frame_start()
1762                 // done in encode_picture() so we must undo it
1763                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1764                     if (s->flipflop_rounding          ||
1765                         s->codec_id == AV_CODEC_ID_H263P ||
1766                         s->codec_id == AV_CODEC_ID_MPEG4)
1767                         s->no_rounding ^= 1;
1768                 }
1769                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1770                     s->time_base       = s->last_time_base;
1771                     s->last_non_b_time = s->time - s->pp_time;
1772                 }
1773                 for (i = 0; i < context_count; i++) {
1774                     PutBitContext *pb = &s->thread_context[i]->pb;
1775                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1776                 }
1777                 goto vbv_retry;
1778             }
1779
1780             av_assert0(s->avctx->rc_max_rate);
1781         }
1782
1783         if (s->flags & CODEC_FLAG_PASS1)
1784             ff_write_pass1_stats(s);
1785
1786         for (i = 0; i < 4; i++) {
1787             s->current_picture_ptr->f->error[i] =
1788             s->current_picture.f->error[i] =
1789                 s->current_picture.error[i];
1790             avctx->error[i] += s->current_picture_ptr->f->error[i];
1791         }
1792
1793         if (s->flags & CODEC_FLAG_PASS1)
1794             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1795                    avctx->i_tex_bits + avctx->p_tex_bits ==
1796                        put_bits_count(&s->pb));
1797         flush_put_bits(&s->pb);
1798         s->frame_bits  = put_bits_count(&s->pb);
1799
1800         stuffing_count = ff_vbv_update(s, s->frame_bits);
1801         s->stuffing_bits = 8*stuffing_count;
1802         if (stuffing_count) {
1803             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1804                     stuffing_count + 50) {
1805                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1806                 return -1;
1807             }
1808
1809             switch (s->codec_id) {
1810             case AV_CODEC_ID_MPEG1VIDEO:
1811             case AV_CODEC_ID_MPEG2VIDEO:
1812                 while (stuffing_count--) {
1813                     put_bits(&s->pb, 8, 0);
1814                 }
1815             break;
1816             case AV_CODEC_ID_MPEG4:
1817                 put_bits(&s->pb, 16, 0);
1818                 put_bits(&s->pb, 16, 0x1C3);
1819                 stuffing_count -= 4;
1820                 while (stuffing_count--) {
1821                     put_bits(&s->pb, 8, 0xFF);
1822                 }
1823             break;
1824             default:
1825                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1826             }
1827             flush_put_bits(&s->pb);
1828             s->frame_bits  = put_bits_count(&s->pb);
1829         }
1830
1831         /* update mpeg1/2 vbv_delay for CBR */
1832         if (s->avctx->rc_max_rate                          &&
1833             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1834             s->out_format == FMT_MPEG1                     &&
1835             90000LL * (avctx->rc_buffer_size - 1) <=
1836                 s->avctx->rc_max_rate * 0xFFFFLL) {
1837             int vbv_delay, min_delay;
1838             double inbits  = s->avctx->rc_max_rate *
1839                              av_q2d(s->avctx->time_base);
1840             int    minbits = s->frame_bits - 8 *
1841                              (s->vbv_delay_ptr - s->pb.buf - 1);
1842             double bits    = s->rc_context.buffer_index + minbits - inbits;
1843
1844             if (bits < 0)
1845                 av_log(s->avctx, AV_LOG_ERROR,
1846                        "Internal error, negative bits\n");
1847
1848             assert(s->repeat_first_field == 0);
1849
1850             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1851             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1852                         s->avctx->rc_max_rate;
1853
1854             vbv_delay = FFMAX(vbv_delay, min_delay);
1855
1856             av_assert0(vbv_delay < 0xFFFF);
1857
1858             s->vbv_delay_ptr[0] &= 0xF8;
1859             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1860             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1861             s->vbv_delay_ptr[2] &= 0x07;
1862             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1863             avctx->vbv_delay     = vbv_delay * 300;
1864         }
1865         s->total_bits     += s->frame_bits;
1866         avctx->frame_bits  = s->frame_bits;
1867
1868         pkt->pts = s->current_picture.f->pts;
1869         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1870             if (!s->current_picture.f->coded_picture_number)
1871                 pkt->dts = pkt->pts - s->dts_delta;
1872             else
1873                 pkt->dts = s->reordered_pts;
1874             s->reordered_pts = pkt->pts;
1875         } else
1876             pkt->dts = pkt->pts;
1877         if (s->current_picture.f->key_frame)
1878             pkt->flags |= AV_PKT_FLAG_KEY;
1879         if (s->mb_info)
1880             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1881     } else {
1882         s->frame_bits = 0;
1883     }
1884
1885     /* release non-reference frames */
1886     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1887         if (!s->picture[i].reference)
1888             ff_mpeg_unref_picture(s, &s->picture[i]);
1889     }
1890
1891     av_assert1((s->frame_bits & 7) == 0);
1892
1893     pkt->size = s->frame_bits / 8;
1894     *got_packet = !!pkt->size;
1895     return 0;
1896 }
1897
1898 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1899                                                 int n, int threshold)
1900 {
1901     static const char tab[64] = {
1902         3, 2, 2, 1, 1, 1, 1, 1,
1903         1, 1, 1, 1, 1, 1, 1, 1,
1904         1, 1, 1, 1, 1, 1, 1, 1,
1905         0, 0, 0, 0, 0, 0, 0, 0,
1906         0, 0, 0, 0, 0, 0, 0, 0,
1907         0, 0, 0, 0, 0, 0, 0, 0,
1908         0, 0, 0, 0, 0, 0, 0, 0,
1909         0, 0, 0, 0, 0, 0, 0, 0
1910     };
1911     int score = 0;
1912     int run = 0;
1913     int i;
1914     int16_t *block = s->block[n];
1915     const int last_index = s->block_last_index[n];
1916     int skip_dc;
1917
1918     if (threshold < 0) {
1919         skip_dc = 0;
1920         threshold = -threshold;
1921     } else
1922         skip_dc = 1;
1923
1924     /* Are all we could set to zero already zero? */
1925     if (last_index <= skip_dc - 1)
1926         return;
1927
1928     for (i = 0; i <= last_index; i++) {
1929         const int j = s->intra_scantable.permutated[i];
1930         const int level = FFABS(block[j]);
1931         if (level == 1) {
1932             if (skip_dc && i == 0)
1933                 continue;
1934             score += tab[run];
1935             run = 0;
1936         } else if (level > 1) {
1937             return;
1938         } else {
1939             run++;
1940         }
1941     }
1942     if (score >= threshold)
1943         return;
1944     for (i = skip_dc; i <= last_index; i++) {
1945         const int j = s->intra_scantable.permutated[i];
1946         block[j] = 0;
1947     }
1948     if (block[0])
1949         s->block_last_index[n] = 0;
1950     else
1951         s->block_last_index[n] = -1;
1952 }
1953
1954 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1955                                int last_index)
1956 {
1957     int i;
1958     const int maxlevel = s->max_qcoeff;
1959     const int minlevel = s->min_qcoeff;
1960     int overflow = 0;
1961
1962     if (s->mb_intra) {
1963         i = 1; // skip clipping of intra dc
1964     } else
1965         i = 0;
1966
1967     for (; i <= last_index; i++) {
1968         const int j = s->intra_scantable.permutated[i];
1969         int level = block[j];
1970
1971         if (level > maxlevel) {
1972             level = maxlevel;
1973             overflow++;
1974         } else if (level < minlevel) {
1975             level = minlevel;
1976             overflow++;
1977         }
1978
1979         block[j] = level;
1980     }
1981
1982     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1983         av_log(s->avctx, AV_LOG_INFO,
1984                "warning, clipping %d dct coefficients to %d..%d\n",
1985                overflow, minlevel, maxlevel);
1986 }
1987
/**
 * Compute a weight for each of the 64 samples of an 8x8 block from the local
 * variation of the samples around it.
 *
 * For every position the samples in the surrounding 3x3 window (cropped to
 * the 8x8 block) are summed up; the weight is
 *   36 * sqrt(count * sum_of_squares - sum^2) / count
 * where count is the number of samples in the window.
 *
 * @param weight output, 64 entries stored row by row
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance between vertically adjacent samples of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            const int count = (bottom - top) * (right - left);
            int sum = 0;
            int sqr = 0;
            int wy, wx;

            for (wy = top; wy < bottom; wy++) {
                const uint8_t *line = ptr + wy * stride;

                for (wx = left; wx < right; wx++) {
                    const int v = line[wx];

                    sum += v;
                    sqr += v * v;
                }
            }

            weight[8 * row + col] =
                (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
2011
2012 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2013                                                 int motion_x, int motion_y,
2014                                                 int mb_block_height,
2015                                                 int mb_block_width,
2016                                                 int mb_block_count)
2017 {
2018     int16_t weight[12][64];
2019     int16_t orig[12][64];
2020     const int mb_x = s->mb_x;
2021     const int mb_y = s->mb_y;
2022     int i;
2023     int skip_dct[12];
2024     int dct_offset = s->linesize * 8; // default for progressive frames
2025     int uv_dct_offset = s->uvlinesize * 8;
2026     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2027     ptrdiff_t wrap_y, wrap_c;
2028
2029     for (i = 0; i < mb_block_count; i++)
2030         skip_dct[i] = s->skipdct;
2031
2032     if (s->adaptive_quant) {
2033         const int last_qp = s->qscale;
2034         const int mb_xy = mb_x + mb_y * s->mb_stride;
2035
2036         s->lambda = s->lambda_table[mb_xy];
2037         update_qscale(s);
2038
2039         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2040             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
2041             s->dquant = s->qscale - last_qp;
2042
2043             if (s->out_format == FMT_H263) {
2044                 s->dquant = av_clip(s->dquant, -2, 2);
2045
2046                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2047                     if (!s->mb_intra) {
2048                         if (s->pict_type == AV_PICTURE_TYPE_B) {
2049                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2050                                 s->dquant = 0;
2051                         }
2052                         if (s->mv_type == MV_TYPE_8X8)
2053                             s->dquant = 0;
2054                     }
2055                 }
2056             }
2057         }
2058         ff_set_qscale(s, last_qp + s->dquant);
2059     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2060         ff_set_qscale(s, s->qscale + s->dquant);
2061
2062     wrap_y = s->linesize;
2063     wrap_c = s->uvlinesize;
2064     ptr_y  = s->new_picture.f->data[0] +
2065              (mb_y * 16 * wrap_y)              + mb_x * 16;
2066     ptr_cb = s->new_picture.f->data[1] +
2067              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2068     ptr_cr = s->new_picture.f->data[2] +
2069              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2070
2071     if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2072         uint8_t *ebuf = s->edge_emu_buffer + 36 * wrap_y;
2073         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
2074         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
2075         s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2076                                  wrap_y, wrap_y,
2077                                  16, 16, mb_x * 16, mb_y * 16,
2078                                  s->width, s->height);
2079         ptr_y = ebuf;
2080         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2081                                  wrap_c, wrap_c,
2082                                  mb_block_width, mb_block_height,
2083                                  mb_x * mb_block_width, mb_y * mb_block_height,
2084                                  cw, ch);
2085         ptr_cb = ebuf + 16 * wrap_y;
2086         s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2087                                  wrap_c, wrap_c,
2088                                  mb_block_width, mb_block_height,
2089                                  mb_x * mb_block_width, mb_y * mb_block_height,
2090                                  cw, ch);
2091         ptr_cr = ebuf + 16 * wrap_y + 16;
2092     }
2093
2094     if (s->mb_intra) {
2095         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2096             int progressive_score, interlaced_score;
2097
2098             s->interlaced_dct = 0;
2099             progressive_score = s->mecc.ildct_cmp[4](s, ptr_y, NULL, wrap_y, 8) +
2100                                 s->mecc.ildct_cmp[4](s, ptr_y + wrap_y * 8,
2101                                                      NULL, wrap_y, 8) - 400;
2102
2103             if (progressive_score > 0) {
2104                 interlaced_score = s->mecc.ildct_cmp[4](s, ptr_y,
2105                                                         NULL, wrap_y * 2, 8) +
2106                                    s->mecc.ildct_cmp[4](s, ptr_y + wrap_y,
2107                                                         NULL, wrap_y * 2, 8);
2108                 if (progressive_score > interlaced_score) {
2109                     s->interlaced_dct = 1;
2110
2111                     dct_offset = wrap_y;
2112                     uv_dct_offset = wrap_c;
2113                     wrap_y <<= 1;
2114                     if (s->chroma_format == CHROMA_422 ||
2115                         s->chroma_format == CHROMA_444)
2116                         wrap_c <<= 1;
2117                 }
2118             }
2119         }
2120
2121         s->pdsp.get_pixels(s->block[0], ptr_y,                  wrap_y);
2122         s->pdsp.get_pixels(s->block[1], ptr_y + 8,              wrap_y);
2123         s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset,     wrap_y);
2124         s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2125
2126         if (s->flags & CODEC_FLAG_GRAY) {
2127             skip_dct[4] = 1;
2128             skip_dct[5] = 1;
2129         } else {
2130             s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2131             s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2132             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
2133                 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2134                 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2135             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
2136                 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2137                 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2138                 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2139                 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2140                 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2141                 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2142             }
2143         }
2144     } else {
2145         op_pixels_func (*op_pix)[4];
2146         qpel_mc_func (*op_qpix)[16];
2147         uint8_t *dest_y, *dest_cb, *dest_cr;
2148
2149         dest_y  = s->dest[0];
2150         dest_cb = s->dest[1];
2151         dest_cr = s->dest[2];
2152
2153         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2154             op_pix  = s->hdsp.put_pixels_tab;
2155             op_qpix = s->qdsp.put_qpel_pixels_tab;
2156         } else {
2157             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
2158             op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2159         }
2160
2161         if (s->mv_dir & MV_DIR_FORWARD) {
2162             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2163                           s->last_picture.f->data,
2164                           op_pix, op_qpix);
2165             op_pix  = s->hdsp.avg_pixels_tab;
2166             op_qpix = s->qdsp.avg_qpel_pixels_tab;
2167         }
2168         if (s->mv_dir & MV_DIR_BACKWARD) {
2169             ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2170                           s->next_picture.f->data,
2171                           op_pix, op_qpix);
2172         }
2173
2174         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
2175             int progressive_score, interlaced_score;
2176
2177             s->interlaced_dct = 0;
2178             progressive_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2179                                 s->mecc.ildct_cmp[0](s, dest_y + wrap_y * 8,
2180                                                      ptr_y + wrap_y * 8,
2181                                                      wrap_y, 8) - 400;
2182
2183             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2184                 progressive_score -= 400;
2185
2186             if (progressive_score > 0) {
2187                 interlaced_score = s->mecc.ildct_cmp[0](s, dest_y, ptr_y,
2188                                                         wrap_y * 2, 8) +
2189                                    s->mecc.ildct_cmp[0](s, dest_y + wrap_y,
2190                                                         ptr_y + wrap_y,
2191                                                         wrap_y * 2, 8);
2192
2193                 if (progressive_score > interlaced_score) {
2194                     s->interlaced_dct = 1;
2195
2196                     dct_offset = wrap_y;
2197                     uv_dct_offset = wrap_c;
2198                     wrap_y <<= 1;
2199                     if (s->chroma_format == CHROMA_422)
2200                         wrap_c <<= 1;
2201                 }
2202             }
2203         }
2204
2205         s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2206         s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2207         s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2208                             dest_y + dct_offset, wrap_y);
2209         s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2210                             dest_y + dct_offset + 8, wrap_y);
2211
2212         if (s->flags & CODEC_FLAG_GRAY) {
2213             skip_dct[4] = 1;
2214             skip_dct[5] = 1;
2215         } else {
2216             s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2217             s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2218             if (!s->chroma_y_shift) { /* 422 */
2219                 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2220                                     dest_cb + uv_dct_offset, wrap_c);
2221                 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2222                                     dest_cr + uv_dct_offset, wrap_c);
2223             }
2224         }
2225         /* pre quantization */
2226         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
2227                 2 * s->qscale * s->qscale) {
2228             // FIXME optimize
2229             if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2230                 skip_dct[0] = 1;
2231             if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2232                 skip_dct[1] = 1;
2233             if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2234                                wrap_y, 8) < 20 * s->qscale)
2235                 skip_dct[2] = 1;
2236             if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2237                                wrap_y, 8) < 20 * s->qscale)
2238                 skip_dct[3] = 1;
2239             if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2240                 skip_dct[4] = 1;
2241             if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2242                 skip_dct[5] = 1;
2243             if (!s->chroma_y_shift) { /* 422 */
2244                 if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset,
2245                                    dest_cb + uv_dct_offset,
2246                                    wrap_c, 8) < 20 * s->qscale)
2247                     skip_dct[6] = 1;
2248                 if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset,
2249                                    dest_cr + uv_dct_offset,
2250                                    wrap_c, 8) < 20 * s->qscale)
2251                     skip_dct[7] = 1;
2252             }
2253         }
2254     }
2255
2256     if (s->quantizer_noise_shaping) {
2257         if (!skip_dct[0])
2258             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2259         if (!skip_dct[1])
2260             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2261         if (!skip_dct[2])
2262             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2263         if (!skip_dct[3])
2264             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2265         if (!skip_dct[4])
2266             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2267         if (!skip_dct[5])
2268             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2269         if (!s->chroma_y_shift) { /* 422 */
2270             if (!skip_dct[6])
2271                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2272                                   wrap_c);
2273             if (!skip_dct[7])
2274                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2275                                   wrap_c);
2276         }
2277         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2278     }
2279
2280     /* DCT & quantize */
2281     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2282     {
2283         for (i = 0; i < mb_block_count; i++) {
2284             if (!skip_dct[i]) {
2285                 int overflow;
2286                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2287                 // FIXME we could decide to change to quantizer instead of
2288                 // clipping
2289                 // JS: I don't think that would be a good idea it could lower
2290                 //     quality instead of improve it. Just INTRADC clipping
2291                 //     deserves changes in quantizer
2292                 if (overflow)
2293                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2294             } else
2295                 s->block_last_index[i] = -1;
2296         }
2297         if (s->quantizer_noise_shaping) {
2298             for (i = 0; i < mb_block_count; i++) {
2299                 if (!skip_dct[i]) {
2300                     s->block_last_index[i] =
2301                         dct_quantize_refine(s, s->block[i], weight[i],
2302                                             orig[i], i, s->qscale);
2303                 }
2304             }
2305         }
2306
2307         if (s->luma_elim_threshold && !s->mb_intra)
2308             for (i = 0; i < 4; i++)
2309                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2310         if (s->chroma_elim_threshold && !s->mb_intra)
2311             for (i = 4; i < mb_block_count; i++)
2312                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2313
2314         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2315             for (i = 0; i < mb_block_count; i++) {
2316                 if (s->block_last_index[i] == -1)
2317                     s->coded_score[i] = INT_MAX / 256;
2318             }
2319         }
2320     }
2321
2322     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2323         s->block_last_index[4] =
2324         s->block_last_index[5] = 0;
2325         s->block[4][0] =
2326         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2327         if (!s->chroma_y_shift) { /* 422 / 444 */
2328             for (i=6; i<12; i++) {
2329                 s->block_last_index[i] = 0;
2330                 s->block[i][0] = s->block[4][0];
2331             }
2332         }
2333     }
2334
2335     // non c quantize code returns incorrect block_last_index FIXME
2336     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2337         for (i = 0; i < mb_block_count; i++) {
2338             int j;
2339             if (s->block_last_index[i] > 0) {
2340                 for (j = 63; j > 0; j--) {
2341                     if (s->block[i][s->intra_scantable.permutated[j]])
2342                         break;
2343                 }
2344                 s->block_last_index[i] = j;
2345             }
2346         }
2347     }
2348
2349     /* huffman encode */
2350     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2351     case AV_CODEC_ID_MPEG1VIDEO:
2352     case AV_CODEC_ID_MPEG2VIDEO:
2353         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2354             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2355         break;
2356     case AV_CODEC_ID_MPEG4:
2357         if (CONFIG_MPEG4_ENCODER)
2358             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2359         break;
2360     case AV_CODEC_ID_MSMPEG4V2:
2361     case AV_CODEC_ID_MSMPEG4V3:
2362     case AV_CODEC_ID_WMV1:
2363         if (CONFIG_MSMPEG4_ENCODER)
2364             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2365         break;
2366     case AV_CODEC_ID_WMV2:
2367         if (CONFIG_WMV2_ENCODER)
2368             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2369         break;
2370     case AV_CODEC_ID_H261:
2371         if (CONFIG_H261_ENCODER)
2372             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2373         break;
2374     case AV_CODEC_ID_H263:
2375     case AV_CODEC_ID_H263P:
2376     case AV_CODEC_ID_FLV1:
2377     case AV_CODEC_ID_RV10:
2378     case AV_CODEC_ID_RV20:
2379         if (CONFIG_H263_ENCODER)
2380             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2381         break;
2382     case AV_CODEC_ID_MJPEG:
2383     case AV_CODEC_ID_AMV:
2384         if (CONFIG_MJPEG_ENCODER)
2385             ff_mjpeg_encode_mb(s, s->block);
2386         break;
2387     default:
2388         av_assert1(0);
2389     }
2390 }
2391
2392 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2393 {
2394     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2395     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2396     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2397 }
2398
2399 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2400     int i;
2401
2402     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2403
2404     /* mpeg1 */
2405     d->mb_skip_run= s->mb_skip_run;
2406     for(i=0; i<3; i++)
2407         d->last_dc[i] = s->last_dc[i];
2408
2409     /* statistics */
2410     d->mv_bits= s->mv_bits;
2411     d->i_tex_bits= s->i_tex_bits;
2412     d->p_tex_bits= s->p_tex_bits;
2413     d->i_count= s->i_count;
2414     d->f_count= s->f_count;
2415     d->b_count= s->b_count;
2416     d->skip_count= s->skip_count;
2417     d->misc_bits= s->misc_bits;
2418     d->last_bits= 0;
2419
2420     d->mb_skipped= 0;
2421     d->qscale= s->qscale;
2422     d->dquant= s->dquant;
2423
2424     d->esc3_level_length= s->esc3_level_length;
2425 }
2426
2427 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2428     int i;
2429
2430     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2431     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2432
2433     /* mpeg1 */
2434     d->mb_skip_run= s->mb_skip_run;
2435     for(i=0; i<3; i++)
2436         d->last_dc[i] = s->last_dc[i];
2437
2438     /* statistics */
2439     d->mv_bits= s->mv_bits;
2440     d->i_tex_bits= s->i_tex_bits;
2441     d->p_tex_bits= s->p_tex_bits;
2442     d->i_count= s->i_count;
2443     d->f_count= s->f_count;
2444     d->b_count= s->b_count;
2445     d->skip_count= s->skip_count;
2446     d->misc_bits= s->misc_bits;
2447
2448     d->mb_intra= s->mb_intra;
2449     d->mb_skipped= s->mb_skipped;
2450     d->mv_type= s->mv_type;
2451     d->mv_dir= s->mv_dir;
2452     d->pb= s->pb;
2453     if(s->data_partitioning){
2454         d->pb2= s->pb2;
2455         d->tex_pb= s->tex_pb;
2456     }
2457     d->block= s->block;
2458     for(i=0; i<8; i++)
2459         d->block_last_index[i]= s->block_last_index[i];
2460     d->interlaced_dct= s->interlaced_dct;
2461     d->qscale= s->qscale;
2462
2463     d->esc3_level_length= s->esc3_level_length;
2464 }
2465
2466 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2467                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2468                            int *dmin, int *next_block, int motion_x, int motion_y)
2469 {
2470     int score;
2471     uint8_t *dest_backup[3];
2472
2473     copy_context_before_encode(s, backup, type);
2474
2475     s->block= s->blocks[*next_block];
2476     s->pb= pb[*next_block];
2477     if(s->data_partitioning){
2478         s->pb2   = pb2   [*next_block];
2479         s->tex_pb= tex_pb[*next_block];
2480     }
2481
2482     if(*next_block){
2483         memcpy(dest_backup, s->dest, sizeof(s->dest));
2484         s->dest[0] = s->rd_scratchpad;
2485         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2486         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2487         av_assert0(s->linesize >= 32); //FIXME
2488     }
2489
2490     encode_mb(s, motion_x, motion_y);
2491
2492     score= put_bits_count(&s->pb);
2493     if(s->data_partitioning){
2494         score+= put_bits_count(&s->pb2);
2495         score+= put_bits_count(&s->tex_pb);
2496     }
2497
2498     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2499         ff_mpv_decode_mb(s, s->block);
2500
2501         score *= s->lambda2;
2502         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2503     }
2504
2505     if(*next_block){
2506         memcpy(s->dest, dest_backup, sizeof(s->dest));
2507     }
2508
2509     if(score<*dmin){
2510         *dmin= score;
2511         *next_block^=1;
2512
2513         copy_context_after_encode(best, s, type);
2514     }
2515 }
2516
2517 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2518     uint32_t *sq = ff_square_tab + 256;
2519     int acc=0;
2520     int x,y;
2521
2522     if(w==16 && h==16)
2523         return s->mecc.sse[0](NULL, src1, src2, stride, 16);
2524     else if(w==8 && h==8)
2525         return s->mecc.sse[1](NULL, src1, src2, stride, 8);
2526
2527     for(y=0; y<h; y++){
2528         for(x=0; x<w; x++){
2529             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2530         }
2531     }
2532
2533     av_assert2(acc>=0);
2534
2535     return acc;
2536 }
2537
2538 static int sse_mb(MpegEncContext *s){
2539     int w= 16;
2540     int h= 16;
2541
2542     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2543     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2544
2545     if(w==16 && h==16)
2546       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2547         return s->mecc.nsse[0](s, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2548                s->mecc.nsse[1](s, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2549                s->mecc.nsse[1](s, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2550       }else{
2551         return s->mecc.sse[0](NULL, s->new_picture.f->data[0] + s->mb_x * 16 + s->mb_y * s->linesize   * 16, s->dest[0], s->linesize,   16) +
2552                s->mecc.sse[1](NULL, s->new_picture.f->data[1] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[1], s->uvlinesize,  8) +
2553                s->mecc.sse[1](NULL, s->new_picture.f->data[2] + s->mb_x *  8 + s->mb_y * s->uvlinesize *  8, s->dest[2], s->uvlinesize,  8);
2554       }
2555     else
2556         return  sse(s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2557                +sse(s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2558                +sse(s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2559 }
2560
2561 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2562     MpegEncContext *s= *(void**)arg;
2563
2564
2565     s->me.pre_pass=1;
2566     s->me.dia_size= s->avctx->pre_dia_size;
2567     s->first_slice_line=1;
2568     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2569         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2570             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2571         }
2572         s->first_slice_line=0;
2573     }
2574
2575     s->me.pre_pass=0;
2576
2577     return 0;
2578 }
2579
2580 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2581     MpegEncContext *s= *(void**)arg;
2582
2583     ff_check_alignment();
2584
2585     s->me.dia_size= s->avctx->dia_size;
2586     s->first_slice_line=1;
2587     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2588         s->mb_x=0; //for block init below
2589         ff_init_block_index(s);
2590         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2591             s->block_index[0]+=2;
2592             s->block_index[1]+=2;
2593             s->block_index[2]+=2;
2594             s->block_index[3]+=2;
2595
2596             /* compute motion vector & mb_type and store in context */
2597             if(s->pict_type==AV_PICTURE_TYPE_B)
2598                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2599             else
2600                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2601         }
2602         s->first_slice_line=0;
2603     }
2604     return 0;
2605 }
2606
2607 static int mb_var_thread(AVCodecContext *c, void *arg){
2608     MpegEncContext *s= *(void**)arg;
2609     int mb_x, mb_y;
2610
2611     ff_check_alignment();
2612
2613     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2614         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2615             int xx = mb_x * 16;
2616             int yy = mb_y * 16;
2617             uint8_t *pix = s->new_picture.f->data[0] + (yy * s->linesize) + xx;
2618             int varc;
2619             int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2620
2621             varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2622                     (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2623
2624             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2625             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2626             s->me.mb_var_sum_temp    += varc;
2627         }
2628     }
2629     return 0;
2630 }
2631
2632 static void write_slice_end(MpegEncContext *s){
2633     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2634         if(s->partitioned_frame){
2635             ff_mpeg4_merge_partitions(s);
2636         }
2637
2638         ff_mpeg4_stuffing(&s->pb);
2639     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2640         ff_mjpeg_encode_stuffing(s);
2641     }
2642
2643     avpriv_align_put_bits(&s->pb);
2644     flush_put_bits(&s->pb);
2645
2646     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2647         s->misc_bits+= get_bits_diff(s);
2648 }
2649
2650 static void write_mb_info(MpegEncContext *s)
2651 {
2652     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2653     int offset = put_bits_count(&s->pb);
2654     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2655     int gobn = s->mb_y / s->gob_index;
2656     int pred_x, pred_y;
2657     if (CONFIG_H263_ENCODER)
2658         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2659     bytestream_put_le32(&ptr, offset);
2660     bytestream_put_byte(&ptr, s->qscale);
2661     bytestream_put_byte(&ptr, gobn);
2662     bytestream_put_le16(&ptr, mba);
2663     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2664     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2665     /* 4MV not implemented */
2666     bytestream_put_byte(&ptr, 0); /* hmv2 */
2667     bytestream_put_byte(&ptr, 0); /* vmv2 */
2668 }
2669
2670 static void update_mb_info(MpegEncContext *s, int startcode)
2671 {
2672     if (!s->mb_info)
2673         return;
2674     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2675         s->mb_info_size += 12;
2676         s->prev_mb_info = s->last_mb_info;
2677     }
2678     if (startcode) {
2679         s->prev_mb_info = put_bits_count(&s->pb)/8;
2680         /* This might have incremented mb_info_size above, and we return without
2681          * actually writing any info into that slot yet. But in that case,
2682          * this will be called again at the start of the after writing the
2683          * start code, actually writing the mb info. */
2684         return;
2685     }
2686
2687     s->last_mb_info = put_bits_count(&s->pb)/8;
2688     if (!s->mb_info_size)
2689         s->mb_info_size += 12;
2690     write_mb_info(s);
2691 }
2692
2693 static int encode_thread(AVCodecContext *c, void *arg){
2694     MpegEncContext *s= *(void**)arg;
2695     int mb_x, mb_y, pdif = 0;
2696     int chr_h= 16>>s->chroma_y_shift;
2697     int i, j;
2698     MpegEncContext best_s, backup_s;
2699     uint8_t bit_buf[2][MAX_MB_BYTES];
2700     uint8_t bit_buf2[2][MAX_MB_BYTES];
2701     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2702     PutBitContext pb[2], pb2[2], tex_pb[2];
2703
2704     ff_check_alignment();
2705
2706     for(i=0; i<2; i++){
2707         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2708         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2709         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2710     }
2711
2712     s->last_bits= put_bits_count(&s->pb);
2713     s->mv_bits=0;
2714     s->misc_bits=0;
2715     s->i_tex_bits=0;
2716     s->p_tex_bits=0;
2717     s->i_count=0;
2718     s->f_count=0;
2719     s->b_count=0;
2720     s->skip_count=0;
2721
2722     for(i=0; i<3; i++){
2723         /* init last dc values */
2724         /* note: quant matrix value (8) is implied here */
2725         s->last_dc[i] = 128 << s->intra_dc_precision;
2726
2727         s->current_picture.error[i] = 0;
2728     }
2729     if(s->codec_id==AV_CODEC_ID_AMV){
2730         s->last_dc[0] = 128*8/13;
2731         s->last_dc[1] = 128*8/14;
2732         s->last_dc[2] = 128*8/14;
2733     }
2734     s->mb_skip_run = 0;
2735     memset(s->last_mv, 0, sizeof(s->last_mv));
2736
2737     s->last_mv_dir = 0;
2738
2739     switch(s->codec_id){
2740     case AV_CODEC_ID_H263:
2741     case AV_CODEC_ID_H263P:
2742     case AV_CODEC_ID_FLV1:
2743         if (CONFIG_H263_ENCODER)
2744             s->gob_index = ff_h263_get_gob_height(s);
2745         break;
2746     case AV_CODEC_ID_MPEG4:
2747         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2748             ff_mpeg4_init_partitions(s);
2749         break;
2750     }
2751
2752     s->resync_mb_x=0;
2753     s->resync_mb_y=0;
2754     s->first_slice_line = 1;
2755     s->ptr_lastgob = s->pb.buf;
2756     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2757         s->mb_x=0;
2758         s->mb_y= mb_y;
2759
2760         ff_set_qscale(s, s->qscale);
2761         ff_init_block_index(s);
2762
2763         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2764             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2765             int mb_type= s->mb_type[xy];
2766 //            int d;
2767             int dmin= INT_MAX;
2768             int dir;
2769
2770             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2771                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2772                 return -1;
2773             }
2774             if(s->data_partitioning){
2775                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2776                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2777                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2778                     return -1;
2779                 }
2780             }
2781
2782             s->mb_x = mb_x;
2783             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2784             ff_update_block_index(s);
2785
2786             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2787                 ff_h261_reorder_mb_index(s);
2788                 xy= s->mb_y*s->mb_stride + s->mb_x;
2789                 mb_type= s->mb_type[xy];
2790             }
2791
2792             /* write gob / video packet header  */
2793             if(s->rtp_mode){
2794                 int current_packet_size, is_gob_start;
2795
2796                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2797
2798                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2799
2800                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2801
2802                 switch(s->codec_id){
2803                 case AV_CODEC_ID_H261:
2804                     is_gob_start=0;//FIXME
2805                     break;
2806                 case AV_CODEC_ID_H263:
2807                 case AV_CODEC_ID_H263P:
2808                     if(!s->h263_slice_structured)
2809                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2810                     break;
2811                 case AV_CODEC_ID_MPEG2VIDEO:
2812                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2813                 case AV_CODEC_ID_MPEG1VIDEO:
2814                     if(s->mb_skip_run) is_gob_start=0;
2815                     break;
2816                 case AV_CODEC_ID_MJPEG:
2817                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2818                     break;
2819                 }
2820
2821                 if(is_gob_start){
2822                     if(s->start_mb_y != mb_y || mb_x!=0){
2823                         write_slice_end(s);
2824
2825                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2826                             ff_mpeg4_init_partitions(s);
2827                         }
2828                     }
2829
2830                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2831                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2832
2833                     if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
2834                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2835                         int d = 100 / s->error_rate;
2836                         if(r % d == 0){
2837                             current_packet_size=0;
2838                             s->pb.buf_ptr= s->ptr_lastgob;
2839                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2840                         }
2841                     }
2842
2843                     if (s->avctx->rtp_callback){
2844                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2845                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2846                     }
2847                     update_mb_info(s, 1);
2848
2849                     switch(s->codec_id){
2850                     case AV_CODEC_ID_MPEG4:
2851                         if (CONFIG_MPEG4_ENCODER) {
2852                             ff_mpeg4_encode_video_packet_header(s);
2853                             ff_mpeg4_clean_buffers(s);
2854                         }
2855                     break;
2856                     case AV_CODEC_ID_MPEG1VIDEO:
2857                     case AV_CODEC_ID_MPEG2VIDEO:
2858                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2859                             ff_mpeg1_encode_slice_header(s);
2860                             ff_mpeg1_clean_buffers(s);
2861                         }
2862                     break;
2863                     case AV_CODEC_ID_H263:
2864                     case AV_CODEC_ID_H263P:
2865                         if (CONFIG_H263_ENCODER)
2866                             ff_h263_encode_gob_header(s, mb_y);
2867                     break;
2868                     }
2869
2870                     if(s->flags&CODEC_FLAG_PASS1){
2871                         int bits= put_bits_count(&s->pb);
2872                         s->misc_bits+= bits - s->last_bits;
2873                         s->last_bits= bits;
2874                     }
2875
2876                     s->ptr_lastgob += current_packet_size;
2877                     s->first_slice_line=1;
2878                     s->resync_mb_x=mb_x;
2879                     s->resync_mb_y=mb_y;
2880                 }
2881             }
2882
2883             if(  (s->resync_mb_x   == s->mb_x)
2884                && s->resync_mb_y+1 == s->mb_y){
2885                 s->first_slice_line=0;
2886             }
2887
2888             s->mb_skipped=0;
2889             s->dquant=0; //only for QP_RD
2890
2891             update_mb_info(s, 0);
2892
2893             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2894                 int next_block=0;
2895                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2896
2897                 copy_context_before_encode(&backup_s, s, -1);
2898                 backup_s.pb= s->pb;
2899                 best_s.data_partitioning= s->data_partitioning;
2900                 best_s.partitioned_frame= s->partitioned_frame;
2901                 if(s->data_partitioning){
2902                     backup_s.pb2= s->pb2;
2903                     backup_s.tex_pb= s->tex_pb;
2904                 }
2905
2906                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2907                     s->mv_dir = MV_DIR_FORWARD;
2908                     s->mv_type = MV_TYPE_16X16;
2909                     s->mb_intra= 0;
2910                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2911                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2912                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2913                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2914                 }
2915                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2916                     s->mv_dir = MV_DIR_FORWARD;
2917                     s->mv_type = MV_TYPE_FIELD;
2918                     s->mb_intra= 0;
2919                     for(i=0; i<2; i++){
2920                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2921                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2922                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2923                     }
2924                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2925                                  &dmin, &next_block, 0, 0);
2926                 }
2927                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2928                     s->mv_dir = MV_DIR_FORWARD;
2929                     s->mv_type = MV_TYPE_16X16;
2930                     s->mb_intra= 0;
2931                     s->mv[0][0][0] = 0;
2932                     s->mv[0][0][1] = 0;
2933                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2934                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2935                 }
2936                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2937                     s->mv_dir = MV_DIR_FORWARD;
2938                     s->mv_type = MV_TYPE_8X8;
2939                     s->mb_intra= 0;
2940                     for(i=0; i<4; i++){
2941                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2942                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2943                     }
2944                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2945                                  &dmin, &next_block, 0, 0);
2946                 }
2947                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2948                     s->mv_dir = MV_DIR_FORWARD;
2949                     s->mv_type = MV_TYPE_16X16;
2950                     s->mb_intra= 0;
2951                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2952                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2953                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2954                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2955                 }
2956                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2957                     s->mv_dir = MV_DIR_BACKWARD;
2958                     s->mv_type = MV_TYPE_16X16;
2959                     s->mb_intra= 0;
2960                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2961                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2962                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2963                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2964                 }
2965                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2966                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2967                     s->mv_type = MV_TYPE_16X16;
2968                     s->mb_intra= 0;
2969                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2970                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2971                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2972                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2973                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2974                                  &dmin, &next_block, 0, 0);
2975                 }
2976                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2977                     s->mv_dir = MV_DIR_FORWARD;
2978                     s->mv_type = MV_TYPE_FIELD;
2979                     s->mb_intra= 0;
2980                     for(i=0; i<2; i++){
2981                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2982                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2983                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2984                     }
2985                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2986                                  &dmin, &next_block, 0, 0);
2987                 }
2988                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2989                     s->mv_dir = MV_DIR_BACKWARD;
2990                     s->mv_type = MV_TYPE_FIELD;
2991                     s->mb_intra= 0;
2992                     for(i=0; i<2; i++){
2993                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2994                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2995                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2996                     }
2997                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2998                                  &dmin, &next_block, 0, 0);
2999                 }
3000                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3001                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3002                     s->mv_type = MV_TYPE_FIELD;
3003                     s->mb_intra= 0;
3004                     for(dir=0; dir<2; dir++){
3005                         for(i=0; i<2; i++){
3006                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3007                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3008                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3009                         }
3010                     }
3011                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
3012                                  &dmin, &next_block, 0, 0);
3013                 }
3014                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3015                     s->mv_dir = 0;
3016                     s->mv_type = MV_TYPE_16X16;
3017                     s->mb_intra= 1;
3018                     s->mv[0][0][0] = 0;
3019                     s->mv[0][0][1] = 0;
3020                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
3021                                  &dmin, &next_block, 0, 0);
3022                     if(s->h263_pred || s->h263_aic){
3023                         if(best_s.mb_intra)
3024                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3025                         else
3026                             ff_clean_intra_table_entries(s); //old mode?
3027                     }
3028                 }
3029
3030                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3031                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3032                         const int last_qp= backup_s.qscale;
3033                         int qpi, qp, dc[6];
3034                         int16_t ac[6][16];
3035                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3036                         static const int dquant_tab[4]={-1,1,-2,2};
3037                         int storecoefs = s->mb_intra && s->dc_val[0];
3038
3039                         av_assert2(backup_s.dquant == 0);
3040
3041                         //FIXME intra
3042                         s->mv_dir= best_s.mv_dir;
3043                         s->mv_type = MV_TYPE_16X16;
3044                         s->mb_intra= best_s.mb_intra;
3045                         s->mv[0][0][0] = best_s.mv[0][0][0];
3046                         s->mv[0][0][1] = best_s.mv[0][0][1];
3047                         s->mv[1][0][0] = best_s.mv[1][0][0];
3048                         s->mv[1][0][1] = best_s.mv[1][0][1];
3049
3050                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3051                         for(; qpi<4; qpi++){
3052                             int dquant= dquant_tab[qpi];
3053                             qp= last_qp + dquant;
3054                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3055                                 continue;
3056                             backup_s.dquant= dquant;
3057                             if(storecoefs){
3058                                 for(i=0; i<6; i++){
3059                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
3060                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3061                                 }
3062                             }
3063
3064                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3065                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3066                             if(best_s.qscale != qp){
3067                                 if(storecoefs){
3068                                     for(i=0; i<6; i++){
3069                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
3070                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3071                                     }
3072                                 }
3073                             }
3074                         }
3075                     }
3076                 }
3077                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3078                     int mx= s->b_direct_mv_table[xy][0];
3079                     int my= s->b_direct_mv_table[xy][1];
3080
3081                     backup_s.dquant = 0;
3082                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3083                     s->mb_intra= 0;
3084                     ff_mpeg4_set_direct_mv(s, mx, my);
3085                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3086                                  &dmin, &next_block, mx, my);
3087                 }
3088                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3089                     backup_s.dquant = 0;
3090                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3091                     s->mb_intra= 0;
3092                     ff_mpeg4_set_direct_mv(s, 0, 0);
3093                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
3094                                  &dmin, &next_block, 0, 0);
3095                 }
3096                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3097                     int coded=0;
3098                     for(i=0; i<6; i++)
3099                         coded |= s->block_last_index[i];
3100                     if(coded){
3101                         int mx,my;
3102                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
3103                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3104                             mx=my=0; //FIXME find the one we actually used
3105                             ff_mpeg4_set_direct_mv(s, mx, my);
3106                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3107                             mx= s->mv[1][0][0];
3108                             my= s->mv[1][0][1];
3109                         }else{
3110                             mx= s->mv[0][0][0];
3111                             my= s->mv[0][0][1];
3112                         }
3113
3114                         s->mv_dir= best_s.mv_dir;
3115                         s->mv_type = best_s.mv_type;
3116                         s->mb_intra= 0;
3117 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
3118                         s->mv[0][0][1] = best_s.mv[0][0][1];
3119                         s->mv[1][0][0] = best_s.mv[1][0][0];
3120                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
3121                         backup_s.dquant= 0;
3122                         s->skipdct=1;
3123                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
3124                                         &dmin, &next_block, mx, my);
3125                         s->skipdct=0;
3126                     }
3127                 }
3128
3129                 s->current_picture.qscale_table[xy] = best_s.qscale;
3130
3131                 copy_context_after_encode(s, &best_s, -1);
3132
3133                 pb_bits_count= put_bits_count(&s->pb);
3134                 flush_put_bits(&s->pb);
3135                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3136                 s->pb= backup_s.pb;
3137
3138                 if(s->data_partitioning){
3139                     pb2_bits_count= put_bits_count(&s->pb2);
3140                     flush_put_bits(&s->pb2);
3141                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3142                     s->pb2= backup_s.pb2;
3143
3144                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
3145                     flush_put_bits(&s->tex_pb);
3146                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3147                     s->tex_pb= backup_s.tex_pb;
3148                 }
3149                 s->last_bits= put_bits_count(&s->pb);
3150
3151                 if (CONFIG_H263_ENCODER &&
3152                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3153                     ff_h263_update_motion_val(s);
3154
3155                 if(next_block==0){ //FIXME 16 vs linesize16
3156                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
3157                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
3158                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3159                 }
3160
3161                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3162                     ff_mpv_decode_mb(s, s->block);
3163             } else {
3164                 int motion_x = 0, motion_y = 0;
3165                 s->mv_type=MV_TYPE_16X16;
3166                 // only one MB-Type possible
3167
3168                 switch(mb_type){
3169                 case CANDIDATE_MB_TYPE_INTRA:
3170                     s->mv_dir = 0;
3171                     s->mb_intra= 1;
3172                     motion_x= s->mv[0][0][0] = 0;
3173                     motion_y= s->mv[0][0][1] = 0;
3174                     break;
3175                 case CANDIDATE_MB_TYPE_INTER:
3176                     s->mv_dir = MV_DIR_FORWARD;
3177                     s->mb_intra= 0;
3178                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3179                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3180                     break;
3181                 case CANDIDATE_MB_TYPE_INTER_I:
3182                     s->mv_dir = MV_DIR_FORWARD;
3183                     s->mv_type = MV_TYPE_FIELD;
3184                     s->mb_intra= 0;
3185                     for(i=0; i<2; i++){
3186                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3187                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3188                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3189                     }
3190                     break;
3191                 case CANDIDATE_MB_TYPE_INTER4V:
3192                     s->mv_dir = MV_DIR_FORWARD;
3193                     s->mv_type = MV_TYPE_8X8;
3194                     s->mb_intra= 0;
3195                     for(i=0; i<4; i++){
3196                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
3197                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
3198                     }
3199                     break;
3200                 case CANDIDATE_MB_TYPE_DIRECT:
3201                     if (CONFIG_MPEG4_ENCODER) {
3202                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3203                         s->mb_intra= 0;
3204                         motion_x=s->b_direct_mv_table[xy][0];
3205                         motion_y=s->b_direct_mv_table[xy][1];
3206                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3207                     }
3208                     break;
3209                 case CANDIDATE_MB_TYPE_DIRECT0:
3210                     if (CONFIG_MPEG4_ENCODER) {
3211                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3212                         s->mb_intra= 0;
3213                         ff_mpeg4_set_direct_mv(s, 0, 0);
3214                     }
3215                     break;
3216                 case CANDIDATE_MB_TYPE_BIDIR:
3217                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3218                     s->mb_intra= 0;
3219                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3220                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3221                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3222                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3223                     break;
3224                 case CANDIDATE_MB_TYPE_BACKWARD:
3225                     s->mv_dir = MV_DIR_BACKWARD;
3226                     s->mb_intra= 0;
3227                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3228                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3229                     break;
3230                 case CANDIDATE_MB_TYPE_FORWARD:
3231                     s->mv_dir = MV_DIR_FORWARD;
3232                     s->mb_intra= 0;
3233                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3234                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3235                     break;
3236                 case CANDIDATE_MB_TYPE_FORWARD_I:
3237                     s->mv_dir = MV_DIR_FORWARD;
3238                     s->mv_type = MV_TYPE_FIELD;
3239                     s->mb_intra= 0;
3240                     for(i=0; i<2; i++){
3241                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3242                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3243                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3244                     }
3245                     break;
3246                 case CANDIDATE_MB_TYPE_BACKWARD_I:
3247                     s->mv_dir = MV_DIR_BACKWARD;
3248                     s->mv_type = MV_TYPE_FIELD;
3249                     s->mb_intra= 0;
3250                     for(i=0; i<2; i++){
3251                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3252                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3253                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3254                     }
3255                     break;
3256                 case CANDIDATE_MB_TYPE_BIDIR_I:
3257                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3258                     s->mv_type = MV_TYPE_FIELD;
3259                     s->mb_intra= 0;
3260                     for(dir=0; dir<2; dir++){
3261                         for(i=0; i<2; i++){
3262                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3263                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3264                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3265                         }
3266                     }
3267                     break;
3268                 default:
3269                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3270                 }
3271
3272                 encode_mb(s, motion_x, motion_y);
3273
3274                 // RAL: Update last macroblock type
3275                 s->last_mv_dir = s->mv_dir;
3276
3277                 if (CONFIG_H263_ENCODER &&
3278                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3279                     ff_h263_update_motion_val(s);
3280
3281                 ff_mpv_decode_mb(s, s->block);
3282             }
3283
3284             /* clean the MV table in IPS frames for direct mode in B frames */
3285             if(s->mb_intra /* && I,P,S_TYPE */){
3286                 s->p_mv_table[xy][0]=0;
3287                 s->p_mv_table[xy][1]=0;
3288             }
3289
3290             if(s->flags&CODEC_FLAG_PSNR){
3291                 int w= 16;
3292                 int h= 16;
3293
3294                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3295                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3296
3297                 s->current_picture.error[0] += sse(
3298                     s, s->new_picture.f->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3299                     s->dest[0], w, h, s->linesize);
3300                 s->current_picture.error[1] += sse(
3301                     s, s->new_picture.f->data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3302                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3303                 s->current_picture.error[2] += sse(
3304                     s, s->new_picture.f->data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3305                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3306             }
3307             if(s->loop_filter){
3308                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3309                     ff_h263_loop_filter(s);
3310             }
3311             av_dlog(s->avctx, "MB %d %d bits\n",
3312                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3313         }
3314     }
3315
3316     //not beautiful here but we must write it before flushing so it has to be here
3317     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3318         ff_msmpeg4_encode_ext_header(s);
3319
3320     write_slice_end(s);
3321
3322     /* Send the last GOB if RTP */
3323     if (s->avctx->rtp_callback) {
3324         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3325         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3326         /* Call the RTP callback to send the last GOB */
3327         emms_c();
3328         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3329     }
3330
3331     return 0;
3332 }
3333
3334 #define MERGE(field) dst->field += src->field; src->field=0
3335 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3336     MERGE(me.scene_change_score);
3337     MERGE(me.mc_mb_var_sum_temp);
3338     MERGE(me.mb_var_sum_temp);
3339 }
3340
3341 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3342     int i;
3343
3344     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3345     MERGE(dct_count[1]);
3346     MERGE(mv_bits);
3347     MERGE(i_tex_bits);
3348     MERGE(p_tex_bits);
3349     MERGE(i_count);
3350     MERGE(f_count);
3351     MERGE(b_count);
3352     MERGE(skip_count);
3353     MERGE(misc_bits);
3354     MERGE(er.error_count);
3355     MERGE(padding_bug_score);
3356     MERGE(current_picture.error[0]);
3357     MERGE(current_picture.error[1]);
3358     MERGE(current_picture.error[2]);
3359
3360     if(dst->avctx->noise_reduction){
3361         for(i=0; i<64; i++){
3362             MERGE(dct_error_sum[0][i]);
3363             MERGE(dct_error_sum[1][i]);
3364         }
3365     }
3366
3367     assert(put_bits_count(&src->pb) % 8 ==0);
3368     assert(put_bits_count(&dst->pb) % 8 ==0);
3369     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3370     flush_put_bits(&dst->pb);
3371 }
3372
3373 static int estimate_qp(MpegEncContext *s, int dry_run){
3374     if (s->next_lambda){
3375         s->current_picture_ptr->f->quality =
3376         s->current_picture.f->quality = s->next_lambda;
3377         if(!dry_run) s->next_lambda= 0;
3378     } else if (!s->fixed_qscale) {
3379         s->current_picture_ptr->f->quality =
3380         s->current_picture.f->quality = ff_rate_estimate_qscale(s, dry_run);
3381         if (s->current_picture.f->quality < 0)
3382             return -1;
3383     }
3384
3385     if(s->adaptive_quant){
3386         switch(s->codec_id){
3387         case AV_CODEC_ID_MPEG4:
3388             if (CONFIG_MPEG4_ENCODER)
3389                 ff_clean_mpeg4_qscales(s);
3390             break;
3391         case AV_CODEC_ID_H263:
3392         case AV_CODEC_ID_H263P:
3393         case AV_CODEC_ID_FLV1:
3394             if (CONFIG_H263_ENCODER)
3395                 ff_clean_h263_qscales(s);
3396             break;
3397         default:
3398             ff_init_qscale_tab(s);
3399         }
3400
3401         s->lambda= s->lambda_table[0];
3402         //FIXME broken
3403     }else
3404         s->lambda = s->current_picture.f->quality;
3405     update_qscale(s);
3406     return 0;
3407 }
3408
3409 /* must be called before writing the header */
3410 static void set_frame_distances(MpegEncContext * s){
3411     av_assert1(s->current_picture_ptr->f->pts != AV_NOPTS_VALUE);
3412     s->time = s->current_picture_ptr->f->pts * s->avctx->time_base.num;
3413
3414     if(s->pict_type==AV_PICTURE_TYPE_B){
3415         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3416         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3417     }else{
3418         s->pp_time= s->time - s->last_non_b_time;
3419         s->last_non_b_time= s->time;
3420         assert(s->picture_number==0 || s->pp_time > 0);
3421     }
3422 }
3423
3424 static int encode_picture(MpegEncContext *s, int picture_number)
3425 {
     /*
      * Encode the current picture into s->pb.
      *
      * In order: set up timing and rounding state, pick a lambda, run motion
      * estimation (or the intra variance pass for I pictures) on all slice
      * contexts, select f_code/b_code, run estimate_qp(), set up the MJPEG/AMV
      * quantization matrices, write the picture header and finally run
      * encode_thread on every slice context and merge the results back into s.
      *
      * Returns 0 on success and a negative value on failure.
      */
3426     int i, ret;
3427     int bits;
3428     int context_count = s->slice_context_count;
3429
3430     s->picture_number = picture_number;
3431
3432     /* Reset the average MB variance */
3433     s->me.mb_var_sum_temp    =
3434     s->me.mc_mb_var_sum_temp = 0;
3435
3436     /* we need to initialize some time vars before we can encode b-frames */
3437     // RAL: Condition added for MPEG1VIDEO
3438     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3439         set_frame_distances(s);
3440     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3441         ff_set_mpeg4_time(s);
3442
3443     s->me.scene_change_score=0;
3444
3445 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3446
         /* I pictures reset no_rounding (1 only for msmpeg4_version >= 3);
          * other non-B pictures toggle it when flipflop_rounding is set or the
          * codec is H263P or MPEG4. B pictures leave it unchanged. */
3447     if(s->pict_type==AV_PICTURE_TYPE_I){
3448         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3449         else                        s->no_rounding=0;
3450     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3451         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3452             s->no_rounding ^= 1;
3453     }
3454
         /* Pick a lambda before motion estimation runs: with CODEC_FLAG_PASS2 do
          * a dry-run estimate_qp(); otherwise, unless CODEC_FLAG_QSCALE is set,
          * reuse the last lambda stored for the relevant picture type. */
3455     if(s->flags & CODEC_FLAG_PASS2){
3456         if (estimate_qp(s,1) < 0)
3457             return -1;
3458         ff_get_2pass_fcode(s);
3459     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3460         if(s->pict_type==AV_PICTURE_TYPE_B)
3461             s->lambda= s->last_lambda_for[s->pict_type];
3462         else
3463             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3464         update_qscale(s);
3465     }
3466
         /* For every codec except AMV and MJPEG the chroma intra matrices alias
          * the luma ones; chroma matrices that differ from them are freed first. */
3467     if(s->codec_id != AV_CODEC_ID_AMV && s->codec_id != AV_CODEC_ID_MJPEG){
3468         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3469         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3470         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3471         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3472     }
3473
3474     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Update the additional slice contexts (index 1 and up) from s; a
          * negative return value aborts the picture. */
3475     for(i=1; i<context_count; i++){
3476         ret = ff_update_duplicate_context(s->thread_context[i], s);
3477         if (ret < 0)
3478             return ret;
3479     }
3480
3481     if(ff_init_me(s)<0)
3482         return -1;
3483
3484     /* Estimate motion for every MB */
3485     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3486         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3487         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
             /* the pre-pass runs for non-B pictures when pre_me is 2, or when
              * pre_me is nonzero and the last non-B picture was an I picture */
3488         if (s->pict_type != AV_PICTURE_TYPE_B) {
3489             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3490                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3491             }
3492         }
3493
3494         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3495     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3496         /* I-Frame */
3497         for(i=0; i<s->mb_stride*s->mb_height; i++)
3498             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3499
3500         if(!s->fixed_qscale){
3501             /* finding spatial complexity for I-frame rate control */
3502             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3503         }
3504     }
         /* add the scene change score and variance sums of the other slice
          * contexts to s, then publish the variance sums on the picture */
3505     for(i=1; i<context_count; i++){
3506         merge_context_after_me(s, s->thread_context[i]);
3507     }
3508     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3509     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3510     emms_c();
3511
         /* a P picture whose scene change score exceeds the threshold is
          * re-typed as an I picture with every macroblock marked intra */
3512     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3513         s->pict_type= AV_PICTURE_TYPE_I;
3514         for(i=0; i<s->mb_stride*s->mb_height; i++)
3515             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3516         if(s->msmpeg4_version >= 3)
3517             s->no_rounding=1;
3518         av_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3519                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3520     }
3521
         /* f_code/b_code selection and the ff_fix_long_*mvs() calls are skipped
          * entirely when umvplus is set */
3522     if(!s->umvplus){
3523         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3524             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3525
3526             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3527                 int a,b;
3528                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3529                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3530                 s->f_code= FFMAX3(s->f_code, a, b);
3531             }
3532
3533             ff_fix_long_p_mvs(s);
3534             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3535             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3536                 int j;
3537                 for(i=0; i<2; i++){
3538                     for(j=0; j<2; j++)
3539                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3540                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3541                 }
3542             }
3543         }
3544
3545         if(s->pict_type==AV_PICTURE_TYPE_B){
3546             int a, b;
3547
                 /* f_code covers the forward tables, b_code the backward tables;
                  * each is the larger of the plain and the bidir result */
3548             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3549             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3550             s->f_code = FFMAX(a, b);
3551
3552             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3553             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3554             s->b_code = FFMAX(a, b);
3555
3556             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3557             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3558             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3559             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3560             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3561                 int dir, j;
3562                 for(dir=0; dir<2; dir++){
3563                     for(i=0; i<2; i++){
3564                         for(j=0; j<2; j++){
3565                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3566                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3567                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3568                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3569                         }
3570                     }
3571                 }
3572             }
3573         }
3574     }
3575
         /* the real (non dry-run) rate control decision for this picture */
3576     if (estimate_qp(s, 0) < 0)
3577         return -1;
3578
3579     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3580         s->qscale= 3; //reduce clipping problems
3581
3582     if (s->out_format == FMT_MJPEG) {
             /* matrix source: the MPEG-1 default, overridden by the user's
              * intra_matrix (both planes) and chroma_intra_matrix (chroma only) */
3583         const uint16_t *  luma_matrix = ff_mpeg1_default_intra_matrix;
3584         const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3585
3586         if (s->avctx->intra_matrix) {
3587             chroma_matrix =
3588             luma_matrix = s->avctx->intra_matrix;
3589         }
3590         if (s->avctx->chroma_intra_matrix)
3591             chroma_matrix = s->avctx->chroma_intra_matrix;
3592
3593         /* for mjpeg, we do include qscale in the matrix */
3594         for(i=1;i<64;i++){
3595             int j = s->idsp.idct_permutation[i];
3596
3597             s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3598             s->       intra_matrix[j] = av_clip_uint8((  luma_matrix[i] * s->qscale) >> 3);
3599         }
3600         s->y_dc_scale_table=
3601         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3602         s->chroma_intra_matrix[0] =
3603         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3604         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3605                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3606         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3607                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrices above, so it is pinned to 8 */
3608         s->qscale= 8;
3609     }
3610     if(s->codec_id == AV_CODEC_ID_AMV){
             /* AMV: constant DC scale tables (13 for luma, 14 for chroma) and
              * matrices taken from rows 10 and 11 of sp5x_quant_table */
3611         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3612         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3613         for(i=1;i<64;i++){
3614             int j= s->idsp.idct_permutation[ff_zigzag_direct[i]];
3615
3616             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3617             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3618         }
3619         s->y_dc_scale_table= y;
3620         s->c_dc_scale_table= c;
3621         s->intra_matrix[0] = 13;
3622         s->chroma_intra_matrix[0] = 14;
3623         ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3624                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3625         ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3626                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3627         s->qscale= 8;
3628     }
3629
3630     //FIXME var duplication
3631     s->current_picture_ptr->f->key_frame =
3632     s->current_picture.f->key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3633     s->current_picture_ptr->f->pict_type =
3634     s->current_picture.f->pict_type = s->pict_type;
3635
3636     if (s->current_picture.f->key_frame)
3637         s->picture_in_gop_number=0;
3638
         /* write the picture header for the output format; last_bits marks the
          * bit position before the header so header_bits can be derived below */
3639     s->mb_x = s->mb_y = 0;
3640     s->last_bits= put_bits_count(&s->pb);
3641     switch(s->out_format) {
3642     case FMT_MJPEG:
3643         if (CONFIG_MJPEG_ENCODER)
3644             ff_mjpeg_encode_picture_header(s->avctx, &s->pb, &s->intra_scantable,
3645                                            s->intra_matrix, s->chroma_intra_matrix);
3646         break;
3647     case FMT_H261:
3648         if (CONFIG_H261_ENCODER)
3649             ff_h261_encode_picture_header(s, picture_number);
3650         break;
3651     case FMT_H263:
3652         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3653             ff_wmv2_encode_picture_header(s, picture_number);
3654         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3655             ff_msmpeg4_encode_picture_header(s, picture_number);
3656         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3657             ff_mpeg4_encode_picture_header(s, picture_number);
3658         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3659             ff_rv10_encode_picture_header(s, picture_number);
3660         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3661             ff_rv20_encode_picture_header(s, picture_number);
3662         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3663             ff_flv_encode_picture_header(s, picture_number);
3664         else if (CONFIG_H263_ENCODER)
3665             ff_h263_encode_picture_header(s, picture_number);
3666         break;
3667     case FMT_MPEG1:
3668         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3669             ff_mpeg1_encode_picture_header(s, picture_number);
3670         break;
3671     default:
3672         av_assert0(0);
3673     }
3674     bits= put_bits_count(&s->pb);
3675     s->header_bits= bits - s->last_bits;
3676
         /* update the additional slice contexts, run encode_thread on all
          * contexts, then fold their statistics and bitstreams back into s */
3677     for(i=1; i<context_count; i++){
3678         update_duplicate_context_after_me(s->thread_context[i], s);
3679     }
3680     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3681     for(i=1; i<context_count; i++){
3682         merge_context_after_encode(s, s->thread_context[i]);
3683     }
3684     emms_c();
3685     return 0;
3686 }
3687
3688 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3689     const int intra= s->mb_intra;
3690     int i;
3691
3692     s->dct_count[intra]++;
3693
3694     for(i=0; i<64; i++){
3695         int level= block[i];
3696
3697         if(level){
3698             if(level>0){
3699                 s->dct_error_sum[intra][i] += level;
3700                 level -= s->dct_offset[intra][i];
3701                 if(level<0) level=0;
3702             }else{
3703                 s->dct_error_sum[intra][i] -= level;
3704                 level += s->dct_offset[intra][i];
3705                 if(level>0) level=0;
3706             }
3707             block[i]= level;
3708         }
3709     }
3710 }
3711
/**
 * Forward-DCT and quantise one 8x8 block, choosing the quantised levels with
 * a dynamic-programming search that weighs squared reconstruction error
 * against the VLC bit cost (scaled by lambda).
 *
 * For every coefficient up to the last one that quantises to nonzero, up to
 * two candidate levels are considered (the plain quantised level and the one
 * that is 1 closer to zero); leaving a coefficient at zero is modelled through
 * the run length to the previous coded coefficient.
 *
 * @param s        encoder context; s->coded_score[n] is written
 * @param block    in: spatial samples, out: quantised coefficients
 *                 (AC written through intra_scantable.permutated)
 * @param n        block index; n < 4 selects the luma DC scale / intra matrix
 * @param qscale   quantiser scale, also used to index the q_*_matrix tables
 * @param overflow set to nonzero when the OR of the candidate magnitudes
 *                 exceeds s->max_qcoeff
 * @return index (scan order) of the last nonzero coefficient; can be
 *         start_i - 1 (0 for intra, -1 for inter) when no AC/coeff is coded
 */
3712 static int dct_quantize_trellis_c(MpegEncContext *s,
3713                                   int16_t *block, int n,
3714                                   int qscale, int *overflow){
3715     const int *qmat;
3716     const uint8_t *scantable= s->intra_scantable.scantable;
3717     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3718     int max=0;
3719     unsigned int threshold1, threshold2;
3720     int bias=0;
         /* run_tab/level_tab[i+1]: run and level chosen for the best path that
          * codes a nonzero coefficient at scan position i;
          * score_tab[i+1]: score of that path;
          * survivor[]: positions still worth considering as "previous coded
          * coefficient" for later positions */
3721     int run_tab[65];
3722     int level_tab[65];
3723     int score_tab[65];
3724     int survivor[65];
3725     int survivor_count;
3726     int last_run=0;
3727     int last_level=0;
3728     int last_score= 0;
3729     int last_i;
         /* coeff[k][i]: k-th candidate level at scan position i,
          * coeff_count[i]: number of valid candidates (1 or 2) */
3730     int coeff[2][64];
3731     int coeff_count[64];
3732     int qmul, qadd, start_i, last_non_zero, i, dc;
3733     const int esc_length= s->ac_esc_length;
3734     uint8_t * length;
3735     uint8_t * last_length;
3736     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3737
3738     s->fdsp.fdct(block);
3739
3740     if(s->dct_error_sum)
3741         s->denoise_dct(s, block);
         /* dequantisation constants used below as alevel*qmul + qadd for the
          * H.263/H.261 output formats */
3742     qmul= qscale*16;
3743     qadd= ((qscale-1)|1)*8;
3744
3745     if (s->mb_intra) {
3746         int q;
3747         if (!s->h263_aic) {
3748             if (n < 4)
3749                 q = s->y_dc_scale;
3750             else
3751                 q = s->c_dc_scale;
3752             q = q << 3;
3753         } else{
3754             /* For AIC we skip quant/dequant of INTRADC */
3755             q = 1 << 3;
3756             qadd=0;
3757         }
3758
3759         /* note: block[0] is assumed to be positive */
             /* DC is quantised here by rounded division; the search below only
              * covers scan positions >= 1 */
3760         block[0] = (block[0] + (q >> 1)) / q;
3761         start_i = 1;
3762         last_non_zero = 0;
3763         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3764         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3765             bias= 1<<(QMAT_SHIFT-1);
3766         length     = s->intra_ac_vlc_length;
3767         last_length= s->intra_ac_vlc_last_length;
3768     } else {
3769         start_i = 0;
3770         last_non_zero = -1;
3771         qmat = s->q_inter_matrix[qscale];
3772         length     = s->inter_ac_vlc_length;
3773         last_length= s->inter_ac_vlc_last_length;
3774     }
3775     last_i= start_i;
3776
         /* (unsigned)(level+threshold1) > threshold2 is a single-compare test
          * for level outside [-threshold1, threshold1] */
3777     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3778     threshold2= (threshold1<<1);
3779
         /* scan backwards for the last coefficient that passes the threshold */
3780     for(i=63; i>=start_i; i--) {
3781         const int j = scantable[i];
3782         int level = block[j] * qmat[j];
3783
3784         if(((unsigned)(level+threshold1))>threshold2){
3785             last_non_zero = i;
3786             break;
3787         }
3788     }
3789
         /* build the candidate levels for every position up to last_non_zero */
3790     for(i=start_i; i<=last_non_zero; i++) {
3791         const int j = scantable[i];
3792         int level = block[j] * qmat[j];
3793
3794 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3795 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3796         if(((unsigned)(level+threshold1))>threshold2){
3797             if(level>0){
3798                 level= (bias + level)>>QMAT_SHIFT;
3799                 coeff[0][i]= level;
3800                 coeff[1][i]= level-1;
3801 //                coeff[2][k]= level-2;
3802             }else{
3803                 level= (bias - level)>>QMAT_SHIFT;
3804                 coeff[0][i]= -level;
3805                 coeff[1][i]= -level+1;
3806 //                coeff[2][k]= -level+2;
3807             }
                 /* level is the magnitude here; a magnitude of 1 has only one
                  * nonzero candidate */
3808             coeff_count[i]= FFMIN(level, 2);
3809             av_assert2(coeff_count[i]);
3810             max |=level;
3811         }else{
                 /* below threshold: the only candidate is +-1 carrying the
                  * sign of the scaled coefficient */
3812             coeff[0][i]= (level>>31)|1;
3813             coeff_count[i]= 1;
3814         }
3815     }
3816
3817     *overflow= s->max_qcoeff < max; //overflow might have happened
3818
         /* nothing passes the threshold: clear the searched range and stop */
3819     if(last_non_zero < start_i){
3820         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3821         return last_non_zero;
3822     }
3823
3824     score_tab[start_i]= 0;
3825     survivor[0]= start_i;
3826     survivor_count= 1;
3827
         /* main search: for each position pick the cheapest (previous
          * survivor, candidate level) pair */
3828     for(i=start_i; i<=last_non_zero; i++){
3829         int level_index, j, zero_distortion;
3830         int dct_coeff= FFABS(block[ scantable[i] ]);
3831         int best_score=256*256*256*120;
3832
             /* ff_fdct_ifast output is rescaled with ff_inv_aanscales before
              * it is compared against dequantised values */
3833         if (s->fdsp.fdct == ff_fdct_ifast)
3834             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3835         zero_distortion= dct_coeff*dct_coeff;
3836
3837         for(level_index=0; level_index < coeff_count[i]; level_index++){
3838             int distortion;
3839             int level= coeff[level_index][i];
3840             const int alevel= FFABS(level);
3841             int unquant_coeff;
3842
3843             av_assert2(level);
3844
                 /* magnitude this candidate would dequantise to */
3845             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3846                 unquant_coeff= alevel*qmul + qadd;
3847             }else{ //MPEG1
3848                 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
3849                 if(s->mb_intra){
3850                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3851                         unquant_coeff =   (unquant_coeff - 1) | 1;
3852                 }else{
3853                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3854                         unquant_coeff =   (unquant_coeff - 1) | 1;
3855                 }
3856                 unquant_coeff<<= 3;
3857             }
3858
                 /* squared error of this candidate minus the squared error of
                  * coding zero at this position */
3859             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* levels are biased by 64 for the length tables; a biased
                  * value outside 0..127 takes the escape branch below */
3860             level+=64;
3861             if((level&(~127)) == 0){
3862                 for(j=survivor_count-1; j>=0; j--){
3863                     int run= i - survivor[j];
3864                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3865                     score += score_tab[i-run];
3866
3867                     if(score < best_score){
3868                         best_score= score;
3869                         run_tab[i+1]= run;
3870                         level_tab[i+1]= level-64;
3871                     }
3872                 }
3873
                     /* for these formats the final coefficient is costed with
                      * last_length, so the best ending is tracked alongside */
3874                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3875                     for(j=survivor_count-1; j>=0; j--){
3876                         int run= i - survivor[j];
3877                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3878                         score += score_tab[i-run];
3879                         if(score < last_score){
3880                             last_score= score;
3881                             last_run= run;
3882                             last_level= level-64;
3883                             last_i= i+1;
3884                         }
3885                     }
3886                 }
3887             }else{
                     /* escape coded: cost does not depend on the run */
3888                 distortion += esc_length*lambda;
3889                 for(j=survivor_count-1; j>=0; j--){
3890                     int run= i - survivor[j];
3891                     int score= distortion + score_tab[i-run];
3892
3893                     if(score < best_score){
3894                         best_score= score;
3895                         run_tab[i+1]= run;
3896                         level_tab[i+1]= level-64;
3897                     }
3898                 }
3899
3900                 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3901                   for(j=survivor_count-1; j>=0; j--){
3902                         int run= i - survivor[j];
3903                         int score= distortion + score_tab[i-run];
3904                         if(score < last_score){
3905                             last_score= score;
3906                             last_run= run;
3907                             last_level= level-64;
3908                             last_i= i+1;
3909                         }
3910                     }
3911                 }
3912             }
3913         }
3914
3915         score_tab[i+1]= best_score;
3916
             /* drop trailing survivors whose score is worse than the path just
              * found; for longer blocks a slack of lambda is allowed */
3917         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3918         if(last_non_zero <= 27){
3919             for(; survivor_count; survivor_count--){
3920                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3921                     break;
3922             }
3923         }else{
3924             for(; survivor_count; survivor_count--){
3925                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3926                     break;
3927             }
3928         }
3929
3930         survivor[ survivor_count++ ]= i+1;
3931     }
3932
         /* other formats: pick the end position afterwards from score_tab,
          * adding 2*lambda for any end position other than 0 */
3933     if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
3934         last_score= 256*256*256*120;
3935         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3936             int score= score_tab[i];
3937             if(i) score += lambda*2; //FIXME exacter?
3938
3939             if(score < last_score){
3940                 last_score= score;
3941                 last_i= i;
3942                 last_level= level_tab[i];
3943                 last_run= run_tab[i];
3944             }
3945         }
3946     }
3947
3948     s->coded_score[n] = last_score;
3949
         /* block[0] is read before the coefficients are cleared; it is only
          * used by the single-coefficient case below */
3950     dc= FFABS(block[0]);
3951     last_non_zero= last_i - 1;
3952     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3953
3954     if(last_non_zero < start_i)
3955         return last_non_zero;
3956
         /* non-intra block where only scan position 0 is coded: re-decide its
          * level against the alternative of coding nothing (score dc*dc) */
3957     if(last_non_zero == 0 && start_i == 0){
3958         int best_level= 0;
3959         int best_score= dc * dc;
3960
3961         for(i=0; i<coeff_count[0]; i++){
3962             int level= coeff[i][0];
3963             int alevel= FFABS(level);
3964             int unquant_coeff, score, distortion;
3965
3966             if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
3967                     unquant_coeff= (alevel*qmul + qadd)>>3;
3968             }else{ //MPEG1
3969                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3970                     unquant_coeff =   (unquant_coeff - 1) | 1;
3971             }
3972             unquant_coeff = (unquant_coeff + 4) >> 3;
3973             unquant_coeff<<= 3 + 3;
3974
3975             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3976             level+=64;
3977             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3978             else                    score= distortion + esc_length*lambda;
3979
3980             if(score < best_score){
3981                 best_score= score;
3982                 best_level= level - 64;
3983             }
3984         }
3985         block[0]= best_level;
3986         s->coded_score[n] = best_score - dc*dc;
3987         if(best_level == 0) return -1;
3988         else                return last_non_zero;
3989     }
3990
         /* backtrack: write the final coefficient, then follow run_tab /
          * level_tab towards start_i */
3991     i= last_i;
3992     av_assert2(last_level);
3993
3994     block[ perm_scantable[last_non_zero] ]= last_level;
3995     i -= last_run + 1;
3996
3997     for(; i>start_i; i -= run_tab[i] + 1){
3998         block[ perm_scantable[i-1] ]= level_tab[i];
3999     }
4000
4001     return last_non_zero;
4002 }
4003
/* Uncomment to compile the REFINE_STATS counters and timers into
 * dct_quantize_refine(). */
4004 //#define REFINE_STATS 1
/* 64 basis patterns of 64 samples each, filled by build_basis().
 * dct_quantize_refine() treats basis[0][0] == 0 as "not built yet". */
4005 static int16_t basis[64][64];
4006
4007 static void build_basis(uint8_t *perm){
4008     int i, j, x, y;
4009     emms_c();
4010     for(i=0; i<8; i++){
4011         for(j=0; j<8; j++){
4012             for(y=0; y<8; y++){
4013                 for(x=0; x<8; x++){
4014                     double s= 0.25*(1<<BASIS_SHIFT);
4015                     int index= 8*i + j;
4016                     int perm_index= perm[index];
4017                     if(i==0) s*= sqrt(0.5);
4018                     if(j==0) s*= sqrt(0.5);
4019                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4020                 }
4021             }
4022         }
4023     }
4024 }
4025
/**
 * Iteratively refine an already quantised block: in each pass every tried
 * coefficient is tentatively changed by +1 and -1, the change with the lowest
 * score (mpvencdsp.try_8x8basis() result plus lambda times the VLC length
 * difference) is applied, and the loop ends when no change beats the score of
 * leaving the block as it is.
 *
 * @param s      encoder context
 * @param block  quantised coefficients, indexed through
 *               intra_scantable.permutated; modified in place
 * @param weight per-sample weights; overwritten with derived weights in the
 *               range 16..63
 * @param orig   reference samples that rem[] is initialised from
 *               (NOTE(review): presumably the source pixels of the block --
 *               confirm against the caller)
 * @param n      block index; n < 4 selects the luma DC scale
 * @param qscale quantiser scale
 * @return index (scan order) of the last nonzero coefficient after refinement
 */
4026 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4027                         int16_t *block, int16_t *weight, int16_t *orig,
4028                         int n, int qscale){
         /* rem[]: running residual, updated through mpvencdsp.add_8x8basis();
          * d1[]: weighted residual after fdct, used for the gradient test */
4029     int16_t rem[64];
4030     LOCAL_ALIGNED_16(int16_t, d1, [64]);
4031     const uint8_t *scantable= s->intra_scantable.scantable;
4032     const uint8_t *perm_scantable= s->intra_scantable.permutated;
4033 //    unsigned int threshold1, threshold2;
4034 //    int bias=0;
4035     int run_tab[65];
4036     int prev_run=0;
4037     int prev_level=0;
4038     int qmul, qadd, start_i, last_non_zero, i, dc;
4039     uint8_t * length;
4040     uint8_t * last_length;
4041     int lambda;
4042     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4043 #ifdef REFINE_STATS
4044 static int count=0;
4045 static int after_last=0;
4046 static int to_zero=0;
4047 static int from_zero=0;
4048 static int raise=0;
4049 static int lower=0;
4050 static int messed_sign=0;
4051 #endif
4052
         /* the basis table is built on first use */
4053     if(basis[0][0] == 0)
4054         build_basis(s->idsp.idct_permutation);
4055
         /* a nonzero level dequantises to qmul*level +- qadd below */
4056     qmul= qscale*2;
4057     qadd= (qscale-1)|1;
4058     if (s->mb_intra) {
4059         if (!s->h263_aic) {
4060             if (n < 4)
4061                 q = s->y_dc_scale;
4062             else
4063                 q = s->c_dc_scale;
4064         } else{
4065             /* For AIC we skip quant/dequant of INTRADC */
4066             q = 1;
4067             qadd=0;
4068         }
4069         q <<= RECON_SHIFT-3;
4070         /* note: block[0] is assumed to be positive */
4071         dc= block[0]*q;
4072 //        block[0] = (block[0] + (q >> 1)) / q;
4073         start_i = 1;
4074 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4075 //            bias= 1<<(QMAT_SHIFT-1);
4076         length     = s->intra_ac_vlc_length;
4077         last_length= s->intra_ac_vlc_last_length;
4078     } else {
4079         dc= 0;
4080         start_i = 0;
4081         length     = s->inter_ac_vlc_length;
4082         last_length= s->inter_ac_vlc_last_length;
4083     }
4084     last_non_zero = s->block_last_index[n];
4085
4086 #ifdef REFINE_STATS
4087 {START_TIMER
4088 #endif
         /* residual starts as (DC term + rounding offset) minus the reference
          * samples scaled by 1 << RECON_SHIFT */
4089     dc += (1<<(RECON_SHIFT-1));
4090     for(i=0; i<64; i++){
4091         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
4092     }
4093 #ifdef REFINE_STATS
4094 STOP_TIMER("memset rem[]")}
4095 #endif
         /* replace each input weight by a derived weight and accumulate the
          * sum of squared weights for the lambda scaling */
4096     sum=0;
4097     for(i=0; i<64; i++){
4098         int one= 36;
4099         int qns=4;
4100         int w;
4101
4102         w= FFABS(weight[i]) + qns*one;
4103         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4104
4105         weight[i] = w;
4106 //        w=weight[i] = (63*qns + (w/2)) / w;
4107
4108         av_assert2(w>0);
4109         av_assert2(w<(1<<6));
4110         sum += w*w;
4111     }
4112     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4113 #ifdef REFINE_STATS
4114 {START_TIMER
4115 #endif
         /* add the dequantised contribution of every coded coefficient to
          * rem[] and record the zero run that precedes each of them */
4116     run=0;
4117     rle_index=0;
4118     for(i=start_i; i<=last_non_zero; i++){
4119         int j= perm_scantable[i];
4120         const int level= block[j];
4121         int coeff;
4122
4123         if(level){
4124             if(level<0) coeff= qmul*level - qadd;
4125             else        coeff= qmul*level + qadd;
4126             run_tab[rle_index++]=run;
4127             run=0;
4128
4129             s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4130         }else{
4131             run++;
4132         }
4133     }
4134 #ifdef REFINE_STATS
4135 if(last_non_zero>0){
4136 STOP_TIMER("init rem[]")
4137 }
4138 }
4139
4140 {START_TIMER
4141 #endif
         /* one iteration applies at most one +-1 change; stop when none helps */
4142     for(;;){
             /* reference score: try_8x8basis() with a scale of 0, i.e. no change */
4143         int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4144         int best_coeff=0;
4145         int best_change=0;
4146         int run2, best_unquant_change=0, analyze_gradient;
4147 #ifdef REFINE_STATS
4148 {START_TIMER
4149 #endif
4150         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4151
             /* d1[] = fdct of the residual weighted by w*w; its sign is used
              * further down to skip turning a zero into +-1 of the same sign */
4152         if(analyze_gradient){
4153 #ifdef REFINE_STATS
4154 {START_TIMER
4155 #endif
4156             for(i=0; i<64; i++){
4157                 int w= weight[i];
4158
4159                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4160             }
4161 #ifdef REFINE_STATS
4162 STOP_TIMER("rem*w*w")}
4163 {START_TIMER
4164 #endif
4165             s->fdsp.fdct(d1);
4166 #ifdef REFINE_STATS
4167 STOP_TIMER("dct")}
4168 #endif
4169         }
4170
             /* intra DC (start_i != 0): try level +-1 with no rate term */
4171         if(start_i){
4172             const int level= block[0];
4173             int change, old_coeff;
4174
4175             av_assert2(s->mb_intra);
4176
4177             old_coeff= q*level;
4178
4179             for(change=-1; change<=1; change+=2){
4180                 int new_level= level + change;
4181                 int score, new_coeff;
4182
4183                 new_coeff= q*new_level;
4184                 if(new_coeff >= 2048 || new_coeff < 0)
4185                     continue;
4186
4187                 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4188                                                   new_coeff - old_coeff);
4189                 if(score<best_score){
4190                     best_score= score;
4191                     best_coeff= 0;
4192                     best_change= change;
4193                     best_unquant_change= new_coeff - old_coeff;
4194                 }
4195             }
4196         }
4197
             /* run: zeros since the previous coded coefficient;
              * run2: zeros remaining before the next coded coefficient;
              * prev_run/prev_level: previous coded coefficient (level biased
              * by 64, or 0 when outside the table range) */
4198         run=0;
4199         rle_index=0;
4200         run2= run_tab[rle_index++];
4201         prev_level=0;
4202         prev_run=0;
4203
4204         for(i=start_i; i<64; i++){
4205             int j= perm_scantable[i];
4206             const int level= block[j];
4207             int change, old_coeff;
4208
                 /* below noise-shaping level 3 only positions up to one past
                  * the last coded coefficient are tried */
4209             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4210                 break;
4211
4212             if(level){
4213                 if(level<0) old_coeff= qmul*level - qadd;
4214                 else        old_coeff= qmul*level + qadd;
4215                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4216             }else{
4217                 old_coeff=0;
4218                 run2--;
4219                 av_assert2(run2>=0 || i >= last_non_zero );
4220             }
4221
4222             for(change=-1; change<=1; change+=2){
4223                 int new_level= level + change;
4224                 int score, new_coeff, unquant_change;
4225
                     /* score first collects the change in VLC length */
4226                 score=0;
                     /* below noise-shaping level 2 magnitudes may only shrink */
4227                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4228                    continue;
4229
4230                 if(new_level){
4231                     if(new_level<0) new_coeff= qmul*new_level - qadd;
4232                     else            new_coeff= qmul*new_level + qadd;
4233                     if(new_coeff >= 2048 || new_coeff <= -2048)
4234                         continue;
4235                     //FIXME check for overflow
4236
4237                     if(level){
                             /* nonzero -> nonzero: same run, different level */
4238                         if(level < 63 && level > -63){
4239                             if(i < last_non_zero)
4240                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
4241                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
4242                             else
4243                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4244                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4245                         }
4246                     }else{
                             /* zero -> +-1: a new coefficient splits a run */
4247                         av_assert2(FFABS(new_level)==1);
4248
4249                         if(analyze_gradient){
4250                             int g= d1[ scantable[i] ];
                                 /* (g^new_level) >= 0: both have the same sign */
4251                             if(g && (g^new_level) >= 0)
4252                                 continue;
4253                         }
4254
4255                         if(i < last_non_zero){
4256                             int next_i= i + run2 + 1;
4257                             int next_level= block[ perm_scantable[next_i] ] + 64;
4258
4259                             if(next_level&(~127))
4260                                 next_level= 0;
4261
4262                             if(next_i < last_non_zero)
4263                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
4264                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
4265                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4266                             else
4267                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
4268                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4269                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4270                         }else{
                                 /* new coefficient becomes the last one; the
                                  * previous one moves from last_length to length */
4271                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4272                             if(prev_level){
4273                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4274                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4275                             }
4276                         }
4277                     }
4278                 }else{
                         /* +-1 -> zero: a coefficient disappears, runs merge */
4279                     new_coeff=0;
4280                     av_assert2(FFABS(level)==1);
4281
4282                     if(i < last_non_zero){
4283                         int next_i= i + run2 + 1;
4284                         int next_level= block[ perm_scantable[next_i] ] + 64;
4285
4286                         if(next_level&(~127))
4287                             next_level= 0;
4288
4289                         if(next_i < last_non_zero)
4290                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4291                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4292                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4293                         else
4294                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4295                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4296                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4297                     }else{
4298                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4299                         if(prev_level){
4300                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4301                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4302                         }
4303                     }
4304                 }
4305
                     /* length difference -> rate cost */
4306                 score *= lambda;
4307
4308                 unquant_change= new_coeff - old_coeff;
4309                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4310
4311                 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4312                                                    unquant_change);
4313                 if(score<best_score){
4314                     best_score= score;
4315                     best_coeff= i;
4316                     best_change= change;
4317                     best_unquant_change= unquant_change;
4318                 }
4319             }
4320             if(level){
4321                 prev_level= level + 64;
4322                 if(prev_level&(~127))
4323                     prev_level= 0;
4324                 prev_run= run;
4325                 run=0;
4326             }else{
4327                 run++;
4328             }
4329         }
4330 #ifdef REFINE_STATS
4331 STOP_TIMER("iterative step")}
4332 #endif
4333
             /* apply the winning change, if any */
4334         if(best_change){
4335             int j= perm_scantable[ best_coeff ];
4336
4337             block[j] += best_change;
4338
4339             if(best_coeff > last_non_zero){
4340                 last_non_zero= best_coeff;
4341                 av_assert2(block[j]);
4342 #ifdef REFINE_STATS
4343 after_last++;
4344 #endif
4345             }else{
4346 #ifdef REFINE_STATS
4347 if(block[j]){
4348     if(block[j] - best_change){
4349         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4350             raise++;
4351         }else{
4352             lower++;
4353         }
4354     }else{
4355         from_zero++;
4356     }
4357 }else{
4358     to_zero++;
4359 }
4360 #endif
                     /* the change may have zeroed the last coefficient */
4361                 for(; last_non_zero>=start_i; last_non_zero--){
4362                     if(block[perm_scantable[last_non_zero]])
4363                         break;
4364                 }
4365             }
4366 #ifdef REFINE_STATS
4367 count++;
4368 if(256*256*256*64 % count == 0){
4369     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4370 }
4371 #endif
                 /* rebuild the run table for the modified block */
4372             run=0;
4373             rle_index=0;
4374             for(i=start_i; i<=last_non_zero; i++){
4375                 int j= perm_scantable[i];
4376                 const int level= block[j];
4377
4378                  if(level){
4379                      run_tab[rle_index++]=run;
4380                      run=0;
4381                  }else{
4382                      run++;
4383                  }
4384             }
4385
                 /* fold the applied change into the residual */
4386             s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4387         }else{
4388             break;
4389         }
4390     }
4391 #ifdef REFINE_STATS
4392 if(last_non_zero>0){
4393 STOP_TIMER("iterative search")
4394 }
4395 }
4396 #endif
4397
4398     return last_non_zero;
4399 }
4400
4401 int ff_dct_quantize_c(MpegEncContext *s,
4402                         int16_t *block, int n,
4403                         int qscale, int *overflow)
4404 {
4405     int i, j, level, last_non_zero, q, start_i;
4406     const int *qmat;
4407     const uint8_t *scantable= s->intra_scantable.scantable;
4408     int bias;
4409     int max=0;
4410     unsigned int threshold1, threshold2;
4411
4412     s->fdsp.fdct(block);
4413
4414     if(s->dct_error_sum)
4415         s->denoise_dct(s, block);
4416
4417     if (s->mb_intra) {
4418         if (!s->h263_aic) {
4419             if (n < 4)
4420                 q = s->y_dc_scale;
4421             else
4422                 q = s->c_dc_scale;
4423             q = q << 3;
4424         } else
4425             /* For AIC we skip quant/dequant of INTRADC */
4426             q = 1 << 3;
4427
4428         /* note: block[0] is assumed to be positive */
4429         block[0] = (block[0] + (q >> 1)) / q;
4430         start_i = 1;
4431         last_non_zero = 0;
4432         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4433         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4434     } else {
4435         start_i = 0;
4436         last_non_zero = -1;
4437         qmat = s->q_inter_matrix[qscale];
4438         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4439     }
4440     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4441     threshold2= (threshold1<<1);
4442     for(i=63;i>=start_i;i--) {
4443         j = scantable[i];
4444         level = block[j] * qmat[j];
4445
4446         if(((unsigned)(level+threshold1))>threshold2){
4447             last_non_zero = i;
4448             break;
4449         }else{
4450             block[j]=0;
4451         }
4452     }
4453     for(i=start_i; i<=last_non_zero; i++) {
4454         j = scantable[i];
4455         level = block[j] * qmat[j];
4456
4457 //        if(   bias+level >= (1<<QMAT_SHIFT)
4458 //           || bias-level >= (1<<QMAT_SHIFT)){
4459         if(((unsigned)(level+threshold1))>threshold2){
4460             if(level>0){
4461                 level= (bias + level)>>QMAT_SHIFT;
4462                 block[j]= level;
4463             }else{
4464                 level= (bias - level)>>QMAT_SHIFT;
4465                 block[j]= -level;
4466             }
4467             max |=level;
4468         }else{
4469             block[j]=0;
4470         }
4471     }
4472     *overflow= s->max_qcoeff < max; //overflow might have happened
4473
4474     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4475     if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4476         ff_block_permute(block, s->idsp.idct_permutation,
4477                          scantable, last_non_zero);
4478
4479     return last_non_zero;
4480 }
4481
/* Byte offset of field x inside MpegEncContext, for AVOption tables. */
#define OFFSET(x) offsetof(MpegEncContext, x)
/* Option flags: video parameter + encoding parameter.
 * The replacement list is parenthesized so the macro expands to a single
 * operand and cannot re-associate with neighbouring operators
 * (e.g. "VE & x" or "~VE"). */
#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
4484 static const AVOption h263_options[] = {
4485     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4486     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4487     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4488     FF_MPV_COMMON_OPTS
4489     { NULL },
4490 };
4491
4492 static const AVClass h263_class = {
4493     .class_name = "H.263 encoder",
4494     .item_name  = av_default_item_name,
4495     .option     = h263_options,
4496     .version    = LIBAVUTIL_VERSION_INT,
4497 };
4498
4499 AVCodec ff_h263_encoder = {
4500     .name           = "h263",
4501     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4502     .type           = AVMEDIA_TYPE_VIDEO,
4503     .id             = AV_CODEC_ID_H263,
4504     .priv_data_size = sizeof(MpegEncContext),
4505     .init           = ff_mpv_encode_init,
4506     .encode2        = ff_mpv_encode_picture,
4507     .close          = ff_mpv_encode_end,
4508     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4509     .priv_class     = &h263_class,
4510 };
4511
4512 static const AVOption h263p_options[] = {
4513     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4514     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4515     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4516     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4517     FF_MPV_COMMON_OPTS
4518     { NULL },
4519 };
4520 static const AVClass h263p_class = {
4521     .class_name = "H.263p encoder",
4522     .item_name  = av_default_item_name,
4523     .option     = h263p_options,
4524     .version    = LIBAVUTIL_VERSION_INT,
4525 };
4526
4527 AVCodec ff_h263p_encoder = {
4528     .name           = "h263p",
4529     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4530     .type           = AVMEDIA_TYPE_VIDEO,
4531     .id             = AV_CODEC_ID_H263P,
4532     .priv_data_size = sizeof(MpegEncContext),
4533     .init           = ff_mpv_encode_init,
4534     .encode2        = ff_mpv_encode_picture,
4535     .close          = ff_mpv_encode_end,
4536     .capabilities   = CODEC_CAP_SLICE_THREADS,
4537     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4538     .priv_class     = &h263p_class,
4539 };
4540
4541 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4542
4543 AVCodec ff_msmpeg4v2_encoder = {
4544     .name           = "msmpeg4v2",
4545     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4546     .type           = AVMEDIA_TYPE_VIDEO,
4547     .id             = AV_CODEC_ID_MSMPEG4V2,
4548     .priv_data_size = sizeof(MpegEncContext),
4549     .init           = ff_mpv_encode_init,
4550     .encode2        = ff_mpv_encode_picture,
4551     .close          = ff_mpv_encode_end,
4552     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4553     .priv_class     = &msmpeg4v2_class,
4554 };
4555
4556 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4557
4558 AVCodec ff_msmpeg4v3_encoder = {
4559     .name           = "msmpeg4",
4560     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4561     .type           = AVMEDIA_TYPE_VIDEO,
4562     .id             = AV_CODEC_ID_MSMPEG4V3,
4563     .priv_data_size = sizeof(MpegEncContext),
4564     .init           = ff_mpv_encode_init,
4565     .encode2        = ff_mpv_encode_picture,
4566     .close          = ff_mpv_encode_end,
4567     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4568     .priv_class     = &msmpeg4v3_class,
4569 };
4570
4571 FF_MPV_GENERIC_CLASS(wmv1)
4572
4573 AVCodec ff_wmv1_encoder = {
4574     .name           = "wmv1",
4575     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4576     .type           = AVMEDIA_TYPE_VIDEO,
4577     .id             = AV_CODEC_ID_WMV1,
4578     .priv_data_size = sizeof(MpegEncContext),
4579     .init           = ff_mpv_encode_init,
4580     .encode2        = ff_mpv_encode_picture,
4581     .close          = ff_mpv_encode_end,
4582     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4583     .priv_class     = &wmv1_class,
4584 };