]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit 'b94e4acb4874843e914fd3cb8e089aff0756bb4a'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/intmath.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/opt.h"
33 #include "avcodec.h"
34 #include "dsputil.h"
35 #include "mpegvideo.h"
36 #include "h263.h"
37 #include "mjpegenc.h"
38 #include "msmpeg4.h"
39 #include "faandct.h"
40 #include "thread.h"
41 #include "aandcttab.h"
42 #include "flv.h"
43 #include "mpeg4video.h"
44 #include "internal.h"
45 #include "bytestream.h"
46 #include <limits.h>
47 #include "sp5x.h"
48
49 //#undef NDEBUG
50 //#include <assert.h>
51
52 static int encode_picture(MpegEncContext *s, int picture_number);
53 static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
54 static int sse_mb(MpegEncContext *s);
55 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
56 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
57
58 /* enable all paranoid tests for rounding, overflows, etc... */
59 //#define PARANOID
60
61 //#define DEBUG
62
/* File-scope default tables. MPV_encode_defaults() points s->me.mv_penalty
 * at default_mv_penalty and s->fcode_tab at default_fcode_tab, and sets the
 * 32 entries of default_fcode_tab at offsets -16..15 around MAX_MV to 1. */
63 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
64 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
65
/* Option table made of the entries expanded from FF_MPV_COMMON_OPTS,
 * followed by a { NULL } entry that terminates the list. */
66 const AVOption ff_mpv_generic_options[] = {
67     FF_MPV_COMMON_OPTS
68     { NULL },
69 };
70
71 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
72                        uint16_t (*qmat16)[2][64],
73                        const uint16_t *quant_matrix,
74                        int bias, int qmin, int qmax, int intra)
75 {
76     int qscale;
77     int shift = 0;
78
79     for (qscale = qmin; qscale <= qmax; qscale++) {
80         int i;
81         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
82             dsp->fdct == ff_jpeg_fdct_islow_10 ||
83             dsp->fdct == ff_faandct) {
84             for (i = 0; i < 64; i++) {
85                 const int j = dsp->idct_permutation[i];
86                 /* 16 <= qscale * quant_matrix[i] <= 7905
87                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
88                  *             19952 <=              x  <= 249205026
89                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
90                  *           3444240 >= (1 << 36) / (x) >= 275 */
91
92                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
93                                         (qscale * quant_matrix[j]));
94             }
95         } else if (dsp->fdct == ff_fdct_ifast) {
96             for (i = 0; i < 64; i++) {
97                 const int j = dsp->idct_permutation[i];
98                 /* 16 <= qscale * quant_matrix[i] <= 7905
99                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
100                  *             19952 <=              x  <= 249205026
101                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
102                  *           3444240 >= (1 << 36) / (x) >= 275 */
103
104                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
105                                         (ff_aanscales[i] * qscale * quant_matrix[j]));
106             }
107         } else {
108             for (i = 0; i < 64; i++) {
109                 const int j = dsp->idct_permutation[i];
110                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
111                  * Assume x = qscale * quant_matrix[i]
112                  * So             16 <=              x  <= 7905
113                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
114                  * so          32768 >= (1 << 19) / (x) >= 67 */
115                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
116                                         (qscale * quant_matrix[j]));
117                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
118                 //                    (qscale * quant_matrix[i]);
119                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
120                                        (qscale * quant_matrix[j]);
121
122                 if (qmat16[qscale][0][i] == 0 ||
123                     qmat16[qscale][0][i] == 128 * 256)
124                     qmat16[qscale][0][i] = 128 * 256 - 1;
125                 qmat16[qscale][1][i] =
126                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
127                                 qmat16[qscale][0][i]);
128             }
129         }
130
131         for (i = intra; i < 64; i++) {
132             int64_t max = 8191;
133             if (dsp->fdct == ff_fdct_ifast) {
134                 max = (8191LL * ff_aanscales[i]) >> 14;
135             }
136             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
137                 shift++;
138             }
139         }
140     }
141     if (shift) {
142         av_log(NULL, AV_LOG_INFO,
143                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
144                QMAT_SHIFT - shift);
145     }
146 }
147
148 static inline void update_qscale(MpegEncContext *s)
149 {
150     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
151                 (FF_LAMBDA_SHIFT + 7);
152     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
153
154     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
155                  FF_LAMBDA_SHIFT;
156 }
157
158 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
159 {
160     int i;
161
162     if (matrix) {
163         put_bits(pb, 1, 1);
164         for (i = 0; i < 64; i++) {
165             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
166         }
167     } else
168         put_bits(pb, 1, 0);
169 }
170
171 /**
172  * init s->current_picture.qscale_table from s->lambda_table
173  */
174 void ff_init_qscale_tab(MpegEncContext *s)
175 {
176     int8_t * const qscale_table = s->current_picture.f.qscale_table;
177     int i;
178
179     for (i = 0; i < s->mb_num; i++) {
180         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
181         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
182         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
183                                                   s->avctx->qmax);
184     }
185 }
186
187 static void copy_picture_attributes(MpegEncContext *s,
188                                     AVFrame *dst,
189                                     AVFrame *src)
190 {
191     int i;
192
193     dst->pict_type              = src->pict_type;
194     dst->quality                = src->quality;
195     dst->coded_picture_number   = src->coded_picture_number;
196     dst->display_picture_number = src->display_picture_number;
197     //dst->reference              = src->reference;
198     dst->pts                    = src->pts;
199     dst->interlaced_frame       = src->interlaced_frame;
200     dst->top_field_first        = src->top_field_first;
201
202     if (s->avctx->me_threshold) {
203         if (!src->motion_val[0])
204             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
205         if (!src->mb_type)
206             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
207         if (!src->ref_index[0])
208             av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
209         if (src->motion_subsample_log2 != dst->motion_subsample_log2)
210             av_log(s->avctx, AV_LOG_ERROR,
211                    "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
212                    src->motion_subsample_log2, dst->motion_subsample_log2);
213
214         memcpy(dst->mb_type, src->mb_type,
215                s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
216
217         for (i = 0; i < 2; i++) {
218             int stride = ((16 * s->mb_width ) >>
219                           src->motion_subsample_log2) + 1;
220             int height = ((16 * s->mb_height) >> src->motion_subsample_log2);
221
222             if (src->motion_val[i] &&
223                 src->motion_val[i] != dst->motion_val[i]) {
224                 memcpy(dst->motion_val[i], src->motion_val[i],
225                        2 * stride * height * sizeof(int16_t));
226             }
227             if (src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]) {
228                 memcpy(dst->ref_index[i], src->ref_index[i],
229                        s->mb_stride * 4 * s->mb_height * sizeof(int8_t));
230             }
231         }
232     }
233 }
234
235 static void update_duplicate_context_after_me(MpegEncContext *dst,
236                                               MpegEncContext *src)
237 {
238 #define COPY(a) dst->a= src->a
239     COPY(pict_type);
240     COPY(current_picture);
241     COPY(f_code);
242     COPY(b_code);
243     COPY(qscale);
244     COPY(lambda);
245     COPY(lambda2);
246     COPY(picture_in_gop_number);
247     COPY(gop_picture_number);
248     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
249     COPY(progressive_frame);    // FIXME don't set in encode_header
250     COPY(partitioned_frame);    // FIXME don't set in encode_header
251 #undef COPY
252 }
253
254 /**
255  * Set the given MpegEncContext to defaults for encoding.
256  * the changed fields will not depend upon the prior state of the MpegEncContext.
257  */
258 static void MPV_encode_defaults(MpegEncContext *s)
259 {
260     int i;
261     ff_MPV_common_defaults(s);
262
263     for (i = -16; i < 16; i++) {
264         default_fcode_tab[i + MAX_MV] = 1;
265     }
266     s->me.mv_penalty = default_mv_penalty;
267     s->fcode_tab     = default_fcode_tab;
268 }
269
270 av_cold int ff_dct_encode_init(MpegEncContext *s) {
271     if (ARCH_X86)
272         ff_dct_encode_init_x86(s);
273
274     if (!s->dct_quantize)
275         s->dct_quantize = ff_dct_quantize_c;
276     if (!s->denoise_dct)
277         s->denoise_dct  = denoise_dct_c;
278     s->fast_dct_quantize = s->dct_quantize;
279     if (s->avctx->trellis)
280         s->dct_quantize  = dct_quantize_trellis_c;
281
282     return 0;
283 }
284
285 /* init video encoder */
286 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
287 {
288     MpegEncContext *s = avctx->priv_data;
289     int i;
290     int chroma_h_shift, chroma_v_shift;
291
292     MPV_encode_defaults(s);
293
294     switch (avctx->codec_id) {
295     case AV_CODEC_ID_MPEG2VIDEO:
296         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
297             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
298             av_log(avctx, AV_LOG_ERROR,
299                    "only YUV420 and YUV422 are supported\n");
300             return -1;
301         }
302         break;
303     case AV_CODEC_ID_LJPEG:
304         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
305             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
306             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
307             avctx->pix_fmt != AV_PIX_FMT_BGR0     &&
308             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
309             avctx->pix_fmt != AV_PIX_FMT_BGR24    &&
310             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
311               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
312               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
313              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
314             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
315             return -1;
316         }
317         break;
318     case AV_CODEC_ID_MJPEG:
319     case AV_CODEC_ID_AMV:
320         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
321             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
322             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
323               avctx->pix_fmt != AV_PIX_FMT_YUV422P) ||
324              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
325             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
326             return -1;
327         }
328         break;
329     default:
330         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
331             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
332             return -1;
333         }
334     }
335
336     switch (avctx->pix_fmt) {
337     case AV_PIX_FMT_YUVJ422P:
338     case AV_PIX_FMT_YUV422P:
339         s->chroma_format = CHROMA_422;
340         break;
341     case AV_PIX_FMT_YUVJ420P:
342     case AV_PIX_FMT_YUV420P:
343     default:
344         s->chroma_format = CHROMA_420;
345         break;
346     }
347
348     s->bit_rate = avctx->bit_rate;
349     s->width    = avctx->width;
350     s->height   = avctx->height;
351     if (avctx->gop_size > 600 &&
352         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
353         av_log(avctx, AV_LOG_WARNING,
354                "keyframe interval too large!, reducing it from %d to %d\n",
355                avctx->gop_size, 600);
356         avctx->gop_size = 600;
357     }
358     s->gop_size     = avctx->gop_size;
359     s->avctx        = avctx;
360     s->flags        = avctx->flags;
361     s->flags2       = avctx->flags2;
362     s->max_b_frames = avctx->max_b_frames;
363     s->codec_id     = avctx->codec->id;
364 #if FF_API_MPV_GLOBAL_OPTS
365     if (avctx->luma_elim_threshold)
366         s->luma_elim_threshold   = avctx->luma_elim_threshold;
367     if (avctx->chroma_elim_threshold)
368         s->chroma_elim_threshold = avctx->chroma_elim_threshold;
369 #endif
370     s->strict_std_compliance = avctx->strict_std_compliance;
371     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
372     s->mpeg_quant         = avctx->mpeg_quant;
373     s->rtp_mode           = !!avctx->rtp_payload_size;
374     s->intra_dc_precision = avctx->intra_dc_precision;
375     s->user_specified_pts = AV_NOPTS_VALUE;
376
377     if (s->gop_size <= 1) {
378         s->intra_only = 1;
379         s->gop_size   = 12;
380     } else {
381         s->intra_only = 0;
382     }
383
384     s->me_method = avctx->me_method;
385
386     /* Fixed QSCALE */
387     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
388
389 #if FF_API_MPV_GLOBAL_OPTS
390     if (s->flags & CODEC_FLAG_QP_RD)
391         s->mpv_flags |= FF_MPV_FLAG_QP_RD;
392 #endif
393
394     s->adaptive_quant = (s->avctx->lumi_masking ||
395                          s->avctx->dark_masking ||
396                          s->avctx->temporal_cplx_masking ||
397                          s->avctx->spatial_cplx_masking  ||
398                          s->avctx->p_masking      ||
399                          s->avctx->border_masking ||
400                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
401                         !s->fixed_qscale;
402
403     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
404
405     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
406         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
407         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
408             return -1;
409     }
410
411     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
412         av_log(avctx, AV_LOG_INFO,
413                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
414     }
415
416     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
417         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
418         return -1;
419     }
420
421     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
422         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
423         return -1;
424     }
425
426     if (avctx->rc_max_rate &&
427         avctx->rc_max_rate == avctx->bit_rate &&
428         avctx->rc_max_rate != avctx->rc_min_rate) {
429         av_log(avctx, AV_LOG_INFO,
430                "impossible bitrate constraints, this will fail\n");
431     }
432
433     if (avctx->rc_buffer_size &&
434         avctx->bit_rate * (int64_t)avctx->time_base.num >
435             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
436         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
437         return -1;
438     }
439
440     if (!s->fixed_qscale &&
441         avctx->bit_rate * av_q2d(avctx->time_base) >
442             avctx->bit_rate_tolerance) {
443         av_log(avctx, AV_LOG_ERROR,
444                "bitrate tolerance too small for bitrate\n");
445         return -1;
446     }
447
448     if (s->avctx->rc_max_rate &&
449         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
450         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
451          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
452         90000LL * (avctx->rc_buffer_size - 1) >
453             s->avctx->rc_max_rate * 0xFFFFLL) {
454         av_log(avctx, AV_LOG_INFO,
455                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
456                "specified vbv buffer is too large for the given bitrate!\n");
457     }
458
459     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
460         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
461         s->codec_id != AV_CODEC_ID_FLV1) {
462         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
463         return -1;
464     }
465
466     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
467         av_log(avctx, AV_LOG_ERROR,
468                "OBMC is only supported with simple mb decision\n");
469         return -1;
470     }
471
472     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
473         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
474         return -1;
475     }
476
477     if (s->max_b_frames                    &&
478         s->codec_id != AV_CODEC_ID_MPEG4      &&
479         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
480         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
481         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
482         return -1;
483     }
484
485     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
486          s->codec_id == AV_CODEC_ID_H263  ||
487          s->codec_id == AV_CODEC_ID_H263P) &&
488         (avctx->sample_aspect_ratio.num > 255 ||
489          avctx->sample_aspect_ratio.den > 255)) {
490         av_log(avctx, AV_LOG_WARNING,
491                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
492                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
493         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
494                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
495     }
496
497     if ((s->codec_id == AV_CODEC_ID_H263  ||
498          s->codec_id == AV_CODEC_ID_H263P) &&
499         (avctx->width  > 2048 ||
500          avctx->height > 1152 )) {
501         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
502         return -1;
503     }
504     if ((s->codec_id == AV_CODEC_ID_H263  ||
505          s->codec_id == AV_CODEC_ID_H263P) &&
506         ((avctx->width &3) ||
507          (avctx->height&3) )) {
508         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
509         return -1;
510     }
511
512     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
513         (avctx->width  > 4095 ||
514          avctx->height > 4095 )) {
515         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
516         return -1;
517     }
518
519     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
520         (avctx->width  > 16383 ||
521          avctx->height > 16383 )) {
522         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
523         return -1;
524     }
525
526     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
527          s->codec_id == AV_CODEC_ID_WMV2) &&
528          avctx->width & 1) {
529          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
530          return -1;
531     }
532
533     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
534         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
535         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
536         return -1;
537     }
538
539     // FIXME mpeg2 uses that too
540     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
541         av_log(avctx, AV_LOG_ERROR,
542                "mpeg2 style quantization not supported by codec\n");
543         return -1;
544     }
545
546 #if FF_API_MPV_GLOBAL_OPTS
547     if (s->flags & CODEC_FLAG_CBP_RD)
548         s->mpv_flags |= FF_MPV_FLAG_CBP_RD;
549 #endif
550
551     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
552         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
553         return -1;
554     }
555
556     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
557         s->avctx->mb_decision != FF_MB_DECISION_RD) {
558         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
559         return -1;
560     }
561
562     if (s->avctx->scenechange_threshold < 1000000000 &&
563         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
564         av_log(avctx, AV_LOG_ERROR,
565                "closed gop with scene change detection are not supported yet, "
566                "set threshold to 1000000000\n");
567         return -1;
568     }
569
570     if (s->flags & CODEC_FLAG_LOW_DELAY) {
571         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
572             av_log(avctx, AV_LOG_ERROR,
573                   "low delay forcing is only available for mpeg2\n");
574             return -1;
575         }
576         if (s->max_b_frames != 0) {
577             av_log(avctx, AV_LOG_ERROR,
578                    "b frames cannot be used with low delay\n");
579             return -1;
580         }
581     }
582
583     if (s->q_scale_type == 1) {
584         if (avctx->qmax > 12) {
585             av_log(avctx, AV_LOG_ERROR,
586                    "non linear quant only supports qmax <= 12 currently\n");
587             return -1;
588         }
589     }
590
591     if (s->avctx->thread_count > 1         &&
592         s->codec_id != AV_CODEC_ID_MPEG4      &&
593         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
594         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
595         s->codec_id != AV_CODEC_ID_MJPEG      &&
596         (s->codec_id != AV_CODEC_ID_H263P)) {
597         av_log(avctx, AV_LOG_ERROR,
598                "multi threaded encoding not supported by codec\n");
599         return -1;
600     }
601
602     if (s->avctx->thread_count < 1) {
603         av_log(avctx, AV_LOG_ERROR,
604                "automatic thread number detection not supported by codec, "
605                "patch welcome\n");
606         return -1;
607     }
608
609     if (s->avctx->thread_count > 1)
610         s->rtp_mode = 1;
611
612     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
613         s->h263_slice_structured = 1;
614
615     if (!avctx->time_base.den || !avctx->time_base.num) {
616         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
617         return -1;
618     }
619
620     i = (INT_MAX / 2 + 128) >> 8;
621     if (avctx->me_threshold >= i) {
622         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n",
623                i - 1);
624         return -1;
625     }
626     if (avctx->mb_threshold >= i) {
627         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
628                i - 1);
629         return -1;
630     }
631
632     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
633         av_log(avctx, AV_LOG_INFO,
634                "notice: b_frame_strategy only affects the first pass\n");
635         avctx->b_frame_strategy = 0;
636     }
637
638     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
639     if (i > 1) {
640         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
641         avctx->time_base.den /= i;
642         avctx->time_base.num /= i;
643         //return -1;
644     }
645
646     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
647         // (a + x * 3 / 8) / x
648         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
649         s->inter_quant_bias = 0;
650     } else {
651         s->intra_quant_bias = 0;
652         // (a - x / 4) / x
653         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
654     }
655
656     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
657         s->intra_quant_bias = avctx->intra_quant_bias;
658     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
659         s->inter_quant_bias = avctx->inter_quant_bias;
660
661     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
662
663     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift,
664                                   &chroma_v_shift);
665
666     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
667         s->avctx->time_base.den > (1 << 16) - 1) {
668         av_log(avctx, AV_LOG_ERROR,
669                "timebase %d/%d not supported by MPEG 4 standard, "
670                "the maximum admitted value for the timebase denominator "
671                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
672                (1 << 16) - 1);
673         return -1;
674     }
675     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
676
677 #if FF_API_MPV_GLOBAL_OPTS
678     if (avctx->flags2 & CODEC_FLAG2_SKIP_RD)
679         s->mpv_flags |= FF_MPV_FLAG_SKIP_RD;
680     if (avctx->flags2 & CODEC_FLAG2_STRICT_GOP)
681         s->mpv_flags |= FF_MPV_FLAG_STRICT_GOP;
682     if (avctx->quantizer_noise_shaping)
683         s->quantizer_noise_shaping = avctx->quantizer_noise_shaping;
684 #endif
685
686     switch (avctx->codec->id) {
687     case AV_CODEC_ID_MPEG1VIDEO:
688         s->out_format = FMT_MPEG1;
689         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
690         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
691         break;
692     case AV_CODEC_ID_MPEG2VIDEO:
693         s->out_format = FMT_MPEG1;
694         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
695         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
696         s->rtp_mode   = 1;
697         break;
698     case AV_CODEC_ID_LJPEG:
699     case AV_CODEC_ID_MJPEG:
700     case AV_CODEC_ID_AMV:
701         s->out_format = FMT_MJPEG;
702         s->intra_only = 1; /* force intra only for jpeg */
703         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
704             (avctx->pix_fmt == AV_PIX_FMT_BGR0
705              || s->avctx->pix_fmt == AV_PIX_FMT_BGRA
706              || s->avctx->pix_fmt == AV_PIX_FMT_BGR24)) {
707             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
708             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
709             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
710         } else {
711             s->mjpeg_vsample[0] = 2;
712             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
713             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
714             s->mjpeg_hsample[0] = 2;
715             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
716             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
717         }
718         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
719             ff_mjpeg_encode_init(s) < 0)
720             return -1;
721         avctx->delay = 0;
722         s->low_delay = 1;
723         break;
724     case AV_CODEC_ID_H261:
725         if (!CONFIG_H261_ENCODER)
726             return -1;
727         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
728             av_log(avctx, AV_LOG_ERROR,
729                    "The specified picture size of %dx%d is not valid for the "
730                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
731                     s->width, s->height);
732             return -1;
733         }
734         s->out_format = FMT_H261;
735         avctx->delay  = 0;
736         s->low_delay  = 1;
737         break;
738     case AV_CODEC_ID_H263:
739         if (!CONFIG_H263_ENCODER)
740             return -1;
741         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
742                              s->width, s->height) == 8) {
743             av_log(avctx, AV_LOG_ERROR,
744                    "The specified picture size of %dx%d is not valid for "
745                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
746                    "352x288, 704x576, and 1408x1152. "
747                    "Try H.263+.\n", s->width, s->height);
748             return -1;
749         }
750         s->out_format = FMT_H263;
751         avctx->delay  = 0;
752         s->low_delay  = 1;
753         break;
754     case AV_CODEC_ID_H263P:
755         s->out_format = FMT_H263;
756         s->h263_plus  = 1;
757         /* Fx */
758         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
759         s->modified_quant  = s->h263_aic;
760         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
761         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
762
763         /* /Fx */
764         /* These are just to be sure */
765         avctx->delay = 0;
766         s->low_delay = 1;
767         break;
768     case AV_CODEC_ID_FLV1:
769         s->out_format      = FMT_H263;
770         s->h263_flv        = 2; /* format = 1; 11-bit codes */
771         s->unrestricted_mv = 1;
772         s->rtp_mode  = 0; /* don't allow GOB */
773         avctx->delay = 0;
774         s->low_delay = 1;
775         break;
776     case AV_CODEC_ID_RV10:
777         s->out_format = FMT_H263;
778         avctx->delay  = 0;
779         s->low_delay  = 1;
780         break;
781     case AV_CODEC_ID_RV20:
782         s->out_format      = FMT_H263;
783         avctx->delay       = 0;
784         s->low_delay       = 1;
785         s->modified_quant  = 1;
786         s->h263_aic        = 1;
787         s->h263_plus       = 1;
788         s->loop_filter     = 1;
789         s->unrestricted_mv = 0;
790         break;
791     case AV_CODEC_ID_MPEG4:
792         s->out_format      = FMT_H263;
793         s->h263_pred       = 1;
794         s->unrestricted_mv = 1;
795         s->low_delay       = s->max_b_frames ? 0 : 1;
796         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
797         break;
798     case AV_CODEC_ID_MSMPEG4V2:
799         s->out_format      = FMT_H263;
800         s->h263_pred       = 1;
801         s->unrestricted_mv = 1;
802         s->msmpeg4_version = 2;
803         avctx->delay       = 0;
804         s->low_delay       = 1;
805         break;
806     case AV_CODEC_ID_MSMPEG4V3:
807         s->out_format        = FMT_H263;
808         s->h263_pred         = 1;
809         s->unrestricted_mv   = 1;
810         s->msmpeg4_version   = 3;
811         s->flipflop_rounding = 1;
812         avctx->delay         = 0;
813         s->low_delay         = 1;
814         break;
815     case AV_CODEC_ID_WMV1:
816         s->out_format        = FMT_H263;
817         s->h263_pred         = 1;
818         s->unrestricted_mv   = 1;
819         s->msmpeg4_version   = 4;
820         s->flipflop_rounding = 1;
821         avctx->delay         = 0;
822         s->low_delay         = 1;
823         break;
824     case AV_CODEC_ID_WMV2:
825         s->out_format        = FMT_H263;
826         s->h263_pred         = 1;
827         s->unrestricted_mv   = 1;
828         s->msmpeg4_version   = 5;
829         s->flipflop_rounding = 1;
830         avctx->delay         = 0;
831         s->low_delay         = 1;
832         break;
833     default:
834         return -1;
835     }
836
837     avctx->has_b_frames = !s->low_delay;
838
839     s->encoding = 1;
840
841     s->progressive_frame    =
842     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
843                                                 CODEC_FLAG_INTERLACED_ME) ||
844                                 s->alternate_scan);
845
846     /* init */
847     if (ff_MPV_common_init(s) < 0)
848         return -1;
849
850     ff_dct_encode_init(s);
851
852     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
853         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
854
855     s->quant_precision = 5;
856
857     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
858     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
859
860     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
861         ff_h261_encode_init(s);
862     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
863         ff_h263_encode_init(s);
864     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
865         ff_msmpeg4_encode_init(s);
866     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
867         && s->out_format == FMT_MPEG1)
868         ff_mpeg1_encode_init(s);
869
870     /* init q matrix */
871     for (i = 0; i < 64; i++) {
872         int j = s->dsp.idct_permutation[i];
873         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
874             s->mpeg_quant) {
875             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
876             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
877         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
878             s->intra_matrix[j] =
879             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
880         } else {
881             /* mpeg1/2 */
882             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
883             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
884         }
885         if (s->avctx->intra_matrix)
886             s->intra_matrix[j] = s->avctx->intra_matrix[i];
887         if (s->avctx->inter_matrix)
888             s->inter_matrix[j] = s->avctx->inter_matrix[i];
889     }
890
891     /* precompute matrix */
892     /* for mjpeg, we do include qscale in the matrix */
893     if (s->out_format != FMT_MJPEG) {
894         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
895                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
896                           31, 1);
897         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
898                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
899                           31, 0);
900     }
901
902     if (ff_rate_control_init(s) < 0)
903         return -1;
904
905     return 0;
906 }
907
908 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
909 {
910     MpegEncContext *s = avctx->priv_data;
911
912     ff_rate_control_uninit(s);
913
914     ff_MPV_common_end(s);
915     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
916         s->out_format == FMT_MJPEG)
917         ff_mjpeg_encode_close(s);
918
919     av_freep(&avctx->extradata);
920
921     return 0;
922 }
923
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left sample of the block
 * @param ref    value every sample is compared against
 * @param stride distance in bytes between two consecutive rows of src
 * @return sum over the 256 samples of |sample - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
937
938 static int get_intra_count(MpegEncContext *s, uint8_t *src,
939                            uint8_t *ref, int stride)
940 {
941     int x, y, w, h;
942     int acc = 0;
943
944     w = s->width  & ~15;
945     h = s->height & ~15;
946
947     for (y = 0; y < h; y += 16) {
948         for (x = 0; x < w; x += 16) {
949             int offset = x + y * stride;
950             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
951                                      16);
952             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
953             int sae  = get_sae(src + offset, mean, stride);
954
955             acc += sae + 500 < sad;
956         }
957     }
958     return acc;
959 }
960
961
962 static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg)
963 {
964     AVFrame *pic = NULL;
965     int64_t pts;
966     int i;
967     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
968                                                  (s->low_delay ? 0 : 1);
969     int direct = 1;
970
971     if (pic_arg) {
972         pts = pic_arg->pts;
973         pic_arg->display_picture_number = s->input_picture_number++;
974
975         if (pts != AV_NOPTS_VALUE) {
976             if (s->user_specified_pts != AV_NOPTS_VALUE) {
977                 int64_t time = pts;
978                 int64_t last = s->user_specified_pts;
979
980                 if (time <= last) {
981                     av_log(s->avctx, AV_LOG_ERROR,
982                            "Error, Invalid timestamp=%"PRId64", "
983                            "last=%"PRId64"\n", pts, s->user_specified_pts);
984                     return -1;
985                 }
986
987                 if (!s->low_delay && pic_arg->display_picture_number == 1)
988                     s->dts_delta = time - last;
989             }
990             s->user_specified_pts = pts;
991         } else {
992             if (s->user_specified_pts != AV_NOPTS_VALUE) {
993                 s->user_specified_pts =
994                 pts = s->user_specified_pts + 1;
995                 av_log(s->avctx, AV_LOG_INFO,
996                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
997                        pts);
998             } else {
999                 pts = pic_arg->display_picture_number;
1000             }
1001         }
1002     }
1003
1004   if (pic_arg) {
1005     if (encoding_delay && !(s->flags & CODEC_FLAG_INPUT_PRESERVED))
1006         direct = 0;
1007     if (pic_arg->linesize[0] != s->linesize)
1008         direct = 0;
1009     if (pic_arg->linesize[1] != s->uvlinesize)
1010         direct = 0;
1011     if (pic_arg->linesize[2] != s->uvlinesize)
1012         direct = 0;
1013
1014     av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
1015             pic_arg->linesize[1], s->linesize, s->uvlinesize);
1016
1017     if (direct) {
1018         i = ff_find_unused_picture(s, 1);
1019         if (i < 0)
1020             return i;
1021
1022         pic = &s->picture[i].f;
1023         pic->reference = 3;
1024
1025         for (i = 0; i < 4; i++) {
1026             pic->data[i]     = pic_arg->data[i];
1027             pic->linesize[i] = pic_arg->linesize[i];
1028         }
1029         if (ff_alloc_picture(s, (Picture *) pic, 1) < 0) {
1030             return -1;
1031         }
1032     } else {
1033         i = ff_find_unused_picture(s, 0);
1034         if (i < 0)
1035             return i;
1036
1037         pic = &s->picture[i].f;
1038         pic->reference = 3;
1039
1040         if (ff_alloc_picture(s, (Picture *) pic, 0) < 0) {
1041             return -1;
1042         }
1043
1044         if (pic->data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1045             pic->data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1046             pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1047             // empty
1048         } else {
1049             int h_chroma_shift, v_chroma_shift;
1050             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift,
1051                                           &v_chroma_shift);
1052
1053             for (i = 0; i < 3; i++) {
1054                 int src_stride = pic_arg->linesize[i];
1055                 int dst_stride = i ? s->uvlinesize : s->linesize;
1056                 int h_shift = i ? h_chroma_shift : 0;
1057                 int v_shift = i ? v_chroma_shift : 0;
1058                 int w = s->width  >> h_shift;
1059                 int h = s->height >> v_shift;
1060                 uint8_t *src = pic_arg->data[i];
1061                 uint8_t *dst = pic->data[i];
1062
1063                 if(s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)){
1064                     h= ((s->height+15)/16*16)>>v_shift;
1065                 }
1066
1067                 if (!s->avctx->rc_buffer_size)
1068                     dst += INPLACE_OFFSET;
1069
1070                 if (src_stride == dst_stride)
1071                     memcpy(dst, src, src_stride * h);
1072                 else {
1073                     while (h--) {
1074                         memcpy(dst, src, w);
1075                         dst += dst_stride;
1076                         src += src_stride;
1077                     }
1078                 }
1079             }
1080         }
1081     }
1082     copy_picture_attributes(s, pic, pic_arg);
1083     pic->pts = pts; // we set this here to avoid modifiying pic_arg
1084   }
1085
1086     /* shift buffer entries */
1087     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1088         s->input_picture[i - 1] = s->input_picture[i];
1089
1090     s->input_picture[encoding_delay] = (Picture*) pic;
1091
1092     return 0;
1093 }
1094
1095 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1096 {
1097     int x, y, plane;
1098     int score = 0;
1099     int64_t score64 = 0;
1100
1101     for (plane = 0; plane < 3; plane++) {
1102         const int stride = p->f.linesize[plane];
1103         const int bw = plane ? 1 : 2;
1104         for (y = 0; y < s->mb_height * bw; y++) {
1105             for (x = 0; x < s->mb_width * bw; x++) {
1106                 int off = p->f.type == FF_BUFFER_TYPE_SHARED ? 0 : 16;
1107                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1108                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1109                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1110
1111                 switch (s->avctx->frame_skip_exp) {
1112                 case 0: score    =  FFMAX(score, v);          break;
1113                 case 1: score   += FFABS(v);                  break;
1114                 case 2: score   += v * v;                     break;
1115                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1116                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1117                 }
1118             }
1119         }
1120     }
1121
1122     if (score)
1123         score64 = score;
1124
1125     if (score64 < s->avctx->frame_skip_threshold)
1126         return 1;
1127     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1128         return 1;
1129     return 0;
1130 }
1131
1132 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1133 {
1134     AVPacket pkt = { 0 };
1135     int ret, got_output;
1136
1137     av_init_packet(&pkt);
1138     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1139     if (ret < 0)
1140         return ret;
1141
1142     ret = pkt.size;
1143     av_free_packet(&pkt);
1144     return ret;
1145 }
1146
1147 static int estimate_best_b_count(MpegEncContext *s)
1148 {
1149     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1150     AVCodecContext *c = avcodec_alloc_context3(NULL);
1151     AVFrame input[FF_MAX_B_FRAMES + 2];
1152     const int scale = s->avctx->brd_scale;
1153     int i, j, out_size, p_lambda, b_lambda, lambda2;
1154     int64_t best_rd  = INT64_MAX;
1155     int best_b_count = -1;
1156
1157     av_assert0(scale >= 0 && scale <= 3);
1158
1159     //emms_c();
1160     //s->next_picture_ptr->quality;
1161     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1162     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1163     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1164     if (!b_lambda) // FIXME we should do this somewhere else
1165         b_lambda = p_lambda;
1166     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1167                FF_LAMBDA_SHIFT;
1168
1169     c->width        = s->width  >> scale;
1170     c->height       = s->height >> scale;
1171     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1172                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1173     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1174     c->mb_decision  = s->avctx->mb_decision;
1175     c->me_cmp       = s->avctx->me_cmp;
1176     c->mb_cmp       = s->avctx->mb_cmp;
1177     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1178     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1179     c->time_base    = s->avctx->time_base;
1180     c->max_b_frames = s->max_b_frames;
1181
1182     if (avcodec_open2(c, codec, NULL) < 0)
1183         return -1;
1184
1185     for (i = 0; i < s->max_b_frames + 2; i++) {
1186         int ysize = c->width * c->height;
1187         int csize = (c->width / 2) * (c->height / 2);
1188         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1189                                                 s->next_picture_ptr;
1190
1191         avcodec_get_frame_defaults(&input[i]);
1192         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1193         input[i].data[1]     = input[i].data[0] + ysize;
1194         input[i].data[2]     = input[i].data[1] + csize;
1195         input[i].linesize[0] = c->width;
1196         input[i].linesize[1] =
1197         input[i].linesize[2] = c->width / 2;
1198
1199         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1200             pre_input = *pre_input_ptr;
1201
1202             if (pre_input.f.type != FF_BUFFER_TYPE_SHARED && i) {
1203                 pre_input.f.data[0] += INPLACE_OFFSET;
1204                 pre_input.f.data[1] += INPLACE_OFFSET;
1205                 pre_input.f.data[2] += INPLACE_OFFSET;
1206             }
1207
1208             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1209                                  pre_input.f.data[0], pre_input.f.linesize[0],
1210                                  c->width,      c->height);
1211             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1212                                  pre_input.f.data[1], pre_input.f.linesize[1],
1213                                  c->width >> 1, c->height >> 1);
1214             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1215                                  pre_input.f.data[2], pre_input.f.linesize[2],
1216                                  c->width >> 1, c->height >> 1);
1217         }
1218     }
1219
1220     for (j = 0; j < s->max_b_frames + 1; j++) {
1221         int64_t rd = 0;
1222
1223         if (!s->input_picture[j])
1224             break;
1225
1226         c->error[0] = c->error[1] = c->error[2] = 0;
1227
1228         input[0].pict_type = AV_PICTURE_TYPE_I;
1229         input[0].quality   = 1 * FF_QP2LAMBDA;
1230
1231         out_size = encode_frame(c, &input[0]);
1232
1233         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1234
1235         for (i = 0; i < s->max_b_frames + 1; i++) {
1236             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1237
1238             input[i + 1].pict_type = is_p ?
1239                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1240             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1241
1242             out_size = encode_frame(c, &input[i + 1]);
1243
1244             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1245         }
1246
1247         /* get the delayed frames */
1248         while (out_size) {
1249             out_size = encode_frame(c, NULL);
1250             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1251         }
1252
1253         rd += c->error[0] + c->error[1] + c->error[2];
1254
1255         if (rd < best_rd) {
1256             best_rd = rd;
1257             best_b_count = j;
1258         }
1259     }
1260
1261     avcodec_close(c);
1262     av_freep(&c);
1263
1264     for (i = 0; i < s->max_b_frames + 2; i++) {
1265         av_freep(&input[i].data[0]);
1266     }
1267
1268     return best_b_count;
1269 }
1270
1271 static int select_input_picture(MpegEncContext *s)
1272 {
1273     int i;
1274
1275     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1276         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1277     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1278
1279     /* set next picture type & ordering */
1280     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1281         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1282             s->next_picture_ptr == NULL || s->intra_only) {
1283             s->reordered_input_picture[0] = s->input_picture[0];
1284             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1285             s->reordered_input_picture[0]->f.coded_picture_number =
1286                 s->coded_picture_number++;
1287         } else {
1288             int b_frames;
1289
1290             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1291                 if (s->picture_in_gop_number < s->gop_size &&
1292                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1293                     // FIXME check that te gop check above is +-1 correct
1294                     if (s->input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED) {
1295                         for (i = 0; i < 4; i++)
1296                             s->input_picture[0]->f.data[i] = NULL;
1297                         s->input_picture[0]->f.type = 0;
1298                     } else {
1299                         assert(s->input_picture[0]->f.type == FF_BUFFER_TYPE_USER ||
1300                                s->input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL);
1301
1302                         s->avctx->release_buffer(s->avctx,
1303                                                  &s->input_picture[0]->f);
1304                     }
1305
1306                     emms_c();
1307                     ff_vbv_update(s, 0);
1308
1309                     goto no_output_pic;
1310                 }
1311             }
1312
1313             if (s->flags & CODEC_FLAG_PASS2) {
1314                 for (i = 0; i < s->max_b_frames + 1; i++) {
1315                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1316
1317                     if (pict_num >= s->rc_context.num_entries)
1318                         break;
1319                     if (!s->input_picture[i]) {
1320                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1321                         break;
1322                     }
1323
1324                     s->input_picture[i]->f.pict_type =
1325                         s->rc_context.entry[pict_num].new_pict_type;
1326                 }
1327             }
1328
1329             if (s->avctx->b_frame_strategy == 0) {
1330                 b_frames = s->max_b_frames;
1331                 while (b_frames && !s->input_picture[b_frames])
1332                     b_frames--;
1333             } else if (s->avctx->b_frame_strategy == 1) {
1334                 for (i = 1; i < s->max_b_frames + 1; i++) {
1335                     if (s->input_picture[i] &&
1336                         s->input_picture[i]->b_frame_score == 0) {
1337                         s->input_picture[i]->b_frame_score =
1338                             get_intra_count(s,
1339                                             s->input_picture[i    ]->f.data[0],
1340                                             s->input_picture[i - 1]->f.data[0],
1341                                             s->linesize) + 1;
1342                     }
1343                 }
1344                 for (i = 0; i < s->max_b_frames + 1; i++) {
1345                     if (s->input_picture[i] == NULL ||
1346                         s->input_picture[i]->b_frame_score - 1 >
1347                             s->mb_num / s->avctx->b_sensitivity)
1348                         break;
1349                 }
1350
1351                 b_frames = FFMAX(0, i - 1);
1352
1353                 /* reset scores */
1354                 for (i = 0; i < b_frames + 1; i++) {
1355                     s->input_picture[i]->b_frame_score = 0;
1356                 }
1357             } else if (s->avctx->b_frame_strategy == 2) {
1358                 b_frames = estimate_best_b_count(s);
1359             } else {
1360                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1361                 b_frames = 0;
1362             }
1363
1364             emms_c();
1365
1366             for (i = b_frames - 1; i >= 0; i--) {
1367                 int type = s->input_picture[i]->f.pict_type;
1368                 if (type && type != AV_PICTURE_TYPE_B)
1369                     b_frames = i;
1370             }
1371             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1372                 b_frames == s->max_b_frames) {
1373                 av_log(s->avctx, AV_LOG_ERROR,
1374                        "warning, too many b frames in a row\n");
1375             }
1376
1377             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1378                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1379                     s->gop_size > s->picture_in_gop_number) {
1380                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1381                 } else {
1382                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1383                         b_frames = 0;
1384                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1385                 }
1386             }
1387
1388             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1389                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1390                 b_frames--;
1391
1392             s->reordered_input_picture[0] = s->input_picture[b_frames];
1393             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1394                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1395             s->reordered_input_picture[0]->f.coded_picture_number =
1396                 s->coded_picture_number++;
1397             for (i = 0; i < b_frames; i++) {
1398                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1399                 s->reordered_input_picture[i + 1]->f.pict_type =
1400                     AV_PICTURE_TYPE_B;
1401                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1402                     s->coded_picture_number++;
1403             }
1404         }
1405     }
1406 no_output_pic:
1407     if (s->reordered_input_picture[0]) {
1408         s->reordered_input_picture[0]->f.reference =
1409            s->reordered_input_picture[0]->f.pict_type !=
1410                AV_PICTURE_TYPE_B ? 3 : 0;
1411
1412         ff_copy_picture(&s->new_picture, s->reordered_input_picture[0]);
1413
1414         if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_SHARED ||
1415             s->avctx->rc_buffer_size) {
1416             // input is a shared pix, so we can't modifiy it -> alloc a new
1417             // one & ensure that the shared one is reuseable
1418
1419             Picture *pic;
1420             int i = ff_find_unused_picture(s, 0);
1421             if (i < 0)
1422                 return i;
1423             pic = &s->picture[i];
1424
1425             pic->f.reference = s->reordered_input_picture[0]->f.reference;
1426             if (ff_alloc_picture(s, pic, 0) < 0) {
1427                 return -1;
1428             }
1429
1430             /* mark us unused / free shared pic */
1431             if (s->reordered_input_picture[0]->f.type == FF_BUFFER_TYPE_INTERNAL)
1432                 s->avctx->release_buffer(s->avctx,
1433                                          &s->reordered_input_picture[0]->f);
1434             for (i = 0; i < 4; i++)
1435                 s->reordered_input_picture[0]->f.data[i] = NULL;
1436             s->reordered_input_picture[0]->f.type = 0;
1437
1438             copy_picture_attributes(s, &pic->f,
1439                                     &s->reordered_input_picture[0]->f);
1440
1441             s->current_picture_ptr = pic;
1442         } else {
1443             // input is not a shared pix -> reuse buffer for current_pix
1444
1445             assert(s->reordered_input_picture[0]->f.type ==
1446                        FF_BUFFER_TYPE_USER ||
1447                    s->reordered_input_picture[0]->f.type ==
1448                        FF_BUFFER_TYPE_INTERNAL);
1449
1450             s->current_picture_ptr = s->reordered_input_picture[0];
1451             for (i = 0; i < 4; i++) {
1452                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1453             }
1454         }
1455         ff_copy_picture(&s->current_picture, s->current_picture_ptr);
1456
1457         s->picture_number = s->new_picture.f.display_picture_number;
1458     } else {
1459         memset(&s->new_picture, 0, sizeof(Picture));
1460     }
1461     return 0;
1462 }
1463
1464 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1465                           AVFrame *pic_arg, int *got_packet)
1466 {
1467     MpegEncContext *s = avctx->priv_data;
1468     int i, stuffing_count, ret;
1469     int context_count = s->slice_context_count;
1470
1471     s->picture_in_gop_number++;
1472
1473     if (load_input_picture(s, pic_arg) < 0)
1474         return -1;
1475
1476     if (select_input_picture(s) < 0) {
1477         return -1;
1478     }
1479
1480     /* output? */
1481     if (s->new_picture.f.data[0]) {
1482         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1483             return ret;
1484         if (s->mb_info) {
1485             s->mb_info_ptr = av_packet_new_side_data(pkt,
1486                                  AV_PKT_DATA_H263_MB_INFO,
1487                                  s->mb_width*s->mb_height*12);
1488             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1489         }
1490
1491         for (i = 0; i < context_count; i++) {
1492             int start_y = s->thread_context[i]->start_mb_y;
1493             int   end_y = s->thread_context[i]->  end_mb_y;
1494             int h       = s->mb_height;
1495             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1496             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1497
1498             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1499         }
1500
1501         s->pict_type = s->new_picture.f.pict_type;
1502         //emms_c();
1503         ff_MPV_frame_start(s, avctx);
1504 vbv_retry:
1505         if (encode_picture(s, s->picture_number) < 0)
1506             return -1;
1507
1508         avctx->header_bits = s->header_bits;
1509         avctx->mv_bits     = s->mv_bits;
1510         avctx->misc_bits   = s->misc_bits;
1511         avctx->i_tex_bits  = s->i_tex_bits;
1512         avctx->p_tex_bits  = s->p_tex_bits;
1513         avctx->i_count     = s->i_count;
1514         // FIXME f/b_count in avctx
1515         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1516         avctx->skip_count  = s->skip_count;
1517
1518         ff_MPV_frame_end(s);
1519
1520         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1521             ff_mjpeg_encode_picture_trailer(s);
1522
1523         if (avctx->rc_buffer_size) {
1524             RateControlContext *rcc = &s->rc_context;
1525             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1526
1527             if (put_bits_count(&s->pb) > max_size &&
1528                 s->lambda < s->avctx->lmax) {
1529                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1530                                        (s->qscale + 1) / s->qscale);
1531                 if (s->adaptive_quant) {
1532                     int i;
1533                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1534                         s->lambda_table[i] =
1535                             FFMAX(s->lambda_table[i] + 1,
1536                                   s->lambda_table[i] * (s->qscale + 1) /
1537                                   s->qscale);
1538                 }
1539                 s->mb_skipped = 0;        // done in MPV_frame_start()
1540                 // done in encode_picture() so we must undo it
1541                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1542                     if (s->flipflop_rounding          ||
1543                         s->codec_id == AV_CODEC_ID_H263P ||
1544                         s->codec_id == AV_CODEC_ID_MPEG4)
1545                         s->no_rounding ^= 1;
1546                 }
1547                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1548                     s->time_base       = s->last_time_base;
1549                     s->last_non_b_time = s->time - s->pp_time;
1550                 }
1551                 for (i = 0; i < context_count; i++) {
1552                     PutBitContext *pb = &s->thread_context[i]->pb;
1553                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1554                 }
1555                 goto vbv_retry;
1556             }
1557
1558             assert(s->avctx->rc_max_rate);
1559         }
1560
1561         if (s->flags & CODEC_FLAG_PASS1)
1562             ff_write_pass1_stats(s);
1563
1564         for (i = 0; i < 4; i++) {
1565             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1566             avctx->error[i] += s->current_picture_ptr->f.error[i];
1567         }
1568
1569         if (s->flags & CODEC_FLAG_PASS1)
1570             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1571                    avctx->i_tex_bits + avctx->p_tex_bits ==
1572                        put_bits_count(&s->pb));
1573         flush_put_bits(&s->pb);
1574         s->frame_bits  = put_bits_count(&s->pb);
1575
1576         stuffing_count = ff_vbv_update(s, s->frame_bits);
1577         s->stuffing_bits = 8*stuffing_count;
1578         if (stuffing_count) {
1579             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1580                     stuffing_count + 50) {
1581                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1582                 return -1;
1583             }
1584
1585             switch (s->codec_id) {
1586             case AV_CODEC_ID_MPEG1VIDEO:
1587             case AV_CODEC_ID_MPEG2VIDEO:
1588                 while (stuffing_count--) {
1589                     put_bits(&s->pb, 8, 0);
1590                 }
1591             break;
1592             case AV_CODEC_ID_MPEG4:
1593                 put_bits(&s->pb, 16, 0);
1594                 put_bits(&s->pb, 16, 0x1C3);
1595                 stuffing_count -= 4;
1596                 while (stuffing_count--) {
1597                     put_bits(&s->pb, 8, 0xFF);
1598                 }
1599             break;
1600             default:
1601                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1602             }
1603             flush_put_bits(&s->pb);
1604             s->frame_bits  = put_bits_count(&s->pb);
1605         }
1606
1607         /* update mpeg1/2 vbv_delay for CBR */
1608         if (s->avctx->rc_max_rate                          &&
1609             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1610             s->out_format == FMT_MPEG1                     &&
1611             90000LL * (avctx->rc_buffer_size - 1) <=
1612                 s->avctx->rc_max_rate * 0xFFFFLL) {
1613             int vbv_delay, min_delay;
1614             double inbits  = s->avctx->rc_max_rate *
1615                              av_q2d(s->avctx->time_base);
1616             int    minbits = s->frame_bits - 8 *
1617                              (s->vbv_delay_ptr - s->pb.buf - 1);
1618             double bits    = s->rc_context.buffer_index + minbits - inbits;
1619
1620             if (bits < 0)
1621                 av_log(s->avctx, AV_LOG_ERROR,
1622                        "Internal error, negative bits\n");
1623
1624             assert(s->repeat_first_field == 0);
1625
1626             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1627             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1628                         s->avctx->rc_max_rate;
1629
1630             vbv_delay = FFMAX(vbv_delay, min_delay);
1631
1632             av_assert0(vbv_delay < 0xFFFF);
1633
1634             s->vbv_delay_ptr[0] &= 0xF8;
1635             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1636             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1637             s->vbv_delay_ptr[2] &= 0x07;
1638             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1639             avctx->vbv_delay     = vbv_delay * 300;
1640         }
1641         s->total_bits     += s->frame_bits;
1642         avctx->frame_bits  = s->frame_bits;
1643
1644         pkt->pts = s->current_picture.f.pts;
1645         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1646             if (!s->current_picture.f.coded_picture_number)
1647                 pkt->dts = pkt->pts - s->dts_delta;
1648             else
1649                 pkt->dts = s->reordered_pts;
1650             s->reordered_pts = pkt->pts;
1651         } else
1652             pkt->dts = pkt->pts;
1653         if (s->current_picture.f.key_frame)
1654             pkt->flags |= AV_PKT_FLAG_KEY;
1655         if (s->mb_info)
1656             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1657     } else {
1658         s->frame_bits = 0;
1659     }
1660     assert((s->frame_bits & 7) == 0);
1661
1662     pkt->size = s->frame_bits / 8;
1663     *got_packet = !!pkt->size;
1664     return 0;
1665 }
1666
1667 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1668                                                 int n, int threshold)
1669 {
1670     static const char tab[64] = {
1671         3, 2, 2, 1, 1, 1, 1, 1,
1672         1, 1, 1, 1, 1, 1, 1, 1,
1673         1, 1, 1, 1, 1, 1, 1, 1,
1674         0, 0, 0, 0, 0, 0, 0, 0,
1675         0, 0, 0, 0, 0, 0, 0, 0,
1676         0, 0, 0, 0, 0, 0, 0, 0,
1677         0, 0, 0, 0, 0, 0, 0, 0,
1678         0, 0, 0, 0, 0, 0, 0, 0
1679     };
1680     int score = 0;
1681     int run = 0;
1682     int i;
1683     DCTELEM *block = s->block[n];
1684     const int last_index = s->block_last_index[n];
1685     int skip_dc;
1686
1687     if (threshold < 0) {
1688         skip_dc = 0;
1689         threshold = -threshold;
1690     } else
1691         skip_dc = 1;
1692
1693     /* Are all we could set to zero already zero? */
1694     if (last_index <= skip_dc - 1)
1695         return;
1696
1697     for (i = 0; i <= last_index; i++) {
1698         const int j = s->intra_scantable.permutated[i];
1699         const int level = FFABS(block[j]);
1700         if (level == 1) {
1701             if (skip_dc && i == 0)
1702                 continue;
1703             score += tab[run];
1704             run = 0;
1705         } else if (level > 1) {
1706             return;
1707         } else {
1708             run++;
1709         }
1710     }
1711     if (score >= threshold)
1712         return;
1713     for (i = skip_dc; i <= last_index; i++) {
1714         const int j = s->intra_scantable.permutated[i];
1715         block[j] = 0;
1716     }
1717     if (block[0])
1718         s->block_last_index[n] = 0;
1719     else
1720         s->block_last_index[n] = -1;
1721 }
1722
1723 static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block,
1724                                int last_index)
1725 {
1726     int i;
1727     const int maxlevel = s->max_qcoeff;
1728     const int minlevel = s->min_qcoeff;
1729     int overflow = 0;
1730
1731     if (s->mb_intra) {
1732         i = 1; // skip clipping of intra dc
1733     } else
1734         i = 0;
1735
1736     for (; i <= last_index; i++) {
1737         const int j = s->intra_scantable.permutated[i];
1738         int level = block[j];
1739
1740         if (level > maxlevel) {
1741             level = maxlevel;
1742             overflow++;
1743         } else if (level < minlevel) {
1744             level = minlevel;
1745             overflow++;
1746         }
1747
1748         block[j] = level;
1749     }
1750
1751     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1752         av_log(s->avctx, AV_LOG_INFO,
1753                "warning, clipping %d dct coefficients to %d..%d\n",
1754                overflow, minlevel, maxlevel);
1755 }
1756
/**
 * Compute a per-pixel weight for an 8x8 block.
 *
 * For every pixel the neighbourhood of up to 3x3 samples (clipped to the
 * 8x8 block) is examined; the stored weight is
 * 36 * sqrt(count * sum_of_squares - sum^2) / count, i.e. 36 times the
 * standard deviation of that neighbourhood (integer arithmetic).
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left sample of the 8x8 block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int row, col;

    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int ny, nx;

            for (ny = top; ny < bottom; ny++) {
                for (nx = left; nx < right; nx++) {
                    const int pix = ptr[nx + ny * stride];

                    total    += pix;
                    total_sq += pix * pix;
                    samples++;
                }
            }

            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1780
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * Steps, in order: pick the quantizer for this macroblock, locate (or
 * edge-emulate) the source pixels, load either the source pixels (intra) or
 * the difference to the motion-compensated prediction (inter) into
 * s->block[], run DCT + quantization per block, optionally refine/eliminate/
 * clip coefficients, and finally call the codec-specific macroblock writer.
 *
 * @param s               encoder context
 * @param motion_x        passed unchanged to the codec-specific *_encode_mb()
 * @param motion_y        passed unchanged to the codec-specific *_encode_mb()
 * @param mb_block_height height in lines of the chroma area of one
 *                        macroblock (encode_mb() passes 8 for CHROMA_420,
 *                        16 otherwise)
 * @param mb_block_count  number of 8x8 blocks in the macroblock
 *                        (encode_mb() passes 6 for CHROMA_420, 8 otherwise)
 */
static av_always_inline void encode_mb_internal(MpegEncContext *s,
                                                int motion_x, int motion_y,
                                                int mb_block_height,
                                                int mb_block_count)
{
    int16_t weight[8][64];
    DCTELEM orig[8][64];
    const int mb_x = s->mb_x;
    const int mb_y = s->mb_y;
    int i;
    int skip_dct[8];
    int dct_offset = s->linesize * 8; // default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;

    /* every block starts with the context-wide skip setting */
    for (i = 0; i < mb_block_count; i++)
        skip_dct[i] = s->skipdct;

    /* per-macroblock quantizer selection */
    if (s->adaptive_quant) {
        const int last_qp = s->qscale;
        const int mb_xy = mb_x + mb_y * s->mb_stride;

        s->lambda = s->lambda_table[mb_xy];
        update_qscale(s);

        if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
            s->qscale = s->current_picture_ptr->f.qscale_table[mb_xy];
            s->dquant = s->qscale - last_qp;

            if (s->out_format == FMT_H263) {
                /* the quantizer step between macroblocks is limited to +-2 */
                s->dquant = av_clip(s->dquant, -2, 2);

                if (s->codec_id == AV_CODEC_ID_MPEG4) {
                    if (!s->mb_intra) {
                        /* cases in which the quantizer change is dropped:
                         * odd dquant or direct mode in B-pictures, and
                         * 8x8 motion vectors */
                        if (s->pict_type == AV_PICTURE_TYPE_B) {
                            if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
                                s->dquant = 0;
                        }
                        if (s->mv_type == MV_TYPE_8X8)
                            s->dquant = 0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* source pointers into the picture that is being encoded */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y  = s->new_picture.f.data[0] +
             (mb_y * 16 * wrap_y)              + mb_x * 16;
    ptr_cb = s->new_picture.f.data[1] +
             (mb_y * mb_block_height * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.f.data[2] +
             (mb_y * mb_block_height * wrap_c) + mb_x * 8;

    /* macroblock reaches past the picture border (not done for AMV):
     * build copies via emulated_edge_mc() in edge_emu_buffer and read
     * the source from there instead */
    if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
        uint8_t *ebuf = s->edge_emu_buffer + 32;
        s->dsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
                                mb_y * 16, s->width, s->height);
        ptr_y = ebuf;
        s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, 8,
                                mb_block_height, mb_x * 8, mb_y * 8,
                                (s->width+1) >> 1, (s->height+1) >> 1);
        ptr_cb = ebuf + 18 * wrap_y;
        s->dsp.emulated_edge_mc(ebuf + 18 * wrap_y + 8, ptr_cr, wrap_c, 8,
                                mb_block_height, mb_x * 8, mb_y * 8,
                                (s->width+1) >> 1, (s->height+1) >> 1);
        ptr_cr = ebuf + 18 * wrap_y + 8;
    }

    if (s->mb_intra) {
        /* intra: the blocks are filled with the source pixels themselves */
        if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
            int progressive_score, interlaced_score;

            s->interlaced_dct = 0;
            /* the progressive score gets a bonus of 400 */
            progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
                                                    NULL, wrap_y, 8) +
                                s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
                                                    NULL, wrap_y, 8) - 400;

            if (progressive_score > 0) {
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
                                                       NULL, wrap_y * 2, 8) +
                                   s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
                                                       NULL, wrap_y * 2, 8);
                if (progressive_score > interlaced_score) {
                    s->interlaced_dct = 1;

                    /* field DCT: the lower blocks start one line below
                     * the upper ones and rows are two lines apart */
                    dct_offset = wrap_y;
                    wrap_y <<= 1;
                    if (s->chroma_format == CHROMA_422)
                        wrap_c <<= 1;
                }
            }
        }

        s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);

        if (s->flags & CODEC_FLAG_GRAY) {
            /* chroma is not transformed in gray mode */
            skip_dct[4] = 1;
            skip_dct[5] = 1;
        } else {
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
            if (!s->chroma_y_shift) { /* 422 */
                s->dsp.get_pixels(s->block[6],
                                  ptr_cb + (dct_offset >> 1), wrap_c);
                s->dsp.get_pixels(s->block[7],
                                  ptr_cr + (dct_offset >> 1), wrap_c);
            }
        }
    } else {
        /* inter: build the prediction in s->dest[] and transform the
         * difference between source and prediction */
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
            op_pix  = s->dsp.put_pixels_tab;
            op_qpix = s->dsp.put_qpel_pixels_tab;
        } else {
            op_pix  = s->dsp.put_no_rnd_pixels_tab;
            op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        if (s->mv_dir & MV_DIR_FORWARD) {
            ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
                          s->last_picture.f.data,
                          op_pix, op_qpix);
            /* a following backward prediction is averaged with the
             * forward one that was just written */
            op_pix  = s->dsp.avg_pixels_tab;
            op_qpix = s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
                          s->next_picture.f.data,
                          op_pix, op_qpix);
        }

        if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
            int progressive_score, interlaced_score;

            s->interlaced_dct = 0;
            /* same decision as in the intra case, but the comparison is
             * made between prediction and source */
            progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
                                                    ptr_y,              wrap_y,
                                                    8) +
                                s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
                                                    ptr_y + wrap_y * 8, wrap_y,
                                                    8) - 400;

            if (s->avctx->ildct_cmp == FF_CMP_VSSE)
                progressive_score -= 400;

            if (progressive_score > 0) {
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
                                                       ptr_y,
                                                       wrap_y * 2, 8) +
                                   s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
                                                       ptr_y + wrap_y,
                                                       wrap_y * 2, 8);

                if (progressive_score > interlaced_score) {
                    s->interlaced_dct = 1;

                    dct_offset = wrap_y;
                    wrap_y <<= 1;
                    if (s->chroma_format == CHROMA_422)
                        wrap_c <<= 1;
                }
            }
        }

        s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
                           dest_y + dct_offset, wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
                           dest_y + dct_offset + 8, wrap_y);

        if (s->flags & CODEC_FLAG_GRAY) {
            skip_dct[4] = 1;
            skip_dct[5] = 1;
        } else {
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
            if (!s->chroma_y_shift) { /* 422 */
                s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset >> 1),
                                   dest_cb + (dct_offset >> 1), wrap_c);
                s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset >> 1),
                                   dest_cr + (dct_offset >> 1), wrap_c);
            }
        }
        /* pre quantization */
        /* when mc_mb_var of this macroblock is below 2 * qscale^2, each
         * block whose SAD against the prediction is below 20 * qscale is
         * marked as skipped */
        if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
                2 * s->qscale * s->qscale) {
            // FIXME optimize
            if (s->dsp.sad[1](NULL, ptr_y , dest_y,
                              wrap_y, 8) < 20 * s->qscale)
                skip_dct[0] = 1;
            if (s->dsp.sad[1](NULL, ptr_y + 8,
                              dest_y + 8, wrap_y, 8) < 20 * s->qscale)
                skip_dct[1] = 1;
            if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
                              dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
                skip_dct[2] = 1;
            if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
                              dest_y + dct_offset + 8,
                              wrap_y, 8) < 20 * s->qscale)
                skip_dct[3] = 1;
            if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
                              wrap_c, 8) < 20 * s->qscale)
                skip_dct[4] = 1;
            if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
                              wrap_c, 8) < 20 * s->qscale)
                skip_dct[5] = 1;
            if (!s->chroma_y_shift) { /* 422 */
                if (s->dsp.sad[1](NULL, ptr_cb + (dct_offset >> 1),
                                  dest_cb + (dct_offset >> 1),
                                  wrap_c, 8) < 20 * s->qscale)
                    skip_dct[6] = 1;
                if (s->dsp.sad[1](NULL, ptr_cr + (dct_offset >> 1),
                                  dest_cr + (dct_offset >> 1),
                                  wrap_c, 8) < 20 * s->qscale)
                    skip_dct[7] = 1;
            }
        }
    }

    /* noise shaping needs a visual weight per coded block and a copy of
     * the untransformed block data; both are consumed by
     * dct_quantize_refine() further down */
    if (s->quantizer_noise_shaping) {
        if (!skip_dct[0])
            get_visual_weight(weight[0], ptr_y                 , wrap_y);
        if (!skip_dct[1])
            get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
        if (!skip_dct[2])
            get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if (!skip_dct[3])
            get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if (!skip_dct[4])
            get_visual_weight(weight[4], ptr_cb                , wrap_c);
        if (!skip_dct[5])
            get_visual_weight(weight[5], ptr_cr                , wrap_c);
        if (!s->chroma_y_shift) { /* 422 */
            if (!skip_dct[6])
                get_visual_weight(weight[6], ptr_cb + (dct_offset >> 1),
                                  wrap_c);
            if (!skip_dct[7])
                get_visual_weight(weight[7], ptr_cr + (dct_offset >> 1),
                                  wrap_c);
        }
        memcpy(orig[0], s->block[0], sizeof(DCTELEM) * 64 * mb_block_count);
    }

    /* DCT & quantize */
    av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
    {
        for (i = 0; i < mb_block_count; i++) {
            if (!skip_dct[i]) {
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
                // FIXME we could decide to change to quantizer instead of
                // clipping
                // JS: I don't think that would be a good idea it could lower
                //     quality instead of improve it. Just INTRADC clipping
                //     deserves changes in quantizer
                if (overflow)
                    clip_coeffs(s, s->block[i], s->block_last_index[i]);
            } else
                /* skipped blocks are reported as empty */
                s->block_last_index[i] = -1;
        }
        if (s->quantizer_noise_shaping) {
            for (i = 0; i < mb_block_count; i++) {
                if (!skip_dct[i]) {
                    s->block_last_index[i] =
                        dct_quantize_refine(s, s->block[i], weight[i],
                                            orig[i], i, s->qscale);
                }
            }
        }

        /* single coefficient elimination is applied to inter macroblocks
         * only; luma blocks are 0..3, chroma blocks are 4 and up */
        if (s->luma_elim_threshold && !s->mb_intra)
            for (i = 0; i < 4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if (s->chroma_elim_threshold && !s->mb_intra)
            for (i = 4; i < mb_block_count; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
            for (i = 0; i < mb_block_count; i++) {
                if (s->block_last_index[i] == -1)
                    s->coded_score[i] = INT_MAX / 256;
            }
        }
    }

    /* gray intra macroblock: the chroma blocks carry only a DC value,
     * 1024 / c_dc_scale with rounding
     * NOTE(review): presumably the neutral chroma level - confirm */
    if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
        s->block_last_index[4] =
        s->block_last_index[5] = 0;
        s->block[4][0] =
        s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
    }

    // non c quantize code returns incorrect block_last_index FIXME
    if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
        for (i = 0; i < mb_block_count; i++) {
            int j;
            if (s->block_last_index[i] > 0) {
                /* search backwards for the last non-zero coefficient */
                for (j = 63; j > 0; j--) {
                    if (s->block[i][s->intra_scantable.permutated[j]])
                        break;
                }
                s->block_last_index[i] = j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case AV_CODEC_ID_MPEG1VIDEO:
    case AV_CODEC_ID_MPEG2VIDEO:
        if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
            ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_MPEG4:
        if (CONFIG_MPEG4_ENCODER)
            ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_MSMPEG4V2:
    case AV_CODEC_ID_MSMPEG4V3:
    case AV_CODEC_ID_WMV1:
        if (CONFIG_MSMPEG4_ENCODER)
            ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_WMV2:
        if (CONFIG_WMV2_ENCODER)
            ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_H261:
        if (CONFIG_H261_ENCODER)
            ff_h261_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_H263:
    case AV_CODEC_ID_H263P:
    case AV_CODEC_ID_FLV1:
    case AV_CODEC_ID_RV10:
    case AV_CODEC_ID_RV20:
        if (CONFIG_H263_ENCODER)
            ff_h263_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case AV_CODEC_ID_MJPEG:
    case AV_CODEC_ID_AMV:
        if (CONFIG_MJPEG_ENCODER)
            ff_mjpeg_encode_mb(s, s->block);
        break;
    default:
        av_assert1(0);
    }
}
2145
2146 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2147 {
2148     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 6);
2149     else                                encode_mb_internal(s, motion_x, motion_y, 16, 8);
2150 }
2151
2152 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2153     int i;
2154
2155     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2156
2157     /* mpeg1 */
2158     d->mb_skip_run= s->mb_skip_run;
2159     for(i=0; i<3; i++)
2160         d->last_dc[i] = s->last_dc[i];
2161
2162     /* statistics */
2163     d->mv_bits= s->mv_bits;
2164     d->i_tex_bits= s->i_tex_bits;
2165     d->p_tex_bits= s->p_tex_bits;
2166     d->i_count= s->i_count;
2167     d->f_count= s->f_count;
2168     d->b_count= s->b_count;
2169     d->skip_count= s->skip_count;
2170     d->misc_bits= s->misc_bits;
2171     d->last_bits= 0;
2172
2173     d->mb_skipped= 0;
2174     d->qscale= s->qscale;
2175     d->dquant= s->dquant;
2176
2177     d->esc3_level_length= s->esc3_level_length;
2178 }
2179
2180 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2181     int i;
2182
2183     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2184     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2185
2186     /* mpeg1 */
2187     d->mb_skip_run= s->mb_skip_run;
2188     for(i=0; i<3; i++)
2189         d->last_dc[i] = s->last_dc[i];
2190
2191     /* statistics */
2192     d->mv_bits= s->mv_bits;
2193     d->i_tex_bits= s->i_tex_bits;
2194     d->p_tex_bits= s->p_tex_bits;
2195     d->i_count= s->i_count;
2196     d->f_count= s->f_count;
2197     d->b_count= s->b_count;
2198     d->skip_count= s->skip_count;
2199     d->misc_bits= s->misc_bits;
2200
2201     d->mb_intra= s->mb_intra;
2202     d->mb_skipped= s->mb_skipped;
2203     d->mv_type= s->mv_type;
2204     d->mv_dir= s->mv_dir;
2205     d->pb= s->pb;
2206     if(s->data_partitioning){
2207         d->pb2= s->pb2;
2208         d->tex_pb= s->tex_pb;
2209     }
2210     d->block= s->block;
2211     for(i=0; i<8; i++)
2212         d->block_last_index[i]= s->block_last_index[i];
2213     d->interlaced_dct= s->interlaced_dct;
2214     d->qscale= s->qscale;
2215
2216     d->esc3_level_length= s->esc3_level_length;
2217 }
2218
2219 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2220                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2221                            int *dmin, int *next_block, int motion_x, int motion_y)
2222 {
2223     int score;
2224     uint8_t *dest_backup[3];
2225
2226     copy_context_before_encode(s, backup, type);
2227
2228     s->block= s->blocks[*next_block];
2229     s->pb= pb[*next_block];
2230     if(s->data_partitioning){
2231         s->pb2   = pb2   [*next_block];
2232         s->tex_pb= tex_pb[*next_block];
2233     }
2234
2235     if(*next_block){
2236         memcpy(dest_backup, s->dest, sizeof(s->dest));
2237         s->dest[0] = s->rd_scratchpad;
2238         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2239         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2240         assert(s->linesize >= 32); //FIXME
2241     }
2242
2243     encode_mb(s, motion_x, motion_y);
2244
2245     score= put_bits_count(&s->pb);
2246     if(s->data_partitioning){
2247         score+= put_bits_count(&s->pb2);
2248         score+= put_bits_count(&s->tex_pb);
2249     }
2250
2251     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2252         ff_MPV_decode_mb(s, s->block);
2253
2254         score *= s->lambda2;
2255         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2256     }
2257
2258     if(*next_block){
2259         memcpy(s->dest, dest_backup, sizeof(s->dest));
2260     }
2261
2262     if(score<*dmin){
2263         *dmin= score;
2264         *next_block^=1;
2265
2266         copy_context_after_encode(best, s, type);
2267     }
2268 }
2269
2270 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2271     uint32_t *sq = ff_squareTbl + 256;
2272     int acc=0;
2273     int x,y;
2274
2275     if(w==16 && h==16)
2276         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2277     else if(w==8 && h==8)
2278         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2279
2280     for(y=0; y<h; y++){
2281         for(x=0; x<w; x++){
2282             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2283         }
2284     }
2285
2286     av_assert2(acc>=0);
2287
2288     return acc;
2289 }
2290
2291 static int sse_mb(MpegEncContext *s){
2292     int w= 16;
2293     int h= 16;
2294
2295     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2296     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2297
2298     if(w==16 && h==16)
2299       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2300         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2301                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2302                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2303       }else{
2304         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2305                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2306                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2307       }
2308     else
2309         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2310                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2311                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2312 }
2313
2314 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2315     MpegEncContext *s= *(void**)arg;
2316
2317
2318     s->me.pre_pass=1;
2319     s->me.dia_size= s->avctx->pre_dia_size;
2320     s->first_slice_line=1;
2321     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2322         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2323             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2324         }
2325         s->first_slice_line=0;
2326     }
2327
2328     s->me.pre_pass=0;
2329
2330     return 0;
2331 }
2332
2333 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2334     MpegEncContext *s= *(void**)arg;
2335
2336     ff_check_alignment();
2337
2338     s->me.dia_size= s->avctx->dia_size;
2339     s->first_slice_line=1;
2340     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2341         s->mb_x=0; //for block init below
2342         ff_init_block_index(s);
2343         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2344             s->block_index[0]+=2;
2345             s->block_index[1]+=2;
2346             s->block_index[2]+=2;
2347             s->block_index[3]+=2;
2348
2349             /* compute motion vector & mb_type and store in context */
2350             if(s->pict_type==AV_PICTURE_TYPE_B)
2351                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2352             else
2353                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2354         }
2355         s->first_slice_line=0;
2356     }
2357     return 0;
2358 }
2359
2360 static int mb_var_thread(AVCodecContext *c, void *arg){
2361     MpegEncContext *s= *(void**)arg;
2362     int mb_x, mb_y;
2363
2364     ff_check_alignment();
2365
2366     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2367         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2368             int xx = mb_x * 16;
2369             int yy = mb_y * 16;
2370             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2371             int varc;
2372             int sum = s->dsp.pix_sum(pix, s->linesize);
2373
2374             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2375
2376             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2377             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2378             s->me.mb_var_sum_temp    += varc;
2379         }
2380     }
2381     return 0;
2382 }
2383
2384 static void write_slice_end(MpegEncContext *s){
2385     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2386         if(s->partitioned_frame){
2387             ff_mpeg4_merge_partitions(s);
2388         }
2389
2390         ff_mpeg4_stuffing(&s->pb);
2391     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2392         ff_mjpeg_encode_stuffing(s);
2393     }
2394
2395     avpriv_align_put_bits(&s->pb);
2396     flush_put_bits(&s->pb);
2397
2398     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2399         s->misc_bits+= get_bits_diff(s);
2400 }
2401
2402 static void write_mb_info(MpegEncContext *s)
2403 {
2404     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2405     int offset = put_bits_count(&s->pb);
2406     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2407     int gobn = s->mb_y / s->gob_index;
2408     int pred_x, pred_y;
2409     if (CONFIG_H263_ENCODER)
2410         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2411     bytestream_put_le32(&ptr, offset);
2412     bytestream_put_byte(&ptr, s->qscale);
2413     bytestream_put_byte(&ptr, gobn);
2414     bytestream_put_le16(&ptr, mba);
2415     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2416     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2417     /* 4MV not implemented */
2418     bytestream_put_byte(&ptr, 0); /* hmv2 */
2419     bytestream_put_byte(&ptr, 0); /* vmv2 */
2420 }
2421
2422 static void update_mb_info(MpegEncContext *s, int startcode)
2423 {
2424     if (!s->mb_info)
2425         return;
2426     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2427         s->mb_info_size += 12;
2428         s->prev_mb_info = s->last_mb_info;
2429     }
2430     if (startcode) {
2431         s->prev_mb_info = put_bits_count(&s->pb)/8;
2432         /* This might have incremented mb_info_size above, and we return without
2433          * actually writing any info into that slot yet. But in that case,
2434          * this will be called again at the start of the after writing the
2435          * start code, actually writing the mb info. */
2436         return;
2437     }
2438
2439     s->last_mb_info = put_bits_count(&s->pb)/8;
2440     if (!s->mb_info_size)
2441         s->mb_info_size += 12;
2442     write_mb_info(s);
2443 }
2444
2445 static int encode_thread(AVCodecContext *c, void *arg){
2446     MpegEncContext *s= *(void**)arg;
2447     int mb_x, mb_y, pdif = 0;
2448     int chr_h= 16>>s->chroma_y_shift;
2449     int i, j;
2450     MpegEncContext best_s, backup_s;
2451     uint8_t bit_buf[2][MAX_MB_BYTES];
2452     uint8_t bit_buf2[2][MAX_MB_BYTES];
2453     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2454     PutBitContext pb[2], pb2[2], tex_pb[2];
2455
2456     ff_check_alignment();
2457
2458     for(i=0; i<2; i++){
2459         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2460         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2461         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2462     }
2463
2464     s->last_bits= put_bits_count(&s->pb);
2465     s->mv_bits=0;
2466     s->misc_bits=0;
2467     s->i_tex_bits=0;
2468     s->p_tex_bits=0;
2469     s->i_count=0;
2470     s->f_count=0;
2471     s->b_count=0;
2472     s->skip_count=0;
2473
2474     for(i=0; i<3; i++){
2475         /* init last dc values */
2476         /* note: quant matrix value (8) is implied here */
2477         s->last_dc[i] = 128 << s->intra_dc_precision;
2478
2479         s->current_picture.f.error[i] = 0;
2480     }
2481     if(s->codec_id==AV_CODEC_ID_AMV){
2482         s->last_dc[0] = 128*8/13;
2483         s->last_dc[1] = 128*8/14;
2484         s->last_dc[2] = 128*8/14;
2485     }
2486     s->mb_skip_run = 0;
2487     memset(s->last_mv, 0, sizeof(s->last_mv));
2488
2489     s->last_mv_dir = 0;
2490
2491     switch(s->codec_id){
2492     case AV_CODEC_ID_H263:
2493     case AV_CODEC_ID_H263P:
2494     case AV_CODEC_ID_FLV1:
2495         if (CONFIG_H263_ENCODER)
2496             s->gob_index = ff_h263_get_gob_height(s);
2497         break;
2498     case AV_CODEC_ID_MPEG4:
2499         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2500             ff_mpeg4_init_partitions(s);
2501         break;
2502     }
2503
2504     s->resync_mb_x=0;
2505     s->resync_mb_y=0;
2506     s->first_slice_line = 1;
2507     s->ptr_lastgob = s->pb.buf;
2508     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2509         s->mb_x=0;
2510         s->mb_y= mb_y;
2511
2512         ff_set_qscale(s, s->qscale);
2513         ff_init_block_index(s);
2514
2515         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2516             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2517             int mb_type= s->mb_type[xy];
2518 //            int d;
2519             int dmin= INT_MAX;
2520             int dir;
2521
2522             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2523                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2524                 return -1;
2525             }
2526             if(s->data_partitioning){
2527                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2528                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2529                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2530                     return -1;
2531                 }
2532             }
2533
2534             s->mb_x = mb_x;
2535             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2536             ff_update_block_index(s);
2537
2538             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2539                 ff_h261_reorder_mb_index(s);
2540                 xy= s->mb_y*s->mb_stride + s->mb_x;
2541                 mb_type= s->mb_type[xy];
2542             }
2543
2544             /* write gob / video packet header  */
2545             if(s->rtp_mode){
2546                 int current_packet_size, is_gob_start;
2547
2548                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2549
2550                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2551
2552                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2553
2554                 switch(s->codec_id){
2555                 case AV_CODEC_ID_H263:
2556                 case AV_CODEC_ID_H263P:
2557                     if(!s->h263_slice_structured)
2558                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2559                     break;
2560                 case AV_CODEC_ID_MPEG2VIDEO:
2561                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2562                 case AV_CODEC_ID_MPEG1VIDEO:
2563                     if(s->mb_skip_run) is_gob_start=0;
2564                     break;
2565                 case AV_CODEC_ID_MJPEG:
2566                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2567                     break;
2568                 }
2569
2570                 if(is_gob_start){
2571                     if(s->start_mb_y != mb_y || mb_x!=0){
2572                         write_slice_end(s);
2573                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2574                             ff_mpeg4_init_partitions(s);
2575                         }
2576                     }
2577
2578                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2579                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2580
2581                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2582                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2583                         int d= 100 / s->avctx->error_rate;
2584                         if(r % d == 0){
2585                             current_packet_size=0;
2586                             s->pb.buf_ptr= s->ptr_lastgob;
2587                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2588                         }
2589                     }
2590
2591                     if (s->avctx->rtp_callback){
2592                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2593                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2594                     }
2595                     update_mb_info(s, 1);
2596
2597                     switch(s->codec_id){
2598                     case AV_CODEC_ID_MPEG4:
2599                         if (CONFIG_MPEG4_ENCODER) {
2600                             ff_mpeg4_encode_video_packet_header(s);
2601                             ff_mpeg4_clean_buffers(s);
2602                         }
2603                     break;
2604                     case AV_CODEC_ID_MPEG1VIDEO:
2605                     case AV_CODEC_ID_MPEG2VIDEO:
2606                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2607                             ff_mpeg1_encode_slice_header(s);
2608                             ff_mpeg1_clean_buffers(s);
2609                         }
2610                     break;
2611                     case AV_CODEC_ID_H263:
2612                     case AV_CODEC_ID_H263P:
2613                         if (CONFIG_H263_ENCODER)
2614                             ff_h263_encode_gob_header(s, mb_y);
2615                     break;
2616                     }
2617
2618                     if(s->flags&CODEC_FLAG_PASS1){
2619                         int bits= put_bits_count(&s->pb);
2620                         s->misc_bits+= bits - s->last_bits;
2621                         s->last_bits= bits;
2622                     }
2623
2624                     s->ptr_lastgob += current_packet_size;
2625                     s->first_slice_line=1;
2626                     s->resync_mb_x=mb_x;
2627                     s->resync_mb_y=mb_y;
2628                 }
2629             }
2630
2631             if(  (s->resync_mb_x   == s->mb_x)
2632                && s->resync_mb_y+1 == s->mb_y){
2633                 s->first_slice_line=0;
2634             }
2635
2636             s->mb_skipped=0;
2637             s->dquant=0; //only for QP_RD
2638
2639             update_mb_info(s, 0);
2640
2641             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2642                 int next_block=0;
2643                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2644
2645                 copy_context_before_encode(&backup_s, s, -1);
2646                 backup_s.pb= s->pb;
2647                 best_s.data_partitioning= s->data_partitioning;
2648                 best_s.partitioned_frame= s->partitioned_frame;
2649                 if(s->data_partitioning){
2650                     backup_s.pb2= s->pb2;
2651                     backup_s.tex_pb= s->tex_pb;
2652                 }
2653
2654                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2655                     s->mv_dir = MV_DIR_FORWARD;
2656                     s->mv_type = MV_TYPE_16X16;
2657                     s->mb_intra= 0;
2658                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2659                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2660                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2661                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2662                 }
2663                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2664                     s->mv_dir = MV_DIR_FORWARD;
2665                     s->mv_type = MV_TYPE_FIELD;
2666                     s->mb_intra= 0;
2667                     for(i=0; i<2; i++){
2668                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2669                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2670                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2671                     }
2672                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2673                                  &dmin, &next_block, 0, 0);
2674                 }
2675                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2676                     s->mv_dir = MV_DIR_FORWARD;
2677                     s->mv_type = MV_TYPE_16X16;
2678                     s->mb_intra= 0;
2679                     s->mv[0][0][0] = 0;
2680                     s->mv[0][0][1] = 0;
2681                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2682                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2683                 }
2684                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2685                     s->mv_dir = MV_DIR_FORWARD;
2686                     s->mv_type = MV_TYPE_8X8;
2687                     s->mb_intra= 0;
2688                     for(i=0; i<4; i++){
2689                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2690                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2691                     }
2692                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2693                                  &dmin, &next_block, 0, 0);
2694                 }
2695                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2696                     s->mv_dir = MV_DIR_FORWARD;
2697                     s->mv_type = MV_TYPE_16X16;
2698                     s->mb_intra= 0;
2699                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2700                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2701                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2702                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2703                 }
2704                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2705                     s->mv_dir = MV_DIR_BACKWARD;
2706                     s->mv_type = MV_TYPE_16X16;
2707                     s->mb_intra= 0;
2708                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2709                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2710                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2711                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2712                 }
2713                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2714                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2715                     s->mv_type = MV_TYPE_16X16;
2716                     s->mb_intra= 0;
2717                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2718                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2719                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2720                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2721                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2722                                  &dmin, &next_block, 0, 0);
2723                 }
2724                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2725                     s->mv_dir = MV_DIR_FORWARD;
2726                     s->mv_type = MV_TYPE_FIELD;
2727                     s->mb_intra= 0;
2728                     for(i=0; i<2; i++){
2729                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2730                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2731                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2732                     }
2733                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2734                                  &dmin, &next_block, 0, 0);
2735                 }
2736                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2737                     s->mv_dir = MV_DIR_BACKWARD;
2738                     s->mv_type = MV_TYPE_FIELD;
2739                     s->mb_intra= 0;
2740                     for(i=0; i<2; i++){
2741                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2742                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2743                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2744                     }
2745                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2746                                  &dmin, &next_block, 0, 0);
2747                 }
2748                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2749                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2750                     s->mv_type = MV_TYPE_FIELD;
2751                     s->mb_intra= 0;
2752                     for(dir=0; dir<2; dir++){
2753                         for(i=0; i<2; i++){
2754                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2755                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2756                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2757                         }
2758                     }
2759                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2760                                  &dmin, &next_block, 0, 0);
2761                 }
2762                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2763                     s->mv_dir = 0;
2764                     s->mv_type = MV_TYPE_16X16;
2765                     s->mb_intra= 1;
2766                     s->mv[0][0][0] = 0;
2767                     s->mv[0][0][1] = 0;
2768                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2769                                  &dmin, &next_block, 0, 0);
2770                     if(s->h263_pred || s->h263_aic){
2771                         if(best_s.mb_intra)
2772                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2773                         else
2774                             ff_clean_intra_table_entries(s); //old mode?
2775                     }
2776                 }
2777
2778                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2779                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2780                         const int last_qp= backup_s.qscale;
2781                         int qpi, qp, dc[6];
2782                         DCTELEM ac[6][16];
2783                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2784                         static const int dquant_tab[4]={-1,1,-2,2};
2785
2786                         av_assert2(backup_s.dquant == 0);
2787
2788                         //FIXME intra
2789                         s->mv_dir= best_s.mv_dir;
2790                         s->mv_type = MV_TYPE_16X16;
2791                         s->mb_intra= best_s.mb_intra;
2792                         s->mv[0][0][0] = best_s.mv[0][0][0];
2793                         s->mv[0][0][1] = best_s.mv[0][0][1];
2794                         s->mv[1][0][0] = best_s.mv[1][0][0];
2795                         s->mv[1][0][1] = best_s.mv[1][0][1];
2796
2797                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2798                         for(; qpi<4; qpi++){
2799                             int dquant= dquant_tab[qpi];
2800                             qp= last_qp + dquant;
2801                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2802                                 continue;
2803                             backup_s.dquant= dquant;
2804                             if(s->mb_intra && s->dc_val[0]){
2805                                 for(i=0; i<6; i++){
2806                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2807                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16);
2808                                 }
2809                             }
2810
2811                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2812                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2813                             if(best_s.qscale != qp){
2814                                 if(s->mb_intra && s->dc_val[0]){
2815                                     for(i=0; i<6; i++){
2816                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2817                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16);
2818                                     }
2819                                 }
2820                             }
2821                         }
2822                     }
2823                 }
2824                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2825                     int mx= s->b_direct_mv_table[xy][0];
2826                     int my= s->b_direct_mv_table[xy][1];
2827
2828                     backup_s.dquant = 0;
2829                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2830                     s->mb_intra= 0;
2831                     ff_mpeg4_set_direct_mv(s, mx, my);
2832                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2833                                  &dmin, &next_block, mx, my);
2834                 }
2835                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2836                     backup_s.dquant = 0;
2837                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2838                     s->mb_intra= 0;
2839                     ff_mpeg4_set_direct_mv(s, 0, 0);
2840                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2841                                  &dmin, &next_block, 0, 0);
2842                 }
2843                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2844                     int coded=0;
2845                     for(i=0; i<6; i++)
2846                         coded |= s->block_last_index[i];
2847                     if(coded){
2848                         int mx,my;
2849                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2850                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2851                             mx=my=0; //FIXME find the one we actually used
2852                             ff_mpeg4_set_direct_mv(s, mx, my);
2853                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2854                             mx= s->mv[1][0][0];
2855                             my= s->mv[1][0][1];
2856                         }else{
2857                             mx= s->mv[0][0][0];
2858                             my= s->mv[0][0][1];
2859                         }
2860
2861                         s->mv_dir= best_s.mv_dir;
2862                         s->mv_type = best_s.mv_type;
2863                         s->mb_intra= 0;
2864 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2865                         s->mv[0][0][1] = best_s.mv[0][0][1];
2866                         s->mv[1][0][0] = best_s.mv[1][0][0];
2867                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2868                         backup_s.dquant= 0;
2869                         s->skipdct=1;
2870                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2871                                         &dmin, &next_block, mx, my);
2872                         s->skipdct=0;
2873                     }
2874                 }
2875
2876                 s->current_picture.f.qscale_table[xy] = best_s.qscale;
2877
2878                 copy_context_after_encode(s, &best_s, -1);
2879
2880                 pb_bits_count= put_bits_count(&s->pb);
2881                 flush_put_bits(&s->pb);
2882                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2883                 s->pb= backup_s.pb;
2884
2885                 if(s->data_partitioning){
2886                     pb2_bits_count= put_bits_count(&s->pb2);
2887                     flush_put_bits(&s->pb2);
2888                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2889                     s->pb2= backup_s.pb2;
2890
2891                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2892                     flush_put_bits(&s->tex_pb);
2893                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2894                     s->tex_pb= backup_s.tex_pb;
2895                 }
2896                 s->last_bits= put_bits_count(&s->pb);
2897
2898                 if (CONFIG_H263_ENCODER &&
2899                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2900                     ff_h263_update_motion_val(s);
2901
2902                 if(next_block==0){ //FIXME 16 vs linesize16
2903                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2904                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2905                     s->dsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2906                 }
2907
2908                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2909                     ff_MPV_decode_mb(s, s->block);
2910             } else {
2911                 int motion_x = 0, motion_y = 0;
2912                 s->mv_type=MV_TYPE_16X16;
2913                 // only one MB-Type possible
2914
2915                 switch(mb_type){
2916                 case CANDIDATE_MB_TYPE_INTRA:
2917                     s->mv_dir = 0;
2918                     s->mb_intra= 1;
2919                     motion_x= s->mv[0][0][0] = 0;
2920                     motion_y= s->mv[0][0][1] = 0;
2921                     break;
2922                 case CANDIDATE_MB_TYPE_INTER:
2923                     s->mv_dir = MV_DIR_FORWARD;
2924                     s->mb_intra= 0;
2925                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2926                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2927                     break;
2928                 case CANDIDATE_MB_TYPE_INTER_I:
2929                     s->mv_dir = MV_DIR_FORWARD;
2930                     s->mv_type = MV_TYPE_FIELD;
2931                     s->mb_intra= 0;
2932                     for(i=0; i<2; i++){
2933                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2934                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2935                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2936                     }
2937                     break;
2938                 case CANDIDATE_MB_TYPE_INTER4V:
2939                     s->mv_dir = MV_DIR_FORWARD;
2940                     s->mv_type = MV_TYPE_8X8;
2941                     s->mb_intra= 0;
2942                     for(i=0; i<4; i++){
2943                         s->mv[0][i][0] = s->current_picture.f.motion_val[0][s->block_index[i]][0];
2944                         s->mv[0][i][1] = s->current_picture.f.motion_val[0][s->block_index[i]][1];
2945                     }
2946                     break;
2947                 case CANDIDATE_MB_TYPE_DIRECT:
2948                     if (CONFIG_MPEG4_ENCODER) {
2949                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2950                         s->mb_intra= 0;
2951                         motion_x=s->b_direct_mv_table[xy][0];
2952                         motion_y=s->b_direct_mv_table[xy][1];
2953                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2954                     }
2955                     break;
2956                 case CANDIDATE_MB_TYPE_DIRECT0:
2957                     if (CONFIG_MPEG4_ENCODER) {
2958                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2959                         s->mb_intra= 0;
2960                         ff_mpeg4_set_direct_mv(s, 0, 0);
2961                     }
2962                     break;
2963                 case CANDIDATE_MB_TYPE_BIDIR:
2964                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2965                     s->mb_intra= 0;
2966                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2967                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2968                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2969                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2970                     break;
2971                 case CANDIDATE_MB_TYPE_BACKWARD:
2972                     s->mv_dir = MV_DIR_BACKWARD;
2973                     s->mb_intra= 0;
2974                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2975                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2976                     break;
2977                 case CANDIDATE_MB_TYPE_FORWARD:
2978                     s->mv_dir = MV_DIR_FORWARD;
2979                     s->mb_intra= 0;
2980                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2981                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2982                     break;
2983                 case CANDIDATE_MB_TYPE_FORWARD_I:
2984                     s->mv_dir = MV_DIR_FORWARD;
2985                     s->mv_type = MV_TYPE_FIELD;
2986                     s->mb_intra= 0;
2987                     for(i=0; i<2; i++){
2988                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2989                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2990                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2991                     }
2992                     break;
2993                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2994                     s->mv_dir = MV_DIR_BACKWARD;
2995                     s->mv_type = MV_TYPE_FIELD;
2996                     s->mb_intra= 0;
2997                     for(i=0; i<2; i++){
2998                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2999                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3000                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3001                     }
3002                     break;
3003                 case CANDIDATE_MB_TYPE_BIDIR_I:
3004                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3005                     s->mv_type = MV_TYPE_FIELD;
3006                     s->mb_intra= 0;
3007                     for(dir=0; dir<2; dir++){
3008                         for(i=0; i<2; i++){
3009                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3010                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3011                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3012                         }
3013                     }
3014                     break;
3015                 default:
3016                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3017                 }
3018
3019                 encode_mb(s, motion_x, motion_y);
3020
3021                 // RAL: Update last macroblock type
3022                 s->last_mv_dir = s->mv_dir;
3023
3024                 if (CONFIG_H263_ENCODER &&
3025                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3026                     ff_h263_update_motion_val(s);
3027
3028                 ff_MPV_decode_mb(s, s->block);
3029             }
3030
3031             /* clean the MV table in IPS frames for direct mode in B frames */
3032             if(s->mb_intra /* && I,P,S_TYPE */){
3033                 s->p_mv_table[xy][0]=0;
3034                 s->p_mv_table[xy][1]=0;
3035             }
3036
3037             if(s->flags&CODEC_FLAG_PSNR){
3038                 int w= 16;
3039                 int h= 16;
3040
3041                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3042                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3043
3044                 s->current_picture.f.error[0] += sse(
3045                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3046                     s->dest[0], w, h, s->linesize);
3047                 s->current_picture.f.error[1] += sse(
3048                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3049                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3050                 s->current_picture.f.error[2] += sse(
3051                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3052                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3053             }
3054             if(s->loop_filter){
3055                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3056                     ff_h263_loop_filter(s);
3057             }
3058             av_dlog(s->avctx, "MB %d %d bits\n",
3059                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3060         }
3061     }
3062
3063     //not beautiful here but we must write it before flushing so it has to be here
3064     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3065         ff_msmpeg4_encode_ext_header(s);
3066
3067     write_slice_end(s);
3068
3069     /* Send the last GOB if RTP */
3070     if (s->avctx->rtp_callback) {
3071         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3072         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3073         /* Call the RTP callback to send the last GOB */
3074         emms_c();
3075         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3076     }
3077
3078     return 0;
3079 }
3080
3081 #define MERGE(field) dst->field += src->field; src->field=0
3082 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3083     MERGE(me.scene_change_score);
3084     MERGE(me.mc_mb_var_sum_temp);
3085     MERGE(me.mb_var_sum_temp);
3086 }
3087
3088 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3089     int i;
3090
3091     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3092     MERGE(dct_count[1]);
3093     MERGE(mv_bits);
3094     MERGE(i_tex_bits);
3095     MERGE(p_tex_bits);
3096     MERGE(i_count);
3097     MERGE(f_count);
3098     MERGE(b_count);
3099     MERGE(skip_count);
3100     MERGE(misc_bits);
3101     MERGE(error_count);
3102     MERGE(padding_bug_score);
3103     MERGE(current_picture.f.error[0]);
3104     MERGE(current_picture.f.error[1]);
3105     MERGE(current_picture.f.error[2]);
3106
3107     if(dst->avctx->noise_reduction){
3108         for(i=0; i<64; i++){
3109             MERGE(dct_error_sum[0][i]);
3110             MERGE(dct_error_sum[1][i]);
3111         }
3112     }
3113
3114     assert(put_bits_count(&src->pb) % 8 ==0);
3115     assert(put_bits_count(&dst->pb) % 8 ==0);
3116     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3117     flush_put_bits(&dst->pb);
3118 }
3119
3120 static int estimate_qp(MpegEncContext *s, int dry_run){
3121     if (s->next_lambda){
3122         s->current_picture_ptr->f.quality =
3123         s->current_picture.f.quality = s->next_lambda;
3124         if(!dry_run) s->next_lambda= 0;
3125     } else if (!s->fixed_qscale) {
3126         s->current_picture_ptr->f.quality =
3127         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3128         if (s->current_picture.f.quality < 0)
3129             return -1;
3130     }
3131
3132     if(s->adaptive_quant){
3133         switch(s->codec_id){
3134         case AV_CODEC_ID_MPEG4:
3135             if (CONFIG_MPEG4_ENCODER)
3136                 ff_clean_mpeg4_qscales(s);
3137             break;
3138         case AV_CODEC_ID_H263:
3139         case AV_CODEC_ID_H263P:
3140         case AV_CODEC_ID_FLV1:
3141             if (CONFIG_H263_ENCODER)
3142                 ff_clean_h263_qscales(s);
3143             break;
3144         default:
3145             ff_init_qscale_tab(s);
3146         }
3147
3148         s->lambda= s->lambda_table[0];
3149         //FIXME broken
3150     }else
3151         s->lambda = s->current_picture.f.quality;
3152     update_qscale(s);
3153     return 0;
3154 }
3155
3156 /* must be called before writing the header */
3157 static void set_frame_distances(MpegEncContext * s){
3158     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3159     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3160
3161     if(s->pict_type==AV_PICTURE_TYPE_B){
3162         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3163         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3164     }else{
3165         s->pp_time= s->time - s->last_non_b_time;
3166         s->last_non_b_time= s->time;
3167         assert(s->picture_number==0 || s->pp_time > 0);
3168     }
3169 }
3170
/**
 * Encode one picture into s->pb.
 *
 * Steps, in order: set up timing and rounding state, pick a lambda for
 * motion estimation, run motion estimation (or the variance pass for I
 * pictures) on all slice contexts, select f_code/b_code, estimate the final
 * quantizer, write the format specific picture header and finally run
 * encode_thread on all slice contexts and merge their results.
 *
 * @param s              encoder context; s->pict_type is changed from P to I
 *                       when the scene change score exceeds the threshold
 * @param picture_number stored in s->picture_number and passed on to the
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails
 */
3171 static int encode_picture(MpegEncContext *s, int picture_number)
3172 {
3173     int i;
3174     int bits;
3175     int context_count = s->slice_context_count;
3176
3177     s->picture_number = picture_number;
3178
3179     /* Reset the average MB variance */
3180     s->me.mb_var_sum_temp    =
3181     s->me.mc_mb_var_sum_temp = 0;
3182
3183     /* we need to initialize some time vars before we can encode b-frames */
3184     // RAL: Condition added for MPEG1VIDEO
3185     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3186         set_frame_distances(s);
3187     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3188         ff_set_mpeg4_time(s);
3189
3190     s->me.scene_change_score=0;
3191
3192 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3193
         /* I pictures reset no_rounding (1 for msmpeg4_version >= 3, else 0);
          * other non-B pictures toggle it for flipflop_rounding, H263P and MPEG4 */
3194     if(s->pict_type==AV_PICTURE_TYPE_I){
3195         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3196         else                        s->no_rounding=0;
3197     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3198         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3199             s->no_rounding ^= 1;
3200     }
3201
         /* lambda for motion estimation: from a dry-run QP estimate in 2-pass
          * mode, otherwise (unless CODEC_FLAG_QSCALE is set) from the last
          * lambda stored for this kind of picture */
3202     if(s->flags & CODEC_FLAG_PASS2){
3203         if (estimate_qp(s,1) < 0)
3204             return -1;
3205         ff_get_2pass_fcode(s);
3206     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3207         if(s->pict_type==AV_PICTURE_TYPE_B)
3208             s->lambda= s->last_lambda_for[s->pict_type];
3209         else
3210             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3211         update_qscale(s);
3212     }
3213
         /* for everything but AMV the chroma intra quantizer tables alias the
          * luma ones; tables that are distinct from the luma ones are freed first */
3214     if(s->codec_id != AV_CODEC_ID_AMV){
3215         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3216         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3217         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3218         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3219     }
3220
3221     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* context 0 is s itself, only the additional contexts are updated */
3222     for(i=1; i<context_count; i++){
3223         ff_update_duplicate_context(s->thread_context[i], s);
3224     }
3225
3226     if(ff_init_me(s)<0)
3227         return -1;
3228
3229     /* Estimate motion for every MB */
3230     if(s->pict_type != AV_PICTURE_TYPE_I){
3231         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3232         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3233         if(s->pict_type != AV_PICTURE_TYPE_B && s->avctx->me_threshold==0){
3234             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3235                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3236             }
3237         }
3238
3239         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3240     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3241         /* I-Frame */
3242         for(i=0; i<s->mb_stride*s->mb_height; i++)
3243             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3244
3245         if(!s->fixed_qscale){
3246             /* finding spatial complexity for I-frame rate control */
3247             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3248         }
3249     }
         /* sum the ME statistics of the additional contexts into s */
3250     for(i=1; i<context_count; i++){
3251         merge_context_after_me(s, s->thread_context[i]);
3252     }
3253     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3254     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3255     emms_c();
3256
         /* a P picture whose scene change score exceeds the threshold is
          * re-typed as an I picture with every MB marked intra */
3257     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3258         s->pict_type= AV_PICTURE_TYPE_I;
3259         for(i=0; i<s->mb_stride*s->mb_height; i++)
3260             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3261         if(s->msmpeg4_version >= 3)
3262             s->no_rounding=1;
3263         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3264                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3265     }
3266
         /* unless umvplus is set: choose f_code (and b_code for B pictures) from
          * the estimated vectors, then pass every MV table through
          * ff_fix_long_mvs() with the chosen code */
3267     if(!s->umvplus){
3268         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3269             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3270
3271             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3272                 int a,b;
3273                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3274                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3275                 s->f_code= FFMAX3(s->f_code, a, b);
3276             }
3277
3278             ff_fix_long_p_mvs(s);
3279             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3280             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3281                 int j;
3282                 for(i=0; i<2; i++){
3283                     for(j=0; j<2; j++)
3284                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3285                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3286                 }
3287             }
3288         }
3289
3290         if(s->pict_type==AV_PICTURE_TYPE_B){
3291             int a, b;
3292
                 /* f_code covers the forward tables, b_code the backward ones */
3293             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3294             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3295             s->f_code = FFMAX(a, b);
3296
3297             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3298             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3299             s->b_code = FFMAX(a, b);
3300
3301             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3302             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3303             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3304             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3305             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3306                 int dir, j;
3307                 for(dir=0; dir<2; dir++){
3308                     for(i=0; i<2; i++){
3309                         for(j=0; j<2; j++){
3310                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3311                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3312                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3313                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3314                         }
3315                     }
3316                 }
3317             }
3318         }
3319     }
3320
         /* final (non dry-run) quantizer estimate for this picture */
3321     if (estimate_qp(s, 0) < 0)
3322         return -1;
3323
3324     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3325         s->qscale= 3; //reduce clipping problems
3326
3327     if (s->out_format == FMT_MJPEG) {
3328         /* for mjpeg, we do include qscale in the matrix */
3329         for(i=1;i<64;i++){
3330             int j= s->dsp.idct_permutation[i];
3331
3332             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3333         }
3334         s->y_dc_scale_table=
3335         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3336         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3337         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3338                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3339         s->qscale= 8;
3340     }
         /* AMV: intra matrices come from sp5x_quant_table rows 5*2+0 (luma) and
          * 5*2+1 (chroma); the DC scales are the constants 13 and 14 */
3341     if(s->codec_id == AV_CODEC_ID_AMV){
3342         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3343         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3344         for(i=1;i<64;i++){
3345             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3346
3347             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3348             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3349         }
3350         s->y_dc_scale_table= y;
3351         s->c_dc_scale_table= c;
3352         s->intra_matrix[0] = 13;
3353         s->chroma_intra_matrix[0] = 14;
3354         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3355                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3356         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3357                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3358         s->qscale= 8;
3359     }
3360
3361     //FIXME var duplication
3362     s->current_picture_ptr->f.key_frame =
3363     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3364     s->current_picture_ptr->f.pict_type =
3365     s->current_picture.f.pict_type = s->pict_type;
3366
3367     if (s->current_picture.f.key_frame)
3368         s->picture_in_gop_number=0;
3369
         /* write the picture header of the output format; its size in bits is
          * recorded in s->header_bits below */
3370     s->mb_x = s->mb_y = 0;
3371     s->last_bits= put_bits_count(&s->pb);
3372     switch(s->out_format) {
3373     case FMT_MJPEG:
3374         if (CONFIG_MJPEG_ENCODER)
3375             ff_mjpeg_encode_picture_header(s);
3376         break;
3377     case FMT_H261:
3378         if (CONFIG_H261_ENCODER)
3379             ff_h261_encode_picture_header(s, picture_number);
3380         break;
3381     case FMT_H263:
3382         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3383             ff_wmv2_encode_picture_header(s, picture_number);
3384         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3385             ff_msmpeg4_encode_picture_header(s, picture_number);
3386         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3387             ff_mpeg4_encode_picture_header(s, picture_number);
3388         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3389             ff_rv10_encode_picture_header(s, picture_number);
3390         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3391             ff_rv20_encode_picture_header(s, picture_number);
3392         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3393             ff_flv_encode_picture_header(s, picture_number);
3394         else if (CONFIG_H263_ENCODER)
3395             ff_h263_encode_picture_header(s, picture_number);
3396         break;
3397     case FMT_MPEG1:
3398         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3399             ff_mpeg1_encode_picture_header(s, picture_number);
3400         break;
3401     case FMT_H264:
3402         break;
3403     default:
3404         av_assert0(0);
3405     }
3406     bits= put_bits_count(&s->pb);
3407     s->header_bits= bits - s->last_bits;
3408
         /* run encode_thread on all contexts, then merge the statistics and the
          * bitstreams of the additional contexts into s */
3409     for(i=1; i<context_count; i++){
3410         update_duplicate_context_after_me(s->thread_context[i], s);
3411     }
3412     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3413     for(i=1; i<context_count; i++){
3414         merge_context_after_encode(s, s->thread_context[i]);
3415     }
3416     emms_c();
3417     return 0;
3418 }
3419
3420 static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
3421     const int intra= s->mb_intra;
3422     int i;
3423
3424     s->dct_count[intra]++;
3425
3426     for(i=0; i<64; i++){
3427         int level= block[i];
3428
3429         if(level){
3430             if(level>0){
3431                 s->dct_error_sum[intra][i] += level;
3432                 level -= s->dct_offset[intra][i];
3433                 if(level<0) level=0;
3434             }else{
3435                 s->dct_error_sum[intra][i] -= level;
3436                 level += s->dct_offset[intra][i];
3437                 if(level>0) level=0;
3438             }
3439             block[i]= level;
3440         }
3441     }
3442 }
3443
/**
 * Transform one block with s->dsp.fdct and quantize it with a
 * rate-distortion search over candidate levels (trellis quantization).
 *
 * @param s        encoder context; s->coded_score[n] is written on the paths
 *                 that reach the search
 * @param block    64 coefficients, transformed and quantized in place
 * @param n        block index; n < 4 selects the luma DC scale and intra
 *                 matrix, otherwise the chroma ones
 * @param qscale   quantizer scale
 * @param overflow set to nonzero when the largest quantized level exceeds
 *                 s->max_qcoeff
 * @return scan index of the last nonzero coefficient; a value below the
 *         first searched index (1 for intra, 0 for inter blocks) means no
 *         coefficient was kept
 */
3444 static int dct_quantize_trellis_c(MpegEncContext *s,
3445                                   DCTELEM *block, int n,
3446                                   int qscale, int *overflow){
3447     const int *qmat;
3448     const uint8_t *scantable= s->intra_scantable.scantable;
3449     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3450     int max=0;
3451     unsigned int threshold1, threshold2;
3452     int bias=0;
3453     int run_tab[65];
3454     int level_tab[65];
3455     int score_tab[65];
3456     int survivor[65];
3457     int survivor_count;
3458     int last_run=0;
3459     int last_level=0;
3460     int last_score= 0;
3461     int last_i;
3462     int coeff[2][64];
3463     int coeff_count[64];
3464     int qmul, qadd, start_i, last_non_zero, i, dc;
3465     const int esc_length= s->ac_esc_length;
3466     uint8_t * length;
3467     uint8_t * last_length;
3468     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3469
3470     s->dsp.fdct (block);
3471
3472     if(s->dct_error_sum)
3473         s->denoise_dct(s, block);
         /* used below to reconstruct coefficients for FMT_H263:
          * alevel*qmul + qadd */
3474     qmul= qscale*16;
3475     qadd= ((qscale-1)|1)*8;
3476
3477     if (s->mb_intra) {
             /* intra: DC is quantized on its own with the DC scale, the search
              * below starts at scan index 1 */
3478         int q;
3479         if (!s->h263_aic) {
3480             if (n < 4)
3481                 q = s->y_dc_scale;
3482             else
3483                 q = s->c_dc_scale;
3484             q = q << 3;
3485         } else{
3486             /* For AIC we skip quant/dequant of INTRADC */
3487             q = 1 << 3;
3488             qadd=0;
3489         }
3490
3491         /* note: block[0] is assumed to be positive */
3492         block[0] = (block[0] + (q >> 1)) / q;
3493         start_i = 1;
3494         last_non_zero = 0;
3495         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3496         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3497             bias= 1<<(QMAT_SHIFT-1);
3498         length     = s->intra_ac_vlc_length;
3499         last_length= s->intra_ac_vlc_last_length;
3500     } else {
3501         start_i = 0;
3502         last_non_zero = -1;
3503         qmat = s->q_inter_matrix[qscale];
3504         length     = s->inter_ac_vlc_length;
3505         last_length= s->inter_ac_vlc_last_length;
3506     }
3507     last_i= start_i;
3508
         /* (unsigned)(level+threshold1) > threshold2 is a single-compare test
          * for level outside [-threshold1, threshold1], i.e. for values whose
          * biased quantization is nonzero */
3509     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3510     threshold2= (threshold1<<1);
3511
         /* find the last coefficient in scan order that does not quantize to 0 */
3512     for(i=63; i>=start_i; i--) {
3513         const int j = scantable[i];
3514         int level = block[j] * qmat[j];
3515
3516         if(((unsigned)(level+threshold1))>threshold2){
3517             last_non_zero = i;
3518             break;
3519         }
3520     }
3521
         /* candidate levels per position: the biased quantized level and, when
          * its magnitude is at least 2, the next smaller magnitude; positions
          * that would quantize to 0 get +-1 (sign of level) as only candidate */
3522     for(i=start_i; i<=last_non_zero; i++) {
3523         const int j = scantable[i];
3524         int level = block[j] * qmat[j];
3525
3526 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3527 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3528         if(((unsigned)(level+threshold1))>threshold2){
3529             if(level>0){
3530                 level= (bias + level)>>QMAT_SHIFT;
3531                 coeff[0][i]= level;
3532                 coeff[1][i]= level-1;
3533 //                coeff[2][k]= level-2;
3534             }else{
3535                 level= (bias - level)>>QMAT_SHIFT;
3536                 coeff[0][i]= -level;
3537                 coeff[1][i]= -level+1;
3538 //                coeff[2][k]= -level+2;
3539             }
3540             coeff_count[i]= FFMIN(level, 2);
3541             av_assert2(coeff_count[i]);
3542             max |=level;
3543         }else{
3544             coeff[0][i]= (level>>31)|1;
3545             coeff_count[i]= 1;
3546         }
3547     }
3548
3549     *overflow= s->max_qcoeff < max; //overflow might have happened
3550
         /* nothing survives quantization: clear the searched range and return */
3551     if(last_non_zero < start_i){
3552         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3553         return last_non_zero;
3554     }
3555
         /* search over scan positions: score_tab[i+1] receives the best score
          * found for position i; survivor[] lists the positions that are still
          * tried as the start of a run */
3556     score_tab[start_i]= 0;
3557     survivor[0]= start_i;
3558     survivor_count= 1;
3559
3560     for(i=start_i; i<=last_non_zero; i++){
3561         int level_index, j, zero_distortion;
3562         int dct_coeff= FFABS(block[ scantable[i] ]);
3563         int best_score=256*256*256*120;
3564
             /* NOTE(review): presumably undoes the output scaling of
              * ff_fdct_ifast -- confirm */
3565         if (s->dsp.fdct == ff_fdct_ifast)
3566             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3567         zero_distortion= dct_coeff*dct_coeff;
3568
3569         for(level_index=0; level_index < coeff_count[i]; level_index++){
3570             int distortion;
3571             int level= coeff[level_index][i];
3572             const int alevel= FFABS(level);
3573             int unquant_coeff;
3574
3575             av_assert2(level);
3576
                 /* reconstruct the magnitude this candidate would decode to */
3577             if(s->out_format == FMT_H263){
3578                 unquant_coeff= alevel*qmul + qadd;
3579             }else{ //MPEG1
3580                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3581                 if(s->mb_intra){
3582                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3583                         unquant_coeff =   (unquant_coeff - 1) | 1;
3584                 }else{
3585                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3586                         unquant_coeff =   (unquant_coeff - 1) | 1;
3587                 }
3588                 unquant_coeff<<= 3;
3589             }
3590
                 /* distortion is relative to coding this coefficient as zero */
3591             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* after the +64 offset, levels in [-64, 63] index the VLC
                  * length tables; everything else is charged esc_length */
3592             level+=64;
3593             if((level&(~127)) == 0){
3594                 for(j=survivor_count-1; j>=0; j--){
3595                     int run= i - survivor[j];
3596                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3597                     score += score_tab[i-run];
3598
3599                     if(score < best_score){
3600                         best_score= score;
3601                         run_tab[i+1]= run;
3602                         level_tab[i+1]= level-64;
3603                     }
3604                 }
3605
                     /* FMT_H263 uses separate lengths (last_length) for the final
                      * coefficient, so the best end point is tracked here */
3606                 if(s->out_format == FMT_H263){
3607                     for(j=survivor_count-1; j>=0; j--){
3608                         int run= i - survivor[j];
3609                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3610                         score += score_tab[i-run];
3611                         if(score < last_score){
3612                             last_score= score;
3613                             last_run= run;
3614                             last_level= level-64;
3615                             last_i= i+1;
3616                         }
3617                     }
3618                 }
3619             }else{
3620                 distortion += esc_length*lambda;
3621                 for(j=survivor_count-1; j>=0; j--){
3622                     int run= i - survivor[j];
3623                     int score= distortion + score_tab[i-run];
3624
3625                     if(score < best_score){
3626                         best_score= score;
3627                         run_tab[i+1]= run;
3628                         level_tab[i+1]= level-64;
3629                     }
3630                 }
3631
3632                 if(s->out_format == FMT_H263){
3633                   for(j=survivor_count-1; j>=0; j--){
3634                         int run= i - survivor[j];
3635                         int score= distortion + score_tab[i-run];
3636                         if(score < last_score){
3637                             last_score= score;
3638                             last_run= run;
3639                             last_level= level-64;
3640                             last_i= i+1;
3641                         }
3642                     }
3643                 }
3644             }
3645         }
3646
3647         score_tab[i+1]= best_score;
3648
3649         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
             /* drop survivors whose score is worse than the new best (with a
              * tolerance of lambda when last_non_zero > 27) */
3650         if(last_non_zero <= 27){
3651             for(; survivor_count; survivor_count--){
3652                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3653                     break;
3654             }
3655         }else{
3656             for(; survivor_count; survivor_count--){
3657                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3658                     break;
3659             }
3660         }
3661
3662         survivor[ survivor_count++ ]= i+1;
3663     }
3664
         /* other formats: choose the end position afterwards, charging 2*lambda
          * for every nonzero position */
3665     if(s->out_format != FMT_H263){
3666         last_score= 256*256*256*120;
3667         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3668             int score= score_tab[i];
3669             if(i) score += lambda*2; //FIXME exacter?
3670
3671             if(score < last_score){
3672                 last_score= score;
3673                 last_i= i;
3674                 last_level= level_tab[i];
3675                 last_run= run_tab[i];
3676             }
3677         }
3678     }
3679
3680     s->coded_score[n] = last_score;
3681
3682     dc= FFABS(block[0]);
3683     last_non_zero= last_i - 1;
3684     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
3685
3686     if(last_non_zero < start_i)
3687         return last_non_zero;
3688
         /* inter block where only the first coefficient is left: decide again
          * between its candidate levels and dropping it entirely */
3689     if(last_non_zero == 0 && start_i == 0){
3690         int best_level= 0;
3691         int best_score= dc * dc;
3692
3693         for(i=0; i<coeff_count[0]; i++){
3694             int level= coeff[i][0];
3695             int alevel= FFABS(level);
3696             int unquant_coeff, score, distortion;
3697
3698             if(s->out_format == FMT_H263){
3699                     unquant_coeff= (alevel*qmul + qadd)>>3;
3700             }else{ //MPEG1
3701                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3702                     unquant_coeff =   (unquant_coeff - 1) | 1;
3703             }
3704             unquant_coeff = (unquant_coeff + 4) >> 3;
3705             unquant_coeff<<= 3 + 3;
3706
3707             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3708             level+=64;
3709             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3710             else                    score= distortion + esc_length*lambda;
3711
3712             if(score < best_score){
3713                 best_score= score;
3714                 best_level= level - 64;
3715             }
3716         }
3717         block[0]= best_level;
3718         s->coded_score[n] = best_score - dc*dc;
3719         if(best_level == 0) return -1;
3720         else                return last_non_zero;
3721     }
3722
         /* write the chosen levels back, walking the run/level chain from the
          * last coefficient towards start_i */
3723     i= last_i;
3724     av_assert2(last_level);
3725
3726     block[ perm_scantable[last_non_zero] ]= last_level;
3727     i -= last_run + 1;
3728
3729     for(; i>start_i; i -= run_tab[i] + 1){
3730         block[ perm_scantable[i-1] ]= level_tab[i];
3731     }
3732
3733     return last_non_zero;
3734 }
3735
3736 //#define REFINE_STATS 1
3737 static int16_t basis[64][64];
3738
3739 static void build_basis(uint8_t *perm){
3740     int i, j, x, y;
3741     emms_c();
3742     for(i=0; i<8; i++){
3743         for(j=0; j<8; j++){
3744             for(y=0; y<8; y++){
3745                 for(x=0; x<8; x++){
3746                     double s= 0.25*(1<<BASIS_SHIFT);
3747                     int index= 8*i + j;
3748                     int perm_index= perm[index];
3749                     if(i==0) s*= sqrt(0.5);
3750                     if(j==0) s*= sqrt(0.5);
3751                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3752                 }
3753             }
3754         }
3755     }
3756 }
3757
3758 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3759                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
3760                         int n, int qscale){
3761     int16_t rem[64];
3762     LOCAL_ALIGNED_16(DCTELEM, d1, [64]);
3763     const uint8_t *scantable= s->intra_scantable.scantable;
3764     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3765 //    unsigned int threshold1, threshold2;
3766 //    int bias=0;
3767     int run_tab[65];
3768     int prev_run=0;
3769     int prev_level=0;
3770     int qmul, qadd, start_i, last_non_zero, i, dc;
3771     uint8_t * length;
3772     uint8_t * last_length;
3773     int lambda;
3774     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3775 #ifdef REFINE_STATS
3776 static int count=0;
3777 static int after_last=0;
3778 static int to_zero=0;
3779 static int from_zero=0;
3780 static int raise=0;
3781 static int lower=0;
3782 static int messed_sign=0;
3783 #endif
3784
3785     if(basis[0][0] == 0)
3786         build_basis(s->dsp.idct_permutation);
3787
3788     qmul= qscale*2;
3789     qadd= (qscale-1)|1;
3790     if (s->mb_intra) {
3791         if (!s->h263_aic) {
3792             if (n < 4)
3793                 q = s->y_dc_scale;
3794             else
3795                 q = s->c_dc_scale;
3796         } else{
3797             /* For AIC we skip quant/dequant of INTRADC */
3798             q = 1;
3799             qadd=0;
3800         }
3801         q <<= RECON_SHIFT-3;
3802         /* note: block[0] is assumed to be positive */
3803         dc= block[0]*q;
3804 //        block[0] = (block[0] + (q >> 1)) / q;
3805         start_i = 1;
3806 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3807 //            bias= 1<<(QMAT_SHIFT-1);
3808         length     = s->intra_ac_vlc_length;
3809         last_length= s->intra_ac_vlc_last_length;
3810     } else {
3811         dc= 0;
3812         start_i = 0;
3813         length     = s->inter_ac_vlc_length;
3814         last_length= s->inter_ac_vlc_last_length;
3815     }
3816     last_non_zero = s->block_last_index[n];
3817
3818 #ifdef REFINE_STATS
3819 {START_TIMER
3820 #endif
3821     dc += (1<<(RECON_SHIFT-1));
3822     for(i=0; i<64; i++){
3823         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig dirrectly instead of copying to rem[]
3824     }
3825 #ifdef REFINE_STATS
3826 STOP_TIMER("memset rem[]")}
3827 #endif
3828     sum=0;
3829     for(i=0; i<64; i++){
3830         int one= 36;
3831         int qns=4;
3832         int w;
3833
3834         w= FFABS(weight[i]) + qns*one;
3835         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3836
3837         weight[i] = w;
3838 //        w=weight[i] = (63*qns + (w/2)) / w;
3839
3840         av_assert2(w>0);
3841         av_assert2(w<(1<<6));
3842         sum += w*w;
3843     }
3844     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3845 #ifdef REFINE_STATS
3846 {START_TIMER
3847 #endif
3848     run=0;
3849     rle_index=0;
3850     for(i=start_i; i<=last_non_zero; i++){
3851         int j= perm_scantable[i];
3852         const int level= block[j];
3853         int coeff;
3854
3855         if(level){
3856             if(level<0) coeff= qmul*level - qadd;
3857             else        coeff= qmul*level + qadd;
3858             run_tab[rle_index++]=run;
3859             run=0;
3860
3861             s->dsp.add_8x8basis(rem, basis[j], coeff);
3862         }else{
3863             run++;
3864         }
3865     }
3866 #ifdef REFINE_STATS
3867 if(last_non_zero>0){
3868 STOP_TIMER("init rem[]")
3869 }
3870 }
3871
3872 {START_TIMER
3873 #endif
3874     for(;;){
3875         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3876         int best_coeff=0;
3877         int best_change=0;
3878         int run2, best_unquant_change=0, analyze_gradient;
3879 #ifdef REFINE_STATS
3880 {START_TIMER
3881 #endif
3882         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3883
3884         if(analyze_gradient){
3885 #ifdef REFINE_STATS
3886 {START_TIMER
3887 #endif
3888             for(i=0; i<64; i++){
3889                 int w= weight[i];
3890
3891                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3892             }
3893 #ifdef REFINE_STATS
3894 STOP_TIMER("rem*w*w")}
3895 {START_TIMER
3896 #endif
3897             s->dsp.fdct(d1);
3898 #ifdef REFINE_STATS
3899 STOP_TIMER("dct")}
3900 #endif
3901         }
3902
3903         if(start_i){
3904             const int level= block[0];
3905             int change, old_coeff;
3906
3907             av_assert2(s->mb_intra);
3908
3909             old_coeff= q*level;
3910
3911             for(change=-1; change<=1; change+=2){
3912                 int new_level= level + change;
3913                 int score, new_coeff;
3914
3915                 new_coeff= q*new_level;
3916                 if(new_coeff >= 2048 || new_coeff < 0)
3917                     continue;
3918
3919                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3920                 if(score<best_score){
3921                     best_score= score;
3922                     best_coeff= 0;
3923                     best_change= change;
3924                     best_unquant_change= new_coeff - old_coeff;
3925                 }
3926             }
3927         }
3928
3929         run=0;
3930         rle_index=0;
3931         run2= run_tab[rle_index++];
3932         prev_level=0;
3933         prev_run=0;
3934
3935         for(i=start_i; i<64; i++){
3936             int j= perm_scantable[i];
3937             const int level= block[j];
3938             int change, old_coeff;
3939
3940             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3941                 break;
3942
3943             if(level){
3944                 if(level<0) old_coeff= qmul*level - qadd;
3945                 else        old_coeff= qmul*level + qadd;
3946                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3947             }else{
3948                 old_coeff=0;
3949                 run2--;
3950                 av_assert2(run2>=0 || i >= last_non_zero );
3951             }
3952
3953             for(change=-1; change<=1; change+=2){
3954                 int new_level= level + change;
3955                 int score, new_coeff, unquant_change;
3956
3957                 score=0;
3958                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3959                    continue;
3960
3961                 if(new_level){
3962                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3963                     else            new_coeff= qmul*new_level + qadd;
3964                     if(new_coeff >= 2048 || new_coeff <= -2048)
3965                         continue;
3966                     //FIXME check for overflow
3967
3968                     if(level){
3969                         if(level < 63 && level > -63){
3970                             if(i < last_non_zero)
3971                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3972                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3973                             else
3974                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3975                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3976                         }
3977                     }else{
3978                         av_assert2(FFABS(new_level)==1);
3979
3980                         if(analyze_gradient){
3981                             int g= d1[ scantable[i] ];
3982                             if(g && (g^new_level) >= 0)
3983                                 continue;
3984                         }
3985
3986                         if(i < last_non_zero){
3987                             int next_i= i + run2 + 1;
3988                             int next_level= block[ perm_scantable[next_i] ] + 64;
3989
3990                             if(next_level&(~127))
3991                                 next_level= 0;
3992
3993                             if(next_i < last_non_zero)
3994                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3995                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3996                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3997                             else
3998                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3999                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4000                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4001                         }else{
4002                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4003                             if(prev_level){
4004                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4005                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4006                             }
4007                         }
4008                     }
4009                 }else{
4010                     new_coeff=0;
4011                     av_assert2(FFABS(level)==1);
4012
4013                     if(i < last_non_zero){
4014                         int next_i= i + run2 + 1;
4015                         int next_level= block[ perm_scantable[next_i] ] + 64;
4016
4017                         if(next_level&(~127))
4018                             next_level= 0;
4019
4020                         if(next_i < last_non_zero)
4021                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4022                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4023                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4024                         else
4025                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4026                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4027                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4028                     }else{
4029                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4030                         if(prev_level){
4031                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4032                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4033                         }
4034                     }
4035                 }
4036
4037                 score *= lambda;
4038
4039                 unquant_change= new_coeff - old_coeff;
4040                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4041
4042                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4043                 if(score<best_score){
4044                     best_score= score;
4045                     best_coeff= i;
4046                     best_change= change;
4047                     best_unquant_change= unquant_change;
4048                 }
4049             }
4050             if(level){
4051                 prev_level= level + 64;
4052                 if(prev_level&(~127))
4053                     prev_level= 0;
4054                 prev_run= run;
4055                 run=0;
4056             }else{
4057                 run++;
4058             }
4059         }
4060 #ifdef REFINE_STATS
4061 STOP_TIMER("iterative step")}
4062 #endif
4063
4064         if(best_change){
4065             int j= perm_scantable[ best_coeff ];
4066
4067             block[j] += best_change;
4068
4069             if(best_coeff > last_non_zero){
4070                 last_non_zero= best_coeff;
4071                 av_assert2(block[j]);
4072 #ifdef REFINE_STATS
4073 after_last++;
4074 #endif
4075             }else{
4076 #ifdef REFINE_STATS
4077 if(block[j]){
4078     if(block[j] - best_change){
4079         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4080             raise++;
4081         }else{
4082             lower++;
4083         }
4084     }else{
4085         from_zero++;
4086     }
4087 }else{
4088     to_zero++;
4089 }
4090 #endif
4091                 for(; last_non_zero>=start_i; last_non_zero--){
4092                     if(block[perm_scantable[last_non_zero]])
4093                         break;
4094                 }
4095             }
4096 #ifdef REFINE_STATS
4097 count++;
4098 if(256*256*256*64 % count == 0){
4099     printf("after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4100 }
4101 #endif
4102             run=0;
4103             rle_index=0;
4104             for(i=start_i; i<=last_non_zero; i++){
4105                 int j= perm_scantable[i];
4106                 const int level= block[j];
4107
4108                  if(level){
4109                      run_tab[rle_index++]=run;
4110                      run=0;
4111                  }else{
4112                      run++;
4113                  }
4114             }
4115
4116             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4117         }else{
4118             break;
4119         }
4120     }
4121 #ifdef REFINE_STATS
4122 if(last_non_zero>0){
4123 STOP_TIMER("iterative search")
4124 }
4125 }
4126 #endif
4127
4128     return last_non_zero;
4129 }
4130
4131 int ff_dct_quantize_c(MpegEncContext *s,
4132                         DCTELEM *block, int n,
4133                         int qscale, int *overflow)
4134 {
4135     int i, j, level, last_non_zero, q, start_i;
4136     const int *qmat;
4137     const uint8_t *scantable= s->intra_scantable.scantable;
4138     int bias;
4139     int max=0;
4140     unsigned int threshold1, threshold2;
4141
4142     s->dsp.fdct (block);
4143
4144     if(s->dct_error_sum)
4145         s->denoise_dct(s, block);
4146
4147     if (s->mb_intra) {
4148         if (!s->h263_aic) {
4149             if (n < 4)
4150                 q = s->y_dc_scale;
4151             else
4152                 q = s->c_dc_scale;
4153             q = q << 3;
4154         } else
4155             /* For AIC we skip quant/dequant of INTRADC */
4156             q = 1 << 3;
4157
4158         /* note: block[0] is assumed to be positive */
4159         block[0] = (block[0] + (q >> 1)) / q;
4160         start_i = 1;
4161         last_non_zero = 0;
4162         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4163         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4164     } else {
4165         start_i = 0;
4166         last_non_zero = -1;
4167         qmat = s->q_inter_matrix[qscale];
4168         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4169     }
4170     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4171     threshold2= (threshold1<<1);
4172     for(i=63;i>=start_i;i--) {
4173         j = scantable[i];
4174         level = block[j] * qmat[j];
4175
4176         if(((unsigned)(level+threshold1))>threshold2){
4177             last_non_zero = i;
4178             break;
4179         }else{
4180             block[j]=0;
4181         }
4182     }
4183     for(i=start_i; i<=last_non_zero; i++) {
4184         j = scantable[i];
4185         level = block[j] * qmat[j];
4186
4187 //        if(   bias+level >= (1<<QMAT_SHIFT)
4188 //           || bias-level >= (1<<QMAT_SHIFT)){
4189         if(((unsigned)(level+threshold1))>threshold2){
4190             if(level>0){
4191                 level= (bias + level)>>QMAT_SHIFT;
4192                 block[j]= level;
4193             }else{
4194                 level= (bias - level)>>QMAT_SHIFT;
4195                 block[j]= -level;
4196             }
4197             max |=level;
4198         }else{
4199             block[j]=0;
4200         }
4201     }
4202     *overflow= s->max_qcoeff < max; //overflow might have happened
4203
4204     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4205     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4206         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4207
4208     return last_non_zero;
4209 }
4210
4211 #define OFFSET(x) offsetof(MpegEncContext, x)
4212 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4213 static const AVOption h263_options[] = {
4214     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4215     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4216     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4217     FF_MPV_COMMON_OPTS
4218     { NULL },
4219 };
4220
4221 static const AVClass h263_class = {
4222     .class_name = "H.263 encoder",
4223     .item_name  = av_default_item_name,
4224     .option     = h263_options,
4225     .version    = LIBAVUTIL_VERSION_INT,
4226 };
4227
4228 AVCodec ff_h263_encoder = {
4229     .name           = "h263",
4230     .type           = AVMEDIA_TYPE_VIDEO,
4231     .id             = AV_CODEC_ID_H263,
4232     .priv_data_size = sizeof(MpegEncContext),
4233     .init           = ff_MPV_encode_init,
4234     .encode2        = ff_MPV_encode_picture,
4235     .close          = ff_MPV_encode_end,
4236     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4237     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4238     .priv_class     = &h263_class,
4239 };
4240
4241 static const AVOption h263p_options[] = {
4242     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4243     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4244     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4245     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4246     FF_MPV_COMMON_OPTS
4247     { NULL },
4248 };
4249 static const AVClass h263p_class = {
4250     .class_name = "H.263p encoder",
4251     .item_name  = av_default_item_name,
4252     .option     = h263p_options,
4253     .version    = LIBAVUTIL_VERSION_INT,
4254 };
4255
4256 AVCodec ff_h263p_encoder = {
4257     .name           = "h263p",
4258     .type           = AVMEDIA_TYPE_VIDEO,
4259     .id             = AV_CODEC_ID_H263P,
4260     .priv_data_size = sizeof(MpegEncContext),
4261     .init           = ff_MPV_encode_init,
4262     .encode2        = ff_MPV_encode_picture,
4263     .close          = ff_MPV_encode_end,
4264     .capabilities   = CODEC_CAP_SLICE_THREADS,
4265     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4266     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4267     .priv_class     = &h263p_class,
4268 };
4269
4270 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4271
4272 AVCodec ff_msmpeg4v2_encoder = {
4273     .name           = "msmpeg4v2",
4274     .type           = AVMEDIA_TYPE_VIDEO,
4275     .id             = AV_CODEC_ID_MSMPEG4V2,
4276     .priv_data_size = sizeof(MpegEncContext),
4277     .init           = ff_MPV_encode_init,
4278     .encode2        = ff_MPV_encode_picture,
4279     .close          = ff_MPV_encode_end,
4280     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4281     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4282     .priv_class     = &msmpeg4v2_class,
4283 };
4284
4285 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4286
4287 AVCodec ff_msmpeg4v3_encoder = {
4288     .name           = "msmpeg4",
4289     .type           = AVMEDIA_TYPE_VIDEO,
4290     .id             = AV_CODEC_ID_MSMPEG4V3,
4291     .priv_data_size = sizeof(MpegEncContext),
4292     .init           = ff_MPV_encode_init,
4293     .encode2        = ff_MPV_encode_picture,
4294     .close          = ff_MPV_encode_end,
4295     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4296     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4297     .priv_class     = &msmpeg4v3_class,
4298 };
4299
4300 FF_MPV_GENERIC_CLASS(wmv1)
4301
4302 AVCodec ff_wmv1_encoder = {
4303     .name           = "wmv1",
4304     .type           = AVMEDIA_TYPE_VIDEO,
4305     .id             = AV_CODEC_ID_WMV1,
4306     .priv_data_size = sizeof(MpegEncContext),
4307     .init           = ff_MPV_encode_init,
4308     .encode2        = ff_MPV_encode_picture,
4309     .close          = ff_MPV_encode_end,
4310     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4311     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4312     .priv_class     = &wmv1_class,
4313 };