]> git.sesse.net Git - ffmpeg/blob - libavcodec/mpegvideo_enc.c
Merge commit 'f36d7831d96aeb072db5a2b78892a534d96e288e'
[ffmpeg] / libavcodec / mpegvideo_enc.c
1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/internal.h"
31 #include "libavutil/intmath.h"
32 #include "libavutil/mathematics.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "dct.h"
37 #include "dsputil.h"
38 #include "mpeg12.h"
39 #include "mpegvideo.h"
40 #include "h261.h"
41 #include "h263.h"
42 #include "mathops.h"
43 #include "mjpegenc.h"
44 #include "msmpeg4.h"
45 #include "faandct.h"
46 #include "thread.h"
47 #include "aandcttab.h"
48 #include "flv.h"
49 #include "mpeg4video.h"
50 #include "internal.h"
51 #include "bytestream.h"
52 #include <limits.h>
53 #include "sp5x.h"
54
55 static int encode_picture(MpegEncContext *s, int picture_number);
56 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
57 static int sse_mb(MpegEncContext *s);
58 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
59 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
60
61 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
62 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
63
64 const AVOption ff_mpv_generic_options[] = {
65     FF_MPV_COMMON_OPTS
66     { NULL },
67 };
68
69 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
70                        uint16_t (*qmat16)[2][64],
71                        const uint16_t *quant_matrix,
72                        int bias, int qmin, int qmax, int intra)
73 {
74     int qscale;
75     int shift = 0;
76
77     for (qscale = qmin; qscale <= qmax; qscale++) {
78         int i;
79         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
80             dsp->fdct == ff_jpeg_fdct_islow_10 ||
81             dsp->fdct == ff_faandct) {
82             for (i = 0; i < 64; i++) {
83                 const int j = dsp->idct_permutation[i];
84                 /* 16 <= qscale * quant_matrix[i] <= 7905
85                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
86                  *             19952 <=              x  <= 249205026
87                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
88                  *           3444240 >= (1 << 36) / (x) >= 275 */
89
90                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
91                                         (qscale * quant_matrix[j]));
92             }
93         } else if (dsp->fdct == ff_fdct_ifast) {
94             for (i = 0; i < 64; i++) {
95                 const int j = dsp->idct_permutation[i];
96                 /* 16 <= qscale * quant_matrix[i] <= 7905
97                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
98                  *             19952 <=              x  <= 249205026
99                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
100                  *           3444240 >= (1 << 36) / (x) >= 275 */
101
102                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
103                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
104             }
105         } else {
106             for (i = 0; i < 64; i++) {
107                 const int j = dsp->idct_permutation[i];
108                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
109                  * Assume x = qscale * quant_matrix[i]
110                  * So             16 <=              x  <= 7905
111                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
112                  * so          32768 >= (1 << 19) / (x) >= 67 */
113                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
114                                         (qscale * quant_matrix[j]));
115                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
116                 //                    (qscale * quant_matrix[i]);
117                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
118                                        (qscale * quant_matrix[j]);
119
120                 if (qmat16[qscale][0][i] == 0 ||
121                     qmat16[qscale][0][i] == 128 * 256)
122                     qmat16[qscale][0][i] = 128 * 256 - 1;
123                 qmat16[qscale][1][i] =
124                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
125                                 qmat16[qscale][0][i]);
126             }
127         }
128
129         for (i = intra; i < 64; i++) {
130             int64_t max = 8191;
131             if (dsp->fdct == ff_fdct_ifast) {
132                 max = (8191LL * ff_aanscales[i]) >> 14;
133             }
134             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
135                 shift++;
136             }
137         }
138     }
139     if (shift) {
140         av_log(NULL, AV_LOG_INFO,
141                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
142                QMAT_SHIFT - shift);
143     }
144 }
145
146 static inline void update_qscale(MpegEncContext *s)
147 {
148     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
149                 (FF_LAMBDA_SHIFT + 7);
150     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
151
152     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
153                  FF_LAMBDA_SHIFT;
154 }
155
156 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
157 {
158     int i;
159
160     if (matrix) {
161         put_bits(pb, 1, 1);
162         for (i = 0; i < 64; i++) {
163             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
164         }
165     } else
166         put_bits(pb, 1, 0);
167 }
168
169 /**
170  * init s->current_picture.qscale_table from s->lambda_table
171  */
172 void ff_init_qscale_tab(MpegEncContext *s)
173 {
174     int8_t * const qscale_table = s->current_picture.qscale_table;
175     int i;
176
177     for (i = 0; i < s->mb_num; i++) {
178         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
179         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
180         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
181                                                   s->avctx->qmax);
182     }
183 }
184
185 static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst,
186                                     const AVFrame *src)
187 {
188     dst->pict_type              = src->pict_type;
189     dst->quality                = src->quality;
190     dst->coded_picture_number   = src->coded_picture_number;
191     dst->display_picture_number = src->display_picture_number;
192     //dst->reference              = src->reference;
193     dst->pts                    = src->pts;
194     dst->interlaced_frame       = src->interlaced_frame;
195     dst->top_field_first        = src->top_field_first;
196 }
197
198 static void update_duplicate_context_after_me(MpegEncContext *dst,
199                                               MpegEncContext *src)
200 {
201 #define COPY(a) dst->a= src->a
202     COPY(pict_type);
203     COPY(current_picture);
204     COPY(f_code);
205     COPY(b_code);
206     COPY(qscale);
207     COPY(lambda);
208     COPY(lambda2);
209     COPY(picture_in_gop_number);
210     COPY(gop_picture_number);
211     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
212     COPY(progressive_frame);    // FIXME don't set in encode_header
213     COPY(partitioned_frame);    // FIXME don't set in encode_header
214 #undef COPY
215 }
216
217 /**
218  * Set the given MpegEncContext to defaults for encoding.
219  * the changed fields will not depend upon the prior state of the MpegEncContext.
220  */
221 static void MPV_encode_defaults(MpegEncContext *s)
222 {
223     int i;
224     ff_MPV_common_defaults(s);
225
226     for (i = -16; i < 16; i++) {
227         default_fcode_tab[i + MAX_MV] = 1;
228     }
229     s->me.mv_penalty = default_mv_penalty;
230     s->fcode_tab     = default_fcode_tab;
231 }
232
233 av_cold int ff_dct_encode_init(MpegEncContext *s) {
234     if (ARCH_X86)
235         ff_dct_encode_init_x86(s);
236
237     if (!s->dct_quantize)
238         s->dct_quantize = ff_dct_quantize_c;
239     if (!s->denoise_dct)
240         s->denoise_dct  = denoise_dct_c;
241     s->fast_dct_quantize = s->dct_quantize;
242     if (s->avctx->trellis)
243         s->dct_quantize  = dct_quantize_trellis_c;
244
245     return 0;
246 }
247
248 /* init video encoder */
249 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
250 {
251     MpegEncContext *s = avctx->priv_data;
252     int i;
253     int chroma_h_shift, chroma_v_shift;
254
255     MPV_encode_defaults(s);
256
257     switch (avctx->codec_id) {
258     case AV_CODEC_ID_MPEG2VIDEO:
259         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
260             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
261             av_log(avctx, AV_LOG_ERROR,
262                    "only YUV420 and YUV422 are supported\n");
263             return -1;
264         }
265         break;
266     case AV_CODEC_ID_LJPEG:
267         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
268             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
269             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
270             avctx->pix_fmt != AV_PIX_FMT_BGR0     &&
271             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
272             avctx->pix_fmt != AV_PIX_FMT_BGR24    &&
273             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
274               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
275               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
276              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
277             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
278             return -1;
279         }
280         break;
281     case AV_CODEC_ID_MJPEG:
282     case AV_CODEC_ID_AMV:
283         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
284             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
285             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
286             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
287               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
288               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
289              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
290             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
291             return -1;
292         }
293         break;
294     default:
295         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
296             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
297             return -1;
298         }
299     }
300
301     switch (avctx->pix_fmt) {
302     case AV_PIX_FMT_YUVJ444P:
303     case AV_PIX_FMT_YUV444P:
304         s->chroma_format = CHROMA_444;
305         break;
306     case AV_PIX_FMT_YUVJ422P:
307     case AV_PIX_FMT_YUV422P:
308         s->chroma_format = CHROMA_422;
309         break;
310     case AV_PIX_FMT_YUVJ420P:
311     case AV_PIX_FMT_YUV420P:
312     default:
313         s->chroma_format = CHROMA_420;
314         break;
315     }
316
317     s->bit_rate = avctx->bit_rate;
318     s->width    = avctx->width;
319     s->height   = avctx->height;
320     if (avctx->gop_size > 600 &&
321         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
322         av_log(avctx, AV_LOG_WARNING,
323                "keyframe interval too large!, reducing it from %d to %d\n",
324                avctx->gop_size, 600);
325         avctx->gop_size = 600;
326     }
327     s->gop_size     = avctx->gop_size;
328     s->avctx        = avctx;
329     s->flags        = avctx->flags;
330     s->flags2       = avctx->flags2;
331     s->max_b_frames = avctx->max_b_frames;
332     s->codec_id     = avctx->codec->id;
333     s->strict_std_compliance = avctx->strict_std_compliance;
334     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
335     s->mpeg_quant         = avctx->mpeg_quant;
336     s->rtp_mode           = !!avctx->rtp_payload_size;
337     s->intra_dc_precision = avctx->intra_dc_precision;
338     s->user_specified_pts = AV_NOPTS_VALUE;
339
340     if (s->gop_size <= 1) {
341         s->intra_only = 1;
342         s->gop_size   = 12;
343     } else {
344         s->intra_only = 0;
345     }
346
347     s->me_method = avctx->me_method;
348
349     /* Fixed QSCALE */
350     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
351
352     s->adaptive_quant = (s->avctx->lumi_masking ||
353                          s->avctx->dark_masking ||
354                          s->avctx->temporal_cplx_masking ||
355                          s->avctx->spatial_cplx_masking  ||
356                          s->avctx->p_masking      ||
357                          s->avctx->border_masking ||
358                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
359                         !s->fixed_qscale;
360
361     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
362
363     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
364         switch(avctx->codec_id) {
365         case AV_CODEC_ID_MPEG1VIDEO:
366         case AV_CODEC_ID_MPEG2VIDEO:
367             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
368             break;
369         case AV_CODEC_ID_MPEG4:
370         case AV_CODEC_ID_MSMPEG4V1:
371         case AV_CODEC_ID_MSMPEG4V2:
372         case AV_CODEC_ID_MSMPEG4V3:
373             if       (avctx->rc_max_rate >= 15000000) {
374                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
375             } else if(avctx->rc_max_rate >=  2000000) {
376                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
377             } else if(avctx->rc_max_rate >=   384000) {
378                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
379             } else
380                 avctx->rc_buffer_size = 40;
381             avctx->rc_buffer_size *= 16384;
382             break;
383         }
384         if (avctx->rc_buffer_size) {
385             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
386         }
387     }
388
389     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
390         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
391         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
392             return -1;
393     }
394
395     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
396         av_log(avctx, AV_LOG_INFO,
397                "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
398     }
399
400     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
401         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
402         return -1;
403     }
404
405     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
406         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
407         return -1;
408     }
409
410     if (avctx->rc_max_rate &&
411         avctx->rc_max_rate == avctx->bit_rate &&
412         avctx->rc_max_rate != avctx->rc_min_rate) {
413         av_log(avctx, AV_LOG_INFO,
414                "impossible bitrate constraints, this will fail\n");
415     }
416
417     if (avctx->rc_buffer_size &&
418         avctx->bit_rate * (int64_t)avctx->time_base.num >
419             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
420         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
421         return -1;
422     }
423
424     if (!s->fixed_qscale &&
425         avctx->bit_rate * av_q2d(avctx->time_base) >
426             avctx->bit_rate_tolerance) {
427         av_log(avctx, AV_LOG_ERROR,
428                "bitrate tolerance too small for bitrate\n");
429         return -1;
430     }
431
432     if (s->avctx->rc_max_rate &&
433         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
434         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
435          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
436         90000LL * (avctx->rc_buffer_size - 1) >
437             s->avctx->rc_max_rate * 0xFFFFLL) {
438         av_log(avctx, AV_LOG_INFO,
439                "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
440                "specified vbv buffer is too large for the given bitrate!\n");
441     }
442
443     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
444         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
445         s->codec_id != AV_CODEC_ID_FLV1) {
446         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
447         return -1;
448     }
449
450     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
451         av_log(avctx, AV_LOG_ERROR,
452                "OBMC is only supported with simple mb decision\n");
453         return -1;
454     }
455
456     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
457         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
458         return -1;
459     }
460
461     if (s->max_b_frames                    &&
462         s->codec_id != AV_CODEC_ID_MPEG4      &&
463         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
464         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
465         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
466         return -1;
467     }
468
469     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
470          s->codec_id == AV_CODEC_ID_H263  ||
471          s->codec_id == AV_CODEC_ID_H263P) &&
472         (avctx->sample_aspect_ratio.num > 255 ||
473          avctx->sample_aspect_ratio.den > 255)) {
474         av_log(avctx, AV_LOG_WARNING,
475                "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
476                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
477         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
478                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
479     }
480
481     if ((s->codec_id == AV_CODEC_ID_H263  ||
482          s->codec_id == AV_CODEC_ID_H263P) &&
483         (avctx->width  > 2048 ||
484          avctx->height > 1152 )) {
485         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
486         return -1;
487     }
488     if ((s->codec_id == AV_CODEC_ID_H263  ||
489          s->codec_id == AV_CODEC_ID_H263P) &&
490         ((avctx->width &3) ||
491          (avctx->height&3) )) {
492         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
493         return -1;
494     }
495
496     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
497         (avctx->width  > 4095 ||
498          avctx->height > 4095 )) {
499         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
500         return -1;
501     }
502
503     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
504         (avctx->width  > 16383 ||
505          avctx->height > 16383 )) {
506         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
507         return -1;
508     }
509
510     if (s->codec_id == AV_CODEC_ID_RV10 &&
511         (avctx->width &15 ||
512          avctx->height&15 )) {
513         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
514         return AVERROR(EINVAL);
515     }
516
517     if (s->codec_id == AV_CODEC_ID_RV20 &&
518         (avctx->width &3 ||
519          avctx->height&3 )) {
520         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
521         return AVERROR(EINVAL);
522     }
523
524     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
525          s->codec_id == AV_CODEC_ID_WMV2) &&
526          avctx->width & 1) {
527          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
528          return -1;
529     }
530
531     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
532         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
533         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
534         return -1;
535     }
536
537     // FIXME mpeg2 uses that too
538     if (s->mpeg_quant && s->codec_id != AV_CODEC_ID_MPEG4) {
539         av_log(avctx, AV_LOG_ERROR,
540                "mpeg2 style quantization not supported by codec\n");
541         return -1;
542     }
543
544     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
545         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
546         return -1;
547     }
548
549     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
550         s->avctx->mb_decision != FF_MB_DECISION_RD) {
551         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
552         return -1;
553     }
554
555     if (s->avctx->scenechange_threshold < 1000000000 &&
556         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
557         av_log(avctx, AV_LOG_ERROR,
558                "closed gop with scene change detection are not supported yet, "
559                "set threshold to 1000000000\n");
560         return -1;
561     }
562
563     if (s->flags & CODEC_FLAG_LOW_DELAY) {
564         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
565             av_log(avctx, AV_LOG_ERROR,
566                   "low delay forcing is only available for mpeg2\n");
567             return -1;
568         }
569         if (s->max_b_frames != 0) {
570             av_log(avctx, AV_LOG_ERROR,
571                    "b frames cannot be used with low delay\n");
572             return -1;
573         }
574     }
575
576     if (s->q_scale_type == 1) {
577         if (avctx->qmax > 12) {
578             av_log(avctx, AV_LOG_ERROR,
579                    "non linear quant only supports qmax <= 12 currently\n");
580             return -1;
581         }
582     }
583
584     if (s->avctx->thread_count > 1         &&
585         s->codec_id != AV_CODEC_ID_MPEG4      &&
586         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
587         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
588         s->codec_id != AV_CODEC_ID_MJPEG      &&
589         (s->codec_id != AV_CODEC_ID_H263P)) {
590         av_log(avctx, AV_LOG_ERROR,
591                "multi threaded encoding not supported by codec\n");
592         return -1;
593     }
594
595     if (s->avctx->thread_count < 1) {
596         av_log(avctx, AV_LOG_ERROR,
597                "automatic thread number detection not supported by codec, "
598                "patch welcome\n");
599         return -1;
600     }
601
602     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
603         s->rtp_mode = 1;
604
605     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
606         s->h263_slice_structured = 1;
607
608     if (!avctx->time_base.den || !avctx->time_base.num) {
609         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
610         return -1;
611     }
612
613     i = (INT_MAX / 2 + 128) >> 8;
614     if (avctx->mb_threshold >= i) {
615         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
616                i - 1);
617         return -1;
618     }
619
620     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
621         av_log(avctx, AV_LOG_INFO,
622                "notice: b_frame_strategy only affects the first pass\n");
623         avctx->b_frame_strategy = 0;
624     }
625
626     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
627     if (i > 1) {
628         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
629         avctx->time_base.den /= i;
630         avctx->time_base.num /= i;
631         //return -1;
632     }
633
634     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
635         // (a + x * 3 / 8) / x
636         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
637         s->inter_quant_bias = 0;
638     } else {
639         s->intra_quant_bias = 0;
640         // (a - x / 4) / x
641         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
642     }
643
644     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
645         s->intra_quant_bias = avctx->intra_quant_bias;
646     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
647         s->inter_quant_bias = avctx->inter_quant_bias;
648
649     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
650
651     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
652
653     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
654         s->avctx->time_base.den > (1 << 16) - 1) {
655         av_log(avctx, AV_LOG_ERROR,
656                "timebase %d/%d not supported by MPEG 4 standard, "
657                "the maximum admitted value for the timebase denominator "
658                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
659                (1 << 16) - 1);
660         return -1;
661     }
662     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
663
664     switch (avctx->codec->id) {
665     case AV_CODEC_ID_MPEG1VIDEO:
666         s->out_format = FMT_MPEG1;
667         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
668         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
669         break;
670     case AV_CODEC_ID_MPEG2VIDEO:
671         s->out_format = FMT_MPEG1;
672         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
673         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
674         s->rtp_mode   = 1;
675         break;
676     case AV_CODEC_ID_LJPEG:
677     case AV_CODEC_ID_MJPEG:
678     case AV_CODEC_ID_AMV:
679         s->out_format = FMT_MJPEG;
680         s->intra_only = 1; /* force intra only for jpeg */
681         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
682             (avctx->pix_fmt == AV_PIX_FMT_BGR0
683              || s->avctx->pix_fmt == AV_PIX_FMT_BGRA
684              || s->avctx->pix_fmt == AV_PIX_FMT_BGR24)) {
685             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
686             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
687             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
688         } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P || avctx->pix_fmt == AV_PIX_FMT_YUVJ444P) {
689             s->mjpeg_vsample[0] = s->mjpeg_vsample[1] = s->mjpeg_vsample[2] = 2;
690             s->mjpeg_hsample[0] = s->mjpeg_hsample[1] = s->mjpeg_hsample[2] = 1;
691         } else {
692             s->mjpeg_vsample[0] = 2;
693             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
694             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
695             s->mjpeg_hsample[0] = 2;
696             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
697             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
698         }
699         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
700             ff_mjpeg_encode_init(s) < 0)
701             return -1;
702         avctx->delay = 0;
703         s->low_delay = 1;
704         break;
705     case AV_CODEC_ID_H261:
706         if (!CONFIG_H261_ENCODER)
707             return -1;
708         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
709             av_log(avctx, AV_LOG_ERROR,
710                    "The specified picture size of %dx%d is not valid for the "
711                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
712                     s->width, s->height);
713             return -1;
714         }
715         s->out_format = FMT_H261;
716         avctx->delay  = 0;
717         s->low_delay  = 1;
718         break;
719     case AV_CODEC_ID_H263:
720         if (!CONFIG_H263_ENCODER)
721             return -1;
722         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
723                              s->width, s->height) == 8) {
724             av_log(avctx, AV_LOG_ERROR,
725                    "The specified picture size of %dx%d is not valid for "
726                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
727                    "352x288, 704x576, and 1408x1152. "
728                    "Try H.263+.\n", s->width, s->height);
729             return -1;
730         }
731         s->out_format = FMT_H263;
732         avctx->delay  = 0;
733         s->low_delay  = 1;
734         break;
735     case AV_CODEC_ID_H263P:
736         s->out_format = FMT_H263;
737         s->h263_plus  = 1;
738         /* Fx */
739         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
740         s->modified_quant  = s->h263_aic;
741         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
742         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
743
744         /* /Fx */
745         /* These are just to be sure */
746         avctx->delay = 0;
747         s->low_delay = 1;
748         break;
749     case AV_CODEC_ID_FLV1:
750         s->out_format      = FMT_H263;
751         s->h263_flv        = 2; /* format = 1; 11-bit codes */
752         s->unrestricted_mv = 1;
753         s->rtp_mode  = 0; /* don't allow GOB */
754         avctx->delay = 0;
755         s->low_delay = 1;
756         break;
757     case AV_CODEC_ID_RV10:
758         s->out_format = FMT_H263;
759         avctx->delay  = 0;
760         s->low_delay  = 1;
761         break;
762     case AV_CODEC_ID_RV20:
763         s->out_format      = FMT_H263;
764         avctx->delay       = 0;
765         s->low_delay       = 1;
766         s->modified_quant  = 1;
767         s->h263_aic        = 1;
768         s->h263_plus       = 1;
769         s->loop_filter     = 1;
770         s->unrestricted_mv = 0;
771         break;
772     case AV_CODEC_ID_MPEG4:
773         s->out_format      = FMT_H263;
774         s->h263_pred       = 1;
775         s->unrestricted_mv = 1;
776         s->low_delay       = s->max_b_frames ? 0 : 1;
777         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
778         break;
779     case AV_CODEC_ID_MSMPEG4V2:
780         s->out_format      = FMT_H263;
781         s->h263_pred       = 1;
782         s->unrestricted_mv = 1;
783         s->msmpeg4_version = 2;
784         avctx->delay       = 0;
785         s->low_delay       = 1;
786         break;
787     case AV_CODEC_ID_MSMPEG4V3:
788         s->out_format        = FMT_H263;
789         s->h263_pred         = 1;
790         s->unrestricted_mv   = 1;
791         s->msmpeg4_version   = 3;
792         s->flipflop_rounding = 1;
793         avctx->delay         = 0;
794         s->low_delay         = 1;
795         break;
796     case AV_CODEC_ID_WMV1:
797         s->out_format        = FMT_H263;
798         s->h263_pred         = 1;
799         s->unrestricted_mv   = 1;
800         s->msmpeg4_version   = 4;
801         s->flipflop_rounding = 1;
802         avctx->delay         = 0;
803         s->low_delay         = 1;
804         break;
805     case AV_CODEC_ID_WMV2:
806         s->out_format        = FMT_H263;
807         s->h263_pred         = 1;
808         s->unrestricted_mv   = 1;
809         s->msmpeg4_version   = 5;
810         s->flipflop_rounding = 1;
811         avctx->delay         = 0;
812         s->low_delay         = 1;
813         break;
814     default:
815         return -1;
816     }
817
818     avctx->has_b_frames = !s->low_delay;
819
820     s->encoding = 1;
821
822     s->progressive_frame    =
823     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
824                                                 CODEC_FLAG_INTERLACED_ME) ||
825                                 s->alternate_scan);
826
827     /* init */
828     if (ff_MPV_common_init(s) < 0)
829         return -1;
830
831     ff_dct_encode_init(s);
832
833     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
834         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
835
836     s->quant_precision = 5;
837
838     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
839     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
840
841     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
842         ff_h261_encode_init(s);
843     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
844         ff_h263_encode_init(s);
845     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
846         ff_msmpeg4_encode_init(s);
847     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
848         && s->out_format == FMT_MPEG1)
849         ff_mpeg1_encode_init(s);
850
851     /* init q matrix */
852     for (i = 0; i < 64; i++) {
853         int j = s->dsp.idct_permutation[i];
854         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
855             s->mpeg_quant) {
856             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
857             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
858         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
859             s->intra_matrix[j] =
860             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
861         } else {
862             /* mpeg1/2 */
863             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
864             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
865         }
866         if (s->avctx->intra_matrix)
867             s->intra_matrix[j] = s->avctx->intra_matrix[i];
868         if (s->avctx->inter_matrix)
869             s->inter_matrix[j] = s->avctx->inter_matrix[i];
870     }
871
872     /* precompute matrix */
873     /* for mjpeg, we do include qscale in the matrix */
874     if (s->out_format != FMT_MJPEG) {
875         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
876                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
877                           31, 1);
878         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
879                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
880                           31, 0);
881     }
882
883     if (ff_rate_control_init(s) < 0)
884         return -1;
885
886     return 0;
887 }
888
889 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
890 {
891     MpegEncContext *s = avctx->priv_data;
892
893     ff_rate_control_uninit(s);
894
895     ff_MPV_common_end(s);
896     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
897         s->out_format == FMT_MJPEG)
898         ff_mjpeg_encode_close(s);
899
900     av_freep(&avctx->extradata);
901
902     return 0;
903 }
904
/**
 * Sum of absolute errors of a 16x16 block against a constant value.
 *
 * @param src    top-left pixel of the block
 * @param ref    value every pixel is compared with
 * @param stride offset between the starts of two consecutive rows
 * @return sum over all 256 pixels of |pixel - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        const uint8_t *line = src + row * stride;

        for (col = 0; col < 16; col++) {
            const int diff = line[col] - ref;

            total += diff >= 0 ? diff : -diff;
        }
    }

    return total;
}
918
919 static int get_intra_count(MpegEncContext *s, uint8_t *src,
920                            uint8_t *ref, int stride)
921 {
922     int x, y, w, h;
923     int acc = 0;
924
925     w = s->width  & ~15;
926     h = s->height & ~15;
927
928     for (y = 0; y < h; y += 16) {
929         for (x = 0; x < w; x += 16) {
930             int offset = x + y * stride;
931             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
932                                      16);
933             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
934             int sae  = get_sae(src + offset, mean, stride);
935
936             acc += sae + 500 < sad;
937         }
938     }
939     return acc;
940 }
941
942
943 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
944 {
945     Picture *pic = NULL;
946     int64_t pts;
947     int i, display_picture_number = 0, ret;
948     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
949                                                  (s->low_delay ? 0 : 1);
950     int direct = 1;
951
952     if (pic_arg) {
953         pts = pic_arg->pts;
954         display_picture_number = s->input_picture_number++;
955
956         if (pts != AV_NOPTS_VALUE) {
957             if (s->user_specified_pts != AV_NOPTS_VALUE) {
958                 int64_t time = pts;
959                 int64_t last = s->user_specified_pts;
960
961                 if (time <= last) {
962                     av_log(s->avctx, AV_LOG_ERROR,
963                            "Error, Invalid timestamp=%"PRId64", "
964                            "last=%"PRId64"\n", pts, s->user_specified_pts);
965                     return -1;
966                 }
967
968                 if (!s->low_delay && display_picture_number == 1)
969                     s->dts_delta = time - last;
970             }
971             s->user_specified_pts = pts;
972         } else {
973             if (s->user_specified_pts != AV_NOPTS_VALUE) {
974                 s->user_specified_pts =
975                 pts = s->user_specified_pts + 1;
976                 av_log(s->avctx, AV_LOG_INFO,
977                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
978                        pts);
979             } else {
980                 pts = display_picture_number;
981             }
982         }
983     }
984
985     if (pic_arg) {
986         if (!pic_arg->buf[0])
987             direct = 0;
988         if (pic_arg->linesize[0] != s->linesize)
989             direct = 0;
990         if (pic_arg->linesize[1] != s->uvlinesize)
991             direct = 0;
992         if (pic_arg->linesize[2] != s->uvlinesize)
993             direct = 0;
994
995         av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
996                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
997
998         if (direct) {
999             i = ff_find_unused_picture(s, 1);
1000             if (i < 0)
1001                 return i;
1002
1003             pic = &s->picture[i];
1004             pic->reference = 3;
1005
1006             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
1007                 return ret;
1008             if (ff_alloc_picture(s, pic, 1) < 0) {
1009                 return -1;
1010             }
1011         } else {
1012             i = ff_find_unused_picture(s, 0);
1013             if (i < 0)
1014                 return i;
1015
1016             pic = &s->picture[i];
1017             pic->reference = 3;
1018
1019             if (ff_alloc_picture(s, pic, 0) < 0) {
1020                 return -1;
1021             }
1022
1023             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1024                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1025                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1026                 // empty
1027             } else {
1028                 int h_chroma_shift, v_chroma_shift;
1029                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1030                                                  &h_chroma_shift,
1031                                                  &v_chroma_shift);
1032
1033                 for (i = 0; i < 3; i++) {
1034                     int src_stride = pic_arg->linesize[i];
1035                     int dst_stride = i ? s->uvlinesize : s->linesize;
1036                     int h_shift = i ? h_chroma_shift : 0;
1037                     int v_shift = i ? v_chroma_shift : 0;
1038                     int w = s->width  >> h_shift;
1039                     int h = s->height >> v_shift;
1040                     uint8_t *src = pic_arg->data[i];
1041                     uint8_t *dst = pic->f.data[i];
1042
1043                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1044                         h = ((s->height + 15)/16*16) >> v_shift;
1045                     }
1046
1047                     if (!s->avctx->rc_buffer_size)
1048                         dst += INPLACE_OFFSET;
1049
1050                     if (src_stride == dst_stride)
1051                         memcpy(dst, src, src_stride * h);
1052                     else {
1053                         int h2 = h;
1054                         uint8_t *dst2 = dst;
1055                         while (h2--) {
1056                             memcpy(dst2, src, w);
1057                             dst2 += dst_stride;
1058                             src += src_stride;
1059                         }
1060                     }
1061                     if ((s->width & 15) || (s->height & 15)) {
1062                         s->dsp.draw_edges(dst, dst_stride,
1063                                           w, h,
1064                                           16>>h_shift,
1065                                           16>>v_shift,
1066                                           EDGE_BOTTOM);
1067                     }
1068                 }
1069             }
1070         }
1071         copy_picture_attributes(s, &pic->f, pic_arg);
1072         pic->f.display_picture_number = display_picture_number;
1073         pic->f.pts = pts; // we set this here to avoid modifiying pic_arg
1074     }
1075
1076     /* shift buffer entries */
1077     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1078         s->input_picture[i - 1] = s->input_picture[i];
1079
1080     s->input_picture[encoding_delay] = (Picture*) pic;
1081
1082     return 0;
1083 }
1084
1085 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1086 {
1087     int x, y, plane;
1088     int score = 0;
1089     int64_t score64 = 0;
1090
1091     for (plane = 0; plane < 3; plane++) {
1092         const int stride = p->f.linesize[plane];
1093         const int bw = plane ? 1 : 2;
1094         for (y = 0; y < s->mb_height * bw; y++) {
1095             for (x = 0; x < s->mb_width * bw; x++) {
1096                 int off = p->shared ? 0 : 16;
1097                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1098                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1099                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1100
1101                 switch (s->avctx->frame_skip_exp) {
1102                 case 0: score    =  FFMAX(score, v);          break;
1103                 case 1: score   += FFABS(v);                  break;
1104                 case 2: score   += v * v;                     break;
1105                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1106                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1107                 }
1108             }
1109         }
1110     }
1111
1112     if (score)
1113         score64 = score;
1114
1115     if (score64 < s->avctx->frame_skip_threshold)
1116         return 1;
1117     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1118         return 1;
1119     return 0;
1120 }
1121
1122 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1123 {
1124     AVPacket pkt = { 0 };
1125     int ret, got_output;
1126
1127     av_init_packet(&pkt);
1128     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1129     if (ret < 0)
1130         return ret;
1131
1132     ret = pkt.size;
1133     av_free_packet(&pkt);
1134     return ret;
1135 }
1136
1137 static int estimate_best_b_count(MpegEncContext *s)
1138 {
1139     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1140     AVCodecContext *c = avcodec_alloc_context3(NULL);
1141     AVFrame input[FF_MAX_B_FRAMES + 2];
1142     const int scale = s->avctx->brd_scale;
1143     int i, j, out_size, p_lambda, b_lambda, lambda2;
1144     int64_t best_rd  = INT64_MAX;
1145     int best_b_count = -1;
1146
1147     av_assert0(scale >= 0 && scale <= 3);
1148
1149     //emms_c();
1150     //s->next_picture_ptr->quality;
1151     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1152     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1153     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1154     if (!b_lambda) // FIXME we should do this somewhere else
1155         b_lambda = p_lambda;
1156     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1157                FF_LAMBDA_SHIFT;
1158
1159     c->width        = s->width  >> scale;
1160     c->height       = s->height >> scale;
1161     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1162                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1163     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1164     c->mb_decision  = s->avctx->mb_decision;
1165     c->me_cmp       = s->avctx->me_cmp;
1166     c->mb_cmp       = s->avctx->mb_cmp;
1167     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1168     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1169     c->time_base    = s->avctx->time_base;
1170     c->max_b_frames = s->max_b_frames;
1171
1172     if (avcodec_open2(c, codec, NULL) < 0)
1173         return -1;
1174
1175     for (i = 0; i < s->max_b_frames + 2; i++) {
1176         int ysize = c->width * c->height;
1177         int csize = (c->width / 2) * (c->height / 2);
1178         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1179                                                 s->next_picture_ptr;
1180
1181         avcodec_get_frame_defaults(&input[i]);
1182         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1183         input[i].data[1]     = input[i].data[0] + ysize;
1184         input[i].data[2]     = input[i].data[1] + csize;
1185         input[i].linesize[0] = c->width;
1186         input[i].linesize[1] =
1187         input[i].linesize[2] = c->width / 2;
1188
1189         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1190             pre_input = *pre_input_ptr;
1191
1192             if (!pre_input.shared && i) {
1193                 pre_input.f.data[0] += INPLACE_OFFSET;
1194                 pre_input.f.data[1] += INPLACE_OFFSET;
1195                 pre_input.f.data[2] += INPLACE_OFFSET;
1196             }
1197
1198             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1199                                  pre_input.f.data[0], pre_input.f.linesize[0],
1200                                  c->width,      c->height);
1201             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1202                                  pre_input.f.data[1], pre_input.f.linesize[1],
1203                                  c->width >> 1, c->height >> 1);
1204             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1205                                  pre_input.f.data[2], pre_input.f.linesize[2],
1206                                  c->width >> 1, c->height >> 1);
1207         }
1208     }
1209
1210     for (j = 0; j < s->max_b_frames + 1; j++) {
1211         int64_t rd = 0;
1212
1213         if (!s->input_picture[j])
1214             break;
1215
1216         c->error[0] = c->error[1] = c->error[2] = 0;
1217
1218         input[0].pict_type = AV_PICTURE_TYPE_I;
1219         input[0].quality   = 1 * FF_QP2LAMBDA;
1220
1221         out_size = encode_frame(c, &input[0]);
1222
1223         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1224
1225         for (i = 0; i < s->max_b_frames + 1; i++) {
1226             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1227
1228             input[i + 1].pict_type = is_p ?
1229                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1230             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1231
1232             out_size = encode_frame(c, &input[i + 1]);
1233
1234             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1235         }
1236
1237         /* get the delayed frames */
1238         while (out_size) {
1239             out_size = encode_frame(c, NULL);
1240             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1241         }
1242
1243         rd += c->error[0] + c->error[1] + c->error[2];
1244
1245         if (rd < best_rd) {
1246             best_rd = rd;
1247             best_b_count = j;
1248         }
1249     }
1250
1251     avcodec_close(c);
1252     av_freep(&c);
1253
1254     for (i = 0; i < s->max_b_frames + 2; i++) {
1255         av_freep(&input[i].data[0]);
1256     }
1257
1258     return best_b_count;
1259 }
1260
1261 static int select_input_picture(MpegEncContext *s)
1262 {
1263     int i, ret;
1264
1265     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1266         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1267     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1268
1269     /* set next picture type & ordering */
1270     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1271         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1272             s->next_picture_ptr == NULL || s->intra_only) {
1273             s->reordered_input_picture[0] = s->input_picture[0];
1274             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1275             s->reordered_input_picture[0]->f.coded_picture_number =
1276                 s->coded_picture_number++;
1277         } else {
1278             int b_frames;
1279
1280             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1281                 if (s->picture_in_gop_number < s->gop_size &&
1282                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1283                     // FIXME check that te gop check above is +-1 correct
1284                     av_frame_unref(&s->input_picture[0]->f);
1285
1286                     emms_c();
1287                     ff_vbv_update(s, 0);
1288
1289                     goto no_output_pic;
1290                 }
1291             }
1292
1293             if (s->flags & CODEC_FLAG_PASS2) {
1294                 for (i = 0; i < s->max_b_frames + 1; i++) {
1295                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1296
1297                     if (pict_num >= s->rc_context.num_entries)
1298                         break;
1299                     if (!s->input_picture[i]) {
1300                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1301                         break;
1302                     }
1303
1304                     s->input_picture[i]->f.pict_type =
1305                         s->rc_context.entry[pict_num].new_pict_type;
1306                 }
1307             }
1308
1309             if (s->avctx->b_frame_strategy == 0) {
1310                 b_frames = s->max_b_frames;
1311                 while (b_frames && !s->input_picture[b_frames])
1312                     b_frames--;
1313             } else if (s->avctx->b_frame_strategy == 1) {
1314                 for (i = 1; i < s->max_b_frames + 1; i++) {
1315                     if (s->input_picture[i] &&
1316                         s->input_picture[i]->b_frame_score == 0) {
1317                         s->input_picture[i]->b_frame_score =
1318                             get_intra_count(s,
1319                                             s->input_picture[i    ]->f.data[0],
1320                                             s->input_picture[i - 1]->f.data[0],
1321                                             s->linesize) + 1;
1322                     }
1323                 }
1324                 for (i = 0; i < s->max_b_frames + 1; i++) {
1325                     if (s->input_picture[i] == NULL ||
1326                         s->input_picture[i]->b_frame_score - 1 >
1327                             s->mb_num / s->avctx->b_sensitivity)
1328                         break;
1329                 }
1330
1331                 b_frames = FFMAX(0, i - 1);
1332
1333                 /* reset scores */
1334                 for (i = 0; i < b_frames + 1; i++) {
1335                     s->input_picture[i]->b_frame_score = 0;
1336                 }
1337             } else if (s->avctx->b_frame_strategy == 2) {
1338                 b_frames = estimate_best_b_count(s);
1339             } else {
1340                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1341                 b_frames = 0;
1342             }
1343
1344             emms_c();
1345
1346             for (i = b_frames - 1; i >= 0; i--) {
1347                 int type = s->input_picture[i]->f.pict_type;
1348                 if (type && type != AV_PICTURE_TYPE_B)
1349                     b_frames = i;
1350             }
1351             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1352                 b_frames == s->max_b_frames) {
1353                 av_log(s->avctx, AV_LOG_ERROR,
1354                        "warning, too many b frames in a row\n");
1355             }
1356
1357             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1358                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1359                     s->gop_size > s->picture_in_gop_number) {
1360                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1361                 } else {
1362                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1363                         b_frames = 0;
1364                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1365                 }
1366             }
1367
1368             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1369                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1370                 b_frames--;
1371
1372             s->reordered_input_picture[0] = s->input_picture[b_frames];
1373             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1374                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1375             s->reordered_input_picture[0]->f.coded_picture_number =
1376                 s->coded_picture_number++;
1377             for (i = 0; i < b_frames; i++) {
1378                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1379                 s->reordered_input_picture[i + 1]->f.pict_type =
1380                     AV_PICTURE_TYPE_B;
1381                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1382                     s->coded_picture_number++;
1383             }
1384         }
1385     }
1386 no_output_pic:
1387     if (s->reordered_input_picture[0]) {
1388         s->reordered_input_picture[0]->reference =
1389            s->reordered_input_picture[0]->f.pict_type !=
1390                AV_PICTURE_TYPE_B ? 3 : 0;
1391
1392         ff_mpeg_unref_picture(s, &s->new_picture);
1393         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1394             return ret;
1395
1396         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1397             // input is a shared pix, so we can't modifiy it -> alloc a new
1398             // one & ensure that the shared one is reuseable
1399
1400             Picture *pic;
1401             int i = ff_find_unused_picture(s, 0);
1402             if (i < 0)
1403                 return i;
1404             pic = &s->picture[i];
1405
1406             pic->reference = s->reordered_input_picture[0]->reference;
1407             if (ff_alloc_picture(s, pic, 0) < 0) {
1408                 return -1;
1409             }
1410
1411             copy_picture_attributes(s, &pic->f,
1412                                     &s->reordered_input_picture[0]->f);
1413
1414             /* mark us unused / free shared pic */
1415             av_frame_unref(&s->reordered_input_picture[0]->f);
1416             s->reordered_input_picture[0]->shared = 0;
1417
1418             s->current_picture_ptr = pic;
1419         } else {
1420             // input is not a shared pix -> reuse buffer for current_pix
1421             s->current_picture_ptr = s->reordered_input_picture[0];
1422             for (i = 0; i < 4; i++) {
1423                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1424             }
1425         }
1426         ff_mpeg_unref_picture(s, &s->current_picture);
1427         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1428                                        s->current_picture_ptr)) < 0)
1429             return ret;
1430
1431         s->picture_number = s->new_picture.f.display_picture_number;
1432     } else {
1433         ff_mpeg_unref_picture(s, &s->new_picture);
1434     }
1435     return 0;
1436 }
1437
1438 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1439                           AVFrame *pic_arg, int *got_packet)
1440 {
1441     MpegEncContext *s = avctx->priv_data;
1442     int i, stuffing_count, ret;
1443     int context_count = s->slice_context_count;
1444
1445     s->picture_in_gop_number++;
1446
1447     if (load_input_picture(s, pic_arg) < 0)
1448         return -1;
1449
1450     if (select_input_picture(s) < 0) {
1451         return -1;
1452     }
1453
1454     /* output? */
1455     if (s->new_picture.f.data[0]) {
1456         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1457             return ret;
1458         if (s->mb_info) {
1459             s->mb_info_ptr = av_packet_new_side_data(pkt,
1460                                  AV_PKT_DATA_H263_MB_INFO,
1461                                  s->mb_width*s->mb_height*12);
1462             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1463         }
1464
1465         for (i = 0; i < context_count; i++) {
1466             int start_y = s->thread_context[i]->start_mb_y;
1467             int   end_y = s->thread_context[i]->  end_mb_y;
1468             int h       = s->mb_height;
1469             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1470             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1471
1472             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1473         }
1474
1475         s->pict_type = s->new_picture.f.pict_type;
1476         //emms_c();
1477         if (ff_MPV_frame_start(s, avctx) < 0)
1478             return -1;
1479 vbv_retry:
1480         if (encode_picture(s, s->picture_number) < 0)
1481             return -1;
1482
1483         avctx->header_bits = s->header_bits;
1484         avctx->mv_bits     = s->mv_bits;
1485         avctx->misc_bits   = s->misc_bits;
1486         avctx->i_tex_bits  = s->i_tex_bits;
1487         avctx->p_tex_bits  = s->p_tex_bits;
1488         avctx->i_count     = s->i_count;
1489         // FIXME f/b_count in avctx
1490         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1491         avctx->skip_count  = s->skip_count;
1492
1493         ff_MPV_frame_end(s);
1494
1495         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1496             ff_mjpeg_encode_picture_trailer(s);
1497
1498         if (avctx->rc_buffer_size) {
1499             RateControlContext *rcc = &s->rc_context;
1500             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1501
1502             if (put_bits_count(&s->pb) > max_size &&
1503                 s->lambda < s->avctx->lmax) {
1504                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1505                                        (s->qscale + 1) / s->qscale);
1506                 if (s->adaptive_quant) {
1507                     int i;
1508                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1509                         s->lambda_table[i] =
1510                             FFMAX(s->lambda_table[i] + 1,
1511                                   s->lambda_table[i] * (s->qscale + 1) /
1512                                   s->qscale);
1513                 }
1514                 s->mb_skipped = 0;        // done in MPV_frame_start()
1515                 // done in encode_picture() so we must undo it
1516                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1517                     if (s->flipflop_rounding          ||
1518                         s->codec_id == AV_CODEC_ID_H263P ||
1519                         s->codec_id == AV_CODEC_ID_MPEG4)
1520                         s->no_rounding ^= 1;
1521                 }
1522                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1523                     s->time_base       = s->last_time_base;
1524                     s->last_non_b_time = s->time - s->pp_time;
1525                 }
1526                 for (i = 0; i < context_count; i++) {
1527                     PutBitContext *pb = &s->thread_context[i]->pb;
1528                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1529                 }
1530                 goto vbv_retry;
1531             }
1532
1533             assert(s->avctx->rc_max_rate);
1534         }
1535
1536         if (s->flags & CODEC_FLAG_PASS1)
1537             ff_write_pass1_stats(s);
1538
1539         for (i = 0; i < 4; i++) {
1540             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1541             avctx->error[i] += s->current_picture_ptr->f.error[i];
1542         }
1543
1544         if (s->flags & CODEC_FLAG_PASS1)
1545             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1546                    avctx->i_tex_bits + avctx->p_tex_bits ==
1547                        put_bits_count(&s->pb));
1548         flush_put_bits(&s->pb);
1549         s->frame_bits  = put_bits_count(&s->pb);
1550
1551         stuffing_count = ff_vbv_update(s, s->frame_bits);
1552         s->stuffing_bits = 8*stuffing_count;
1553         if (stuffing_count) {
1554             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1555                     stuffing_count + 50) {
1556                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1557                 return -1;
1558             }
1559
1560             switch (s->codec_id) {
1561             case AV_CODEC_ID_MPEG1VIDEO:
1562             case AV_CODEC_ID_MPEG2VIDEO:
1563                 while (stuffing_count--) {
1564                     put_bits(&s->pb, 8, 0);
1565                 }
1566             break;
1567             case AV_CODEC_ID_MPEG4:
1568                 put_bits(&s->pb, 16, 0);
1569                 put_bits(&s->pb, 16, 0x1C3);
1570                 stuffing_count -= 4;
1571                 while (stuffing_count--) {
1572                     put_bits(&s->pb, 8, 0xFF);
1573                 }
1574             break;
1575             default:
1576                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1577             }
1578             flush_put_bits(&s->pb);
1579             s->frame_bits  = put_bits_count(&s->pb);
1580         }
1581
1582         /* update mpeg1/2 vbv_delay for CBR */
1583         if (s->avctx->rc_max_rate                          &&
1584             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1585             s->out_format == FMT_MPEG1                     &&
1586             90000LL * (avctx->rc_buffer_size - 1) <=
1587                 s->avctx->rc_max_rate * 0xFFFFLL) {
1588             int vbv_delay, min_delay;
1589             double inbits  = s->avctx->rc_max_rate *
1590                              av_q2d(s->avctx->time_base);
1591             int    minbits = s->frame_bits - 8 *
1592                              (s->vbv_delay_ptr - s->pb.buf - 1);
1593             double bits    = s->rc_context.buffer_index + minbits - inbits;
1594
1595             if (bits < 0)
1596                 av_log(s->avctx, AV_LOG_ERROR,
1597                        "Internal error, negative bits\n");
1598
1599             assert(s->repeat_first_field == 0);
1600
1601             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1602             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1603                         s->avctx->rc_max_rate;
1604
1605             vbv_delay = FFMAX(vbv_delay, min_delay);
1606
1607             av_assert0(vbv_delay < 0xFFFF);
1608
1609             s->vbv_delay_ptr[0] &= 0xF8;
1610             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1611             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1612             s->vbv_delay_ptr[2] &= 0x07;
1613             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1614             avctx->vbv_delay     = vbv_delay * 300;
1615         }
1616         s->total_bits     += s->frame_bits;
1617         avctx->frame_bits  = s->frame_bits;
1618
1619         pkt->pts = s->current_picture.f.pts;
1620         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1621             if (!s->current_picture.f.coded_picture_number)
1622                 pkt->dts = pkt->pts - s->dts_delta;
1623             else
1624                 pkt->dts = s->reordered_pts;
1625             s->reordered_pts = pkt->pts;
1626         } else
1627             pkt->dts = pkt->pts;
1628         if (s->current_picture.f.key_frame)
1629             pkt->flags |= AV_PKT_FLAG_KEY;
1630         if (s->mb_info)
1631             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1632     } else {
1633         s->frame_bits = 0;
1634     }
1635     assert((s->frame_bits & 7) == 0);
1636
1637     pkt->size = s->frame_bits / 8;
1638     *got_packet = !!pkt->size;
1639     return 0;
1640 }
1641
1642 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1643                                                 int n, int threshold)
1644 {
1645     static const char tab[64] = {
1646         3, 2, 2, 1, 1, 1, 1, 1,
1647         1, 1, 1, 1, 1, 1, 1, 1,
1648         1, 1, 1, 1, 1, 1, 1, 1,
1649         0, 0, 0, 0, 0, 0, 0, 0,
1650         0, 0, 0, 0, 0, 0, 0, 0,
1651         0, 0, 0, 0, 0, 0, 0, 0,
1652         0, 0, 0, 0, 0, 0, 0, 0,
1653         0, 0, 0, 0, 0, 0, 0, 0
1654     };
1655     int score = 0;
1656     int run = 0;
1657     int i;
1658     int16_t *block = s->block[n];
1659     const int last_index = s->block_last_index[n];
1660     int skip_dc;
1661
1662     if (threshold < 0) {
1663         skip_dc = 0;
1664         threshold = -threshold;
1665     } else
1666         skip_dc = 1;
1667
1668     /* Are all we could set to zero already zero? */
1669     if (last_index <= skip_dc - 1)
1670         return;
1671
1672     for (i = 0; i <= last_index; i++) {
1673         const int j = s->intra_scantable.permutated[i];
1674         const int level = FFABS(block[j]);
1675         if (level == 1) {
1676             if (skip_dc && i == 0)
1677                 continue;
1678             score += tab[run];
1679             run = 0;
1680         } else if (level > 1) {
1681             return;
1682         } else {
1683             run++;
1684         }
1685     }
1686     if (score >= threshold)
1687         return;
1688     for (i = skip_dc; i <= last_index; i++) {
1689         const int j = s->intra_scantable.permutated[i];
1690         block[j] = 0;
1691     }
1692     if (block[0])
1693         s->block_last_index[n] = 0;
1694     else
1695         s->block_last_index[n] = -1;
1696 }
1697
1698 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1699                                int last_index)
1700 {
1701     int i;
1702     const int maxlevel = s->max_qcoeff;
1703     const int minlevel = s->min_qcoeff;
1704     int overflow = 0;
1705
1706     if (s->mb_intra) {
1707         i = 1; // skip clipping of intra dc
1708     } else
1709         i = 0;
1710
1711     for (; i <= last_index; i++) {
1712         const int j = s->intra_scantable.permutated[i];
1713         int level = block[j];
1714
1715         if (level > maxlevel) {
1716             level = maxlevel;
1717             overflow++;
1718         } else if (level < minlevel) {
1719             level = minlevel;
1720             overflow++;
1721         }
1722
1723         block[j] = level;
1724     }
1725
1726     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1727         av_log(s->avctx, AV_LOG_INFO,
1728                "warning, clipping %d dct coefficients to %d..%d\n",
1729                overflow, minlevel, maxlevel);
1730 }
1731
/**
 * Fill an 8x8 weight table from the local pixel activity of an 8x8 block.
 *
 * For every pixel the neighbourhood of up to 3x3 pixels (clipped to the
 * block) is scanned, and the weight is
 * 36 * sqrt(count * sum_of_squares - sum * sum) / count.
 *
 * @param weight output, 64 entries in row-major order
 * @param ptr    top-left pixel of the block
 * @param stride distance in bytes between two rows of ptr
 */
static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
{
    int col, row;
    // FIXME optimize
    for (row = 0; row < 8; row++) {
        const int top    = FFMAX(row - 1, 0);
        const int bottom = FFMIN(8, row + 2);

        for (col = 0; col < 8; col++) {
            const int left  = FFMAX(col - 1, 0);
            const int right = FFMIN(8, col + 2);
            int total    = 0;
            int total_sq = 0;
            int samples  = 0;
            int nx, ny;

            for (ny = top; ny < bottom; ny++) {
                const uint8_t *line = ptr + ny * stride;

                for (nx = left; nx < right; nx++) {
                    const int v = line[nx];

                    total    += v;
                    total_sq += v * v;
                    samples++;
                }
            }
            weight[col + 8 * row] =
                (36 * ff_sqrt(samples * total_sq - total * total)) / samples;
        }
    }
}
1755
/**
 * Encode the macroblock at (s->mb_x, s->mb_y).
 *
 * In order: optional per-macroblock quantizer update, locating the source
 * pixels (through edge_emu_buffer when the macroblock reaches past the
 * picture edge), filling s->block[] (raw pixels for intra macroblocks,
 * difference against the motion compensated prediction otherwise), DCT and
 * quantization with optional noise shaping and coefficient elimination, and
 * finally the bitstream writer selected by s->codec_id.
 *
 * @param s               encoder context
 * @param motion_x        handed unchanged to the codec specific macroblock
 *                        writers that take a motion vector
 * @param motion_y        see motion_x
 * @param mb_block_height number of chroma lines covered by one macroblock
 * @param mb_block_width  number of chroma columns covered by one macroblock
 * @param mb_block_count  number of 8x8 blocks per macroblock (encode_mb()
 *                        passes 6, 8 or 12)
 */
1756 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1757                                                 int motion_x, int motion_y,
1758                                                 int mb_block_height,
1759                                                 int mb_block_width,
1760                                                 int mb_block_count)
1761 {
1762     int16_t weight[12][64];
1763     int16_t orig[12][64];
1764     const int mb_x = s->mb_x;
1765     const int mb_y = s->mb_y;
1766     int i;
1767     int skip_dct[12];
1768     int dct_offset = s->linesize * 8; // default for progressive frames
1769     int uv_dct_offset = s->uvlinesize * 8;
1770     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1771     int wrap_y, wrap_c;
1772
         /* every block starts with the global skipdct setting; individual
          * blocks may additionally be flagged for skipping further down */
1773     for (i = 0; i < mb_block_count; i++)
1774         skip_dct[i] = s->skipdct;
1775
         /* per-macroblock quantizer selection */
1776     if (s->adaptive_quant) {
1777         const int last_qp = s->qscale;
1778         const int mb_xy = mb_x + mb_y * s->mb_stride;
1779
1780         s->lambda = s->lambda_table[mb_xy];
1781         update_qscale(s);
1782
1783         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1784             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1785             s->dquant = s->qscale - last_qp;
1786
1787             if (s->out_format == FMT_H263) {
                     /* H.263 style output: the quantizer step between
                      * macroblocks is limited to -2..2 */
1788                 s->dquant = av_clip(s->dquant, -2, 2);
1789
1790                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
                         /* MPEG-4 inter macroblocks: no odd dquant and no
                          * dquant with direct mode in B-pictures, and no
                          * dquant at all with 8x8 motion vectors */
1791                     if (!s->mb_intra) {
1792                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1793                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1794                                 s->dquant = 0;
1795                         }
1796                         if (s->mv_type == MV_TYPE_8X8)
1797                             s->dquant = 0;
1798                     }
1799                 }
1800             }
1801         }
1802         ff_set_qscale(s, last_qp + s->dquant);
1803     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1804         ff_set_qscale(s, s->qscale + s->dquant);
1805
         /* source pixels of this macroblock inside the input picture */
1806     wrap_y = s->linesize;
1807     wrap_c = s->uvlinesize;
1808     ptr_y  = s->new_picture.f.data[0] +
1809              (mb_y * 16 * wrap_y)              + mb_x * 16;
1810     ptr_cb = s->new_picture.f.data[1] +
1811              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1812     ptr_cr = s->new_picture.f.data[2] +
1813              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1814
         /* The macroblock reaches past the right or bottom picture edge (AMV
          * excluded): fetch luma and both chroma blocks through
          * emulated_edge_mc() into edge_emu_buffer and read the source from
          * there. Cb is stored 18 luma lines into the buffer and Cr 16 bytes
          * to the right of Cb. */
1815     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1816         uint8_t *ebuf = s->edge_emu_buffer + 32;
1817         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
1818         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
1819         s->vdsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1820                                  mb_y * 16, s->width, s->height);
1821         ptr_y = ebuf;
1822         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, mb_block_width,
1823                                  mb_block_height, mb_x * mb_block_width, mb_y * mb_block_height,
1824                                  cw, ch);
1825         ptr_cb = ebuf + 18 * wrap_y;
1826         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr, wrap_c, mb_block_width,
1827                                  mb_block_height, mb_x * mb_block_width, mb_y * mb_block_height,
1828                                  cw, ch);
1829         ptr_cr = ebuf + 18 * wrap_y + 16;
1830     }
1831
1832     if (s->mb_intra) {
             /* intra: the blocks are filled with the source pixels */
1833         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1834             int progressive_score, interlaced_score;
1835
                 /* Frame vs. field DCT decision on the luma source. The
                  * progressive score is lowered by 400, so field DCT is only
                  * chosen when it wins by more than that margin. */
1836             s->interlaced_dct = 0;
1837             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1838                                                     NULL, wrap_y, 8) +
1839                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1840                                                     NULL, wrap_y, 8) - 400;
1841
1842             if (progressive_score > 0) {
1843                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1844                                                        NULL, wrap_y * 2, 8) +
1845                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1846                                                        NULL, wrap_y * 2, 8);
1847                 if (progressive_score > interlaced_score) {
1848                     s->interlaced_dct = 1;
1849
                         /* field DCT: the lower blocks start one line down
                          * and every block reads every second line */
1850                     dct_offset = wrap_y;
1851                     uv_dct_offset = wrap_c;
1852                     wrap_y <<= 1;
1853                     if (s->chroma_format == CHROMA_422 ||
1854                         s->chroma_format == CHROMA_444)
1855                         wrap_c <<= 1;
1856                 }
1857             }
1858         }
1859
1860         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1861         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1862         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1863         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1864
1865         if (s->flags & CODEC_FLAG_GRAY) {
1866             skip_dct[4] = 1;
1867             skip_dct[5] = 1;
1868         } else {
1869             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1870             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1871             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
1872                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
1873                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
1874             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
1875                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
1876                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
1877                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
1878                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
1879                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
1880                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
1881             }
1882         }
1883     } else {
             /* inter: build the prediction in s->dest[] and code the
              * difference between source and prediction */
1884         op_pixels_func (*op_pix)[4];
1885         qpel_mc_func (*op_qpix)[16];
1886         uint8_t *dest_y, *dest_cb, *dest_cr;
1887
1888         dest_y  = s->dest[0];
1889         dest_cb = s->dest[1];
1890         dest_cr = s->dest[2];
1891
1892         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1893             op_pix  = s->hdsp.put_pixels_tab;
1894             op_qpix = s->dsp.put_qpel_pixels_tab;
1895         } else {
1896             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1897             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1898         }
1899
1900         if (s->mv_dir & MV_DIR_FORWARD) {
1901             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1902                           s->last_picture.f.data,
1903                           op_pix, op_qpix);
                 /* switch to the avg_* tables, presumably so that a backward
                  * prediction that follows is blended with the forward one */
1904             op_pix  = s->hdsp.avg_pixels_tab;
1905             op_qpix = s->dsp.avg_qpel_pixels_tab;
1906         }
1907         if (s->mv_dir & MV_DIR_BACKWARD) {
1908             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1909                           s->next_picture.f.data,
1910                           op_pix, op_qpix);
1911         }
1912
1913         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1914             int progressive_score, interlaced_score;
1915
                 /* same frame/field DCT decision as in the intra case, but
                  * comparing prediction against source; the bias toward
                  * frame DCT is 400, or 800 with FF_CMP_VSSE */
1916             s->interlaced_dct = 0;
1917             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1918                                                     ptr_y,              wrap_y,
1919                                                     8) +
1920                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1921                                                     ptr_y + wrap_y * 8, wrap_y,
1922                                                     8) - 400;
1923
1924             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1925                 progressive_score -= 400;
1926
1927             if (progressive_score > 0) {
1928                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1929                                                        ptr_y,
1930                                                        wrap_y * 2, 8) +
1931                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1932                                                        ptr_y + wrap_y,
1933                                                        wrap_y * 2, 8);
1934
1935                 if (progressive_score > interlaced_score) {
1936                     s->interlaced_dct = 1;
1937
1938                     dct_offset = wrap_y;
1939                     uv_dct_offset = wrap_c;
1940                     wrap_y <<= 1;
1941                     if (s->chroma_format == CHROMA_422)
1942                         wrap_c <<= 1;
1943                 }
1944             }
1945         }
1946
1947         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1948         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1949         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1950                            dest_y + dct_offset, wrap_y);
1951         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1952                            dest_y + dct_offset + 8, wrap_y);
1953
1954         if (s->flags & CODEC_FLAG_GRAY) {
1955             skip_dct[4] = 1;
1956             skip_dct[5] = 1;
1957         } else {
1958             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1959             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1960             if (!s->chroma_y_shift) { /* 422 */
1961                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
1962                                    dest_cb + uv_dct_offset, wrap_c);
1963                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
1964                                    dest_cr + uv_dct_offset, wrap_c);
1965             }
1966         }
1967         /* pre quantization */
             /* Only when the stored mc_mb_var of this macroblock is below
              * 2 * qscale^2: a block whose SAD between source and prediction
              * is below 20 * qscale is not transformed at all. */
1968         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1969                 2 * s->qscale * s->qscale) {
1970             // FIXME optimize
1971             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1972                               wrap_y, 8) < 20 * s->qscale)
1973                 skip_dct[0] = 1;
1974             if (s->dsp.sad[1](NULL, ptr_y + 8,
1975                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1976                 skip_dct[1] = 1;
1977             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1978                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1979                 skip_dct[2] = 1;
1980             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1981                               dest_y + dct_offset + 8,
1982                               wrap_y, 8) < 20 * s->qscale)
1983                 skip_dct[3] = 1;
1984             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1985                               wrap_c, 8) < 20 * s->qscale)
1986                 skip_dct[4] = 1;
1987             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1988                               wrap_c, 8) < 20 * s->qscale)
1989                 skip_dct[5] = 1;
1990             if (!s->chroma_y_shift) { /* 422 */
1991                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
1992                                   dest_cb + uv_dct_offset,
1993                                   wrap_c, 8) < 20 * s->qscale)
1994                     skip_dct[6] = 1;
1995                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
1996                                   dest_cr + uv_dct_offset,
1997                                   wrap_c, 8) < 20 * s->qscale)
1998                     skip_dct[7] = 1;
1999             }
2000         }
2001     }
2002
         /* Noise shaping needs, for every block that will be transformed, a
          * weight table derived from the source pixels and a copy of the
          * untransformed block contents; both are consumed by
          * dct_quantize_refine() below.
          * NOTE(review): weights are only computed for blocks 0..7, although
          * mb_block_count can be 12 -- confirm 4:4:4 never gets here. */
2003     if (s->quantizer_noise_shaping) {
2004         if (!skip_dct[0])
2005             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2006         if (!skip_dct[1])
2007             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2008         if (!skip_dct[2])
2009             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2010         if (!skip_dct[3])
2011             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2012         if (!skip_dct[4])
2013             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2014         if (!skip_dct[5])
2015             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2016         if (!s->chroma_y_shift) { /* 422 */
2017             if (!skip_dct[6])
2018                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2019                                   wrap_c);
2020             if (!skip_dct[7])
2021                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2022                                   wrap_c);
2023         }
2024         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2025     }
2026
2027     /* DCT & quantize */
2028     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2029     {
2030         for (i = 0; i < mb_block_count; i++) {
2031             if (!skip_dct[i]) {
                     /* written by dct_quantize(); non-zero means the
                      * coefficients have to be clipped */
2032                 int overflow;
2033                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2034                 // FIXME we could decide to change to quantizer instead of
2035                 // clipping
2036                 // JS: I don't think that would be a good idea it could lower
2037                 //     quality instead of improve it. Just INTRADC clipping
2038                 //     deserves changes in quantizer
2039                 if (overflow)
2040                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2041             } else
2042                 s->block_last_index[i] = -1;
2043         }
2044         if (s->quantizer_noise_shaping) {
2045             for (i = 0; i < mb_block_count; i++) {
2046                 if (!skip_dct[i]) {
2047                     s->block_last_index[i] =
2048                         dct_quantize_refine(s, s->block[i], weight[i],
2049                                             orig[i], i, s->qscale);
2050                 }
2051             }
2052         }
2053
             /* inter macroblocks only: drop blocks that contain nothing but
              * a few isolated +-1 coefficients */
2054         if (s->luma_elim_threshold && !s->mb_intra)
2055             for (i = 0; i < 4; i++)
2056                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2057         if (s->chroma_elim_threshold && !s->mb_intra)
2058             for (i = 4; i < mb_block_count; i++)
2059                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2060
             /* with CBP_RD, empty blocks get a very large coded_score */
2061         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2062             for (i = 0; i < mb_block_count; i++) {
2063                 if (s->block_last_index[i] == -1)
2064                     s->coded_score[i] = INT_MAX / 256;
2065             }
2066         }
2067     }
2068
         /* gray-only intra macroblock: every chroma block becomes a DC-only
          * block with the value (1024 + c_dc_scale / 2) / c_dc_scale */
2069     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2070         s->block_last_index[4] =
2071         s->block_last_index[5] = 0;
2072         s->block[4][0] =
2073         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2074         if (!s->chroma_y_shift) { /* 422 / 444 */
2075             for (i=6; i<12; i++) {
2076                 s->block_last_index[i] = 0;
2077                 s->block[i][0] = s->block[4][0];
2078             }
2079         }
2080     }
2081
2082     // non c quantize code returns incorrect block_last_index FIXME
         /* recompute the last index by scanning backwards for the last
          * non-zero coefficient in scan order */
2083     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2084         for (i = 0; i < mb_block_count; i++) {
2085             int j;
2086             if (s->block_last_index[i] > 0) {
2087                 for (j = 63; j > 0; j--) {
2088                     if (s->block[i][s->intra_scantable.permutated[j]])
2089                         break;
2090                 }
2091                 s->block_last_index[i] = j;
2092             }
2093         }
2094     }
2095
2096     /* huffman encode */
         /* each call is guarded by its CONFIG_* constant, so a disabled
          * encoder is never called */
2097     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2098     case AV_CODEC_ID_MPEG1VIDEO:
2099     case AV_CODEC_ID_MPEG2VIDEO:
2100         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2101             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2102         break;
2103     case AV_CODEC_ID_MPEG4:
2104         if (CONFIG_MPEG4_ENCODER)
2105             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2106         break;
2107     case AV_CODEC_ID_MSMPEG4V2:
2108     case AV_CODEC_ID_MSMPEG4V3:
2109     case AV_CODEC_ID_WMV1:
2110         if (CONFIG_MSMPEG4_ENCODER)
2111             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2112         break;
2113     case AV_CODEC_ID_WMV2:
2114         if (CONFIG_WMV2_ENCODER)
2115             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2116         break;
2117     case AV_CODEC_ID_H261:
2118         if (CONFIG_H261_ENCODER)
2119             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2120         break;
2121     case AV_CODEC_ID_H263:
2122     case AV_CODEC_ID_H263P:
2123     case AV_CODEC_ID_FLV1:
2124     case AV_CODEC_ID_RV10:
2125     case AV_CODEC_ID_RV20:
2126         if (CONFIG_H263_ENCODER)
2127             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2128         break;
2129     case AV_CODEC_ID_MJPEG:
2130     case AV_CODEC_ID_AMV:
2131         if (CONFIG_MJPEG_ENCODER)
2132             ff_mjpeg_encode_mb(s, s->block);
2133         break;
2134     default:
2135         av_assert1(0);
2136     }
2137 }
2138
2139 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2140 {
2141     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2142     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2143     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2144 }
2145
2146 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2147     int i;
2148
2149     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2150
2151     /* mpeg1 */
2152     d->mb_skip_run= s->mb_skip_run;
2153     for(i=0; i<3; i++)
2154         d->last_dc[i] = s->last_dc[i];
2155
2156     /* statistics */
2157     d->mv_bits= s->mv_bits;
2158     d->i_tex_bits= s->i_tex_bits;
2159     d->p_tex_bits= s->p_tex_bits;
2160     d->i_count= s->i_count;
2161     d->f_count= s->f_count;
2162     d->b_count= s->b_count;
2163     d->skip_count= s->skip_count;
2164     d->misc_bits= s->misc_bits;
2165     d->last_bits= 0;
2166
2167     d->mb_skipped= 0;
2168     d->qscale= s->qscale;
2169     d->dquant= s->dquant;
2170
2171     d->esc3_level_length= s->esc3_level_length;
2172 }
2173
2174 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2175     int i;
2176
2177     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2178     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2179
2180     /* mpeg1 */
2181     d->mb_skip_run= s->mb_skip_run;
2182     for(i=0; i<3; i++)
2183         d->last_dc[i] = s->last_dc[i];
2184
2185     /* statistics */
2186     d->mv_bits= s->mv_bits;
2187     d->i_tex_bits= s->i_tex_bits;
2188     d->p_tex_bits= s->p_tex_bits;
2189     d->i_count= s->i_count;
2190     d->f_count= s->f_count;
2191     d->b_count= s->b_count;
2192     d->skip_count= s->skip_count;
2193     d->misc_bits= s->misc_bits;
2194
2195     d->mb_intra= s->mb_intra;
2196     d->mb_skipped= s->mb_skipped;
2197     d->mv_type= s->mv_type;
2198     d->mv_dir= s->mv_dir;
2199     d->pb= s->pb;
2200     if(s->data_partitioning){
2201         d->pb2= s->pb2;
2202         d->tex_pb= s->tex_pb;
2203     }
2204     d->block= s->block;
2205     for(i=0; i<8; i++)
2206         d->block_last_index[i]= s->block_last_index[i];
2207     d->interlaced_dct= s->interlaced_dct;
2208     d->qscale= s->qscale;
2209
2210     d->esc3_level_length= s->esc3_level_length;
2211 }
2212
2213 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2214                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2215                            int *dmin, int *next_block, int motion_x, int motion_y)
2216 {
2217     int score;
2218     uint8_t *dest_backup[3];
2219
2220     copy_context_before_encode(s, backup, type);
2221
2222     s->block= s->blocks[*next_block];
2223     s->pb= pb[*next_block];
2224     if(s->data_partitioning){
2225         s->pb2   = pb2   [*next_block];
2226         s->tex_pb= tex_pb[*next_block];
2227     }
2228
2229     if(*next_block){
2230         memcpy(dest_backup, s->dest, sizeof(s->dest));
2231         s->dest[0] = s->rd_scratchpad;
2232         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2233         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2234         assert(s->linesize >= 32); //FIXME
2235     }
2236
2237     encode_mb(s, motion_x, motion_y);
2238
2239     score= put_bits_count(&s->pb);
2240     if(s->data_partitioning){
2241         score+= put_bits_count(&s->pb2);
2242         score+= put_bits_count(&s->tex_pb);
2243     }
2244
2245     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2246         ff_MPV_decode_mb(s, s->block);
2247
2248         score *= s->lambda2;
2249         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2250     }
2251
2252     if(*next_block){
2253         memcpy(s->dest, dest_backup, sizeof(s->dest));
2254     }
2255
2256     if(score<*dmin){
2257         *dmin= score;
2258         *next_block^=1;
2259
2260         copy_context_after_encode(best, s, type);
2261     }
2262 }
2263
2264 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2265     uint32_t *sq = ff_squareTbl + 256;
2266     int acc=0;
2267     int x,y;
2268
2269     if(w==16 && h==16)
2270         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2271     else if(w==8 && h==8)
2272         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2273
2274     for(y=0; y<h; y++){
2275         for(x=0; x<w; x++){
2276             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2277         }
2278     }
2279
2280     av_assert2(acc>=0);
2281
2282     return acc;
2283 }
2284
2285 static int sse_mb(MpegEncContext *s){
2286     int w= 16;
2287     int h= 16;
2288
2289     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2290     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2291
2292     if(w==16 && h==16)
2293       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2294         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2295                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2296                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2297       }else{
2298         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2299                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2300                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2301       }
2302     else
2303         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2304                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2305                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2306 }
2307
2308 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2309     MpegEncContext *s= *(void**)arg;
2310
2311
2312     s->me.pre_pass=1;
2313     s->me.dia_size= s->avctx->pre_dia_size;
2314     s->first_slice_line=1;
2315     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2316         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2317             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2318         }
2319         s->first_slice_line=0;
2320     }
2321
2322     s->me.pre_pass=0;
2323
2324     return 0;
2325 }
2326
2327 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2328     MpegEncContext *s= *(void**)arg;
2329
2330     ff_check_alignment();
2331
2332     s->me.dia_size= s->avctx->dia_size;
2333     s->first_slice_line=1;
2334     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2335         s->mb_x=0; //for block init below
2336         ff_init_block_index(s);
2337         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2338             s->block_index[0]+=2;
2339             s->block_index[1]+=2;
2340             s->block_index[2]+=2;
2341             s->block_index[3]+=2;
2342
2343             /* compute motion vector & mb_type and store in context */
2344             if(s->pict_type==AV_PICTURE_TYPE_B)
2345                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2346             else
2347                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2348         }
2349         s->first_slice_line=0;
2350     }
2351     return 0;
2352 }
2353
2354 static int mb_var_thread(AVCodecContext *c, void *arg){
2355     MpegEncContext *s= *(void**)arg;
2356     int mb_x, mb_y;
2357
2358     ff_check_alignment();
2359
2360     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2361         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2362             int xx = mb_x * 16;
2363             int yy = mb_y * 16;
2364             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2365             int varc;
2366             int sum = s->dsp.pix_sum(pix, s->linesize);
2367
2368             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2369
2370             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2371             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2372             s->me.mb_var_sum_temp    += varc;
2373         }
2374     }
2375     return 0;
2376 }
2377
2378 static void write_slice_end(MpegEncContext *s){
2379     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2380         if(s->partitioned_frame){
2381             ff_mpeg4_merge_partitions(s);
2382         }
2383
2384         ff_mpeg4_stuffing(&s->pb);
2385     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2386         ff_mjpeg_encode_stuffing(s);
2387     }
2388
2389     avpriv_align_put_bits(&s->pb);
2390     flush_put_bits(&s->pb);
2391
2392     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2393         s->misc_bits+= get_bits_diff(s);
2394 }
2395
2396 static void write_mb_info(MpegEncContext *s)
2397 {
2398     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2399     int offset = put_bits_count(&s->pb);
2400     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2401     int gobn = s->mb_y / s->gob_index;
2402     int pred_x, pred_y;
2403     if (CONFIG_H263_ENCODER)
2404         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2405     bytestream_put_le32(&ptr, offset);
2406     bytestream_put_byte(&ptr, s->qscale);
2407     bytestream_put_byte(&ptr, gobn);
2408     bytestream_put_le16(&ptr, mba);
2409     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2410     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2411     /* 4MV not implemented */
2412     bytestream_put_byte(&ptr, 0); /* hmv2 */
2413     bytestream_put_byte(&ptr, 0); /* vmv2 */
2414 }
2415
2416 static void update_mb_info(MpegEncContext *s, int startcode)
2417 {
2418     if (!s->mb_info)
2419         return;
2420     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2421         s->mb_info_size += 12;
2422         s->prev_mb_info = s->last_mb_info;
2423     }
2424     if (startcode) {
2425         s->prev_mb_info = put_bits_count(&s->pb)/8;
2426         /* This might have incremented mb_info_size above, and we return without
2427          * actually writing any info into that slot yet. But in that case,
2428          * this will be called again at the start of the after writing the
2429          * start code, actually writing the mb info. */
2430         return;
2431     }
2432
2433     s->last_mb_info = put_bits_count(&s->pb)/8;
2434     if (!s->mb_info_size)
2435         s->mb_info_size += 12;
2436     write_mb_info(s);
2437 }
2438
2439 static int encode_thread(AVCodecContext *c, void *arg){
2440     MpegEncContext *s= *(void**)arg;
2441     int mb_x, mb_y, pdif = 0;
2442     int chr_h= 16>>s->chroma_y_shift;
2443     int i, j;
2444     MpegEncContext best_s, backup_s;
2445     uint8_t bit_buf[2][MAX_MB_BYTES];
2446     uint8_t bit_buf2[2][MAX_MB_BYTES];
2447     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2448     PutBitContext pb[2], pb2[2], tex_pb[2];
2449
2450     ff_check_alignment();
2451
2452     for(i=0; i<2; i++){
2453         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2454         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2455         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2456     }
2457
2458     s->last_bits= put_bits_count(&s->pb);
2459     s->mv_bits=0;
2460     s->misc_bits=0;
2461     s->i_tex_bits=0;
2462     s->p_tex_bits=0;
2463     s->i_count=0;
2464     s->f_count=0;
2465     s->b_count=0;
2466     s->skip_count=0;
2467
2468     for(i=0; i<3; i++){
2469         /* init last dc values */
2470         /* note: quant matrix value (8) is implied here */
2471         s->last_dc[i] = 128 << s->intra_dc_precision;
2472
2473         s->current_picture.f.error[i] = 0;
2474     }
2475     if(s->codec_id==AV_CODEC_ID_AMV){
2476         s->last_dc[0] = 128*8/13;
2477         s->last_dc[1] = 128*8/14;
2478         s->last_dc[2] = 128*8/14;
2479     }
2480     s->mb_skip_run = 0;
2481     memset(s->last_mv, 0, sizeof(s->last_mv));
2482
2483     s->last_mv_dir = 0;
2484
2485     switch(s->codec_id){
2486     case AV_CODEC_ID_H263:
2487     case AV_CODEC_ID_H263P:
2488     case AV_CODEC_ID_FLV1:
2489         if (CONFIG_H263_ENCODER)
2490             s->gob_index = ff_h263_get_gob_height(s);
2491         break;
2492     case AV_CODEC_ID_MPEG4:
2493         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2494             ff_mpeg4_init_partitions(s);
2495         break;
2496     }
2497
2498     s->resync_mb_x=0;
2499     s->resync_mb_y=0;
2500     s->first_slice_line = 1;
2501     s->ptr_lastgob = s->pb.buf;
2502     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2503         s->mb_x=0;
2504         s->mb_y= mb_y;
2505
2506         ff_set_qscale(s, s->qscale);
2507         ff_init_block_index(s);
2508
2509         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2510             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2511             int mb_type= s->mb_type[xy];
2512 //            int d;
2513             int dmin= INT_MAX;
2514             int dir;
2515
2516             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2517                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2518                 return -1;
2519             }
2520             if(s->data_partitioning){
2521                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2522                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2523                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2524                     return -1;
2525                 }
2526             }
2527
2528             s->mb_x = mb_x;
2529             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2530             ff_update_block_index(s);
2531
2532             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2533                 ff_h261_reorder_mb_index(s);
2534                 xy= s->mb_y*s->mb_stride + s->mb_x;
2535                 mb_type= s->mb_type[xy];
2536             }
2537
2538             /* write gob / video packet header  */
2539             if(s->rtp_mode){
2540                 int current_packet_size, is_gob_start;
2541
2542                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2543
2544                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2545
2546                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2547
2548                 switch(s->codec_id){
2549                 case AV_CODEC_ID_H263:
2550                 case AV_CODEC_ID_H263P:
2551                     if(!s->h263_slice_structured)
2552                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2553                     break;
2554                 case AV_CODEC_ID_MPEG2VIDEO:
2555                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2556                 case AV_CODEC_ID_MPEG1VIDEO:
2557                     if(s->mb_skip_run) is_gob_start=0;
2558                     break;
2559                 case AV_CODEC_ID_MJPEG:
2560                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2561                     break;
2562                 }
2563
2564                 if(is_gob_start){
2565                     if(s->start_mb_y != mb_y || mb_x!=0){
2566                         write_slice_end(s);
2567                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2568                             ff_mpeg4_init_partitions(s);
2569                         }
2570                     }
2571
2572                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2573                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2574
2575                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2576                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2577                         int d= 100 / s->avctx->error_rate;
2578                         if(r % d == 0){
2579                             current_packet_size=0;
2580                             s->pb.buf_ptr= s->ptr_lastgob;
2581                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2582                         }
2583                     }
2584
2585                     if (s->avctx->rtp_callback){
2586                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2587                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2588                     }
2589                     update_mb_info(s, 1);
2590
2591                     switch(s->codec_id){
2592                     case AV_CODEC_ID_MPEG4:
2593                         if (CONFIG_MPEG4_ENCODER) {
2594                             ff_mpeg4_encode_video_packet_header(s);
2595                             ff_mpeg4_clean_buffers(s);
2596                         }
2597                     break;
2598                     case AV_CODEC_ID_MPEG1VIDEO:
2599                     case AV_CODEC_ID_MPEG2VIDEO:
2600                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2601                             ff_mpeg1_encode_slice_header(s);
2602                             ff_mpeg1_clean_buffers(s);
2603                         }
2604                     break;
2605                     case AV_CODEC_ID_H263:
2606                     case AV_CODEC_ID_H263P:
2607                         if (CONFIG_H263_ENCODER)
2608                             ff_h263_encode_gob_header(s, mb_y);
2609                     break;
2610                     }
2611
2612                     if(s->flags&CODEC_FLAG_PASS1){
2613                         int bits= put_bits_count(&s->pb);
2614                         s->misc_bits+= bits - s->last_bits;
2615                         s->last_bits= bits;
2616                     }
2617
2618                     s->ptr_lastgob += current_packet_size;
2619                     s->first_slice_line=1;
2620                     s->resync_mb_x=mb_x;
2621                     s->resync_mb_y=mb_y;
2622                 }
2623             }
2624
2625             if(  (s->resync_mb_x   == s->mb_x)
2626                && s->resync_mb_y+1 == s->mb_y){
2627                 s->first_slice_line=0;
2628             }
2629
2630             s->mb_skipped=0;
2631             s->dquant=0; //only for QP_RD
2632
2633             update_mb_info(s, 0);
2634
2635             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2636                 int next_block=0;
2637                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2638
2639                 copy_context_before_encode(&backup_s, s, -1);
2640                 backup_s.pb= s->pb;
2641                 best_s.data_partitioning= s->data_partitioning;
2642                 best_s.partitioned_frame= s->partitioned_frame;
2643                 if(s->data_partitioning){
2644                     backup_s.pb2= s->pb2;
2645                     backup_s.tex_pb= s->tex_pb;
2646                 }
2647
2648                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2649                     s->mv_dir = MV_DIR_FORWARD;
2650                     s->mv_type = MV_TYPE_16X16;
2651                     s->mb_intra= 0;
2652                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2653                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2654                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2655                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2656                 }
2657                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2658                     s->mv_dir = MV_DIR_FORWARD;
2659                     s->mv_type = MV_TYPE_FIELD;
2660                     s->mb_intra= 0;
2661                     for(i=0; i<2; i++){
2662                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2663                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2664                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2665                     }
2666                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2667                                  &dmin, &next_block, 0, 0);
2668                 }
2669                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2670                     s->mv_dir = MV_DIR_FORWARD;
2671                     s->mv_type = MV_TYPE_16X16;
2672                     s->mb_intra= 0;
2673                     s->mv[0][0][0] = 0;
2674                     s->mv[0][0][1] = 0;
2675                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2676                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2677                 }
2678                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2679                     s->mv_dir = MV_DIR_FORWARD;
2680                     s->mv_type = MV_TYPE_8X8;
2681                     s->mb_intra= 0;
2682                     for(i=0; i<4; i++){
2683                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2684                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2685                     }
2686                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2687                                  &dmin, &next_block, 0, 0);
2688                 }
2689                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2690                     s->mv_dir = MV_DIR_FORWARD;
2691                     s->mv_type = MV_TYPE_16X16;
2692                     s->mb_intra= 0;
2693                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2694                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2695                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2696                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2697                 }
2698                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2699                     s->mv_dir = MV_DIR_BACKWARD;
2700                     s->mv_type = MV_TYPE_16X16;
2701                     s->mb_intra= 0;
2702                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2703                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2704                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2705                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2706                 }
2707                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2708                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2709                     s->mv_type = MV_TYPE_16X16;
2710                     s->mb_intra= 0;
2711                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2712                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2713                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2714                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2715                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2716                                  &dmin, &next_block, 0, 0);
2717                 }
2718                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2719                     s->mv_dir = MV_DIR_FORWARD;
2720                     s->mv_type = MV_TYPE_FIELD;
2721                     s->mb_intra= 0;
2722                     for(i=0; i<2; i++){
2723                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2724                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2725                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2726                     }
2727                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2728                                  &dmin, &next_block, 0, 0);
2729                 }
2730                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2731                     s->mv_dir = MV_DIR_BACKWARD;
2732                     s->mv_type = MV_TYPE_FIELD;
2733                     s->mb_intra= 0;
2734                     for(i=0; i<2; i++){
2735                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2736                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2737                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2738                     }
2739                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2740                                  &dmin, &next_block, 0, 0);
2741                 }
2742                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2743                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2744                     s->mv_type = MV_TYPE_FIELD;
2745                     s->mb_intra= 0;
2746                     for(dir=0; dir<2; dir++){
2747                         for(i=0; i<2; i++){
2748                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2749                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2750                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2751                         }
2752                     }
2753                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2754                                  &dmin, &next_block, 0, 0);
2755                 }
2756                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2757                     s->mv_dir = 0;
2758                     s->mv_type = MV_TYPE_16X16;
2759                     s->mb_intra= 1;
2760                     s->mv[0][0][0] = 0;
2761                     s->mv[0][0][1] = 0;
2762                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2763                                  &dmin, &next_block, 0, 0);
2764                     if(s->h263_pred || s->h263_aic){
2765                         if(best_s.mb_intra)
2766                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2767                         else
2768                             ff_clean_intra_table_entries(s); //old mode?
2769                     }
2770                 }
2771
2772                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2773                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2774                         const int last_qp= backup_s.qscale;
2775                         int qpi, qp, dc[6];
2776                         int16_t ac[6][16];
2777                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2778                         static const int dquant_tab[4]={-1,1,-2,2};
2779                         int storecoefs = s->mb_intra && s->dc_val[0];
2780
2781                         av_assert2(backup_s.dquant == 0);
2782
2783                         //FIXME intra
2784                         s->mv_dir= best_s.mv_dir;
2785                         s->mv_type = MV_TYPE_16X16;
2786                         s->mb_intra= best_s.mb_intra;
2787                         s->mv[0][0][0] = best_s.mv[0][0][0];
2788                         s->mv[0][0][1] = best_s.mv[0][0][1];
2789                         s->mv[1][0][0] = best_s.mv[1][0][0];
2790                         s->mv[1][0][1] = best_s.mv[1][0][1];
2791
2792                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2793                         for(; qpi<4; qpi++){
2794                             int dquant= dquant_tab[qpi];
2795                             qp= last_qp + dquant;
2796                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2797                                 continue;
2798                             backup_s.dquant= dquant;
2799                             if(storecoefs){
2800                                 for(i=0; i<6; i++){
2801                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2802                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2803                                 }
2804                             }
2805
2806                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2807                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2808                             if(best_s.qscale != qp){
2809                                 if(storecoefs){
2810                                     for(i=0; i<6; i++){
2811                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2812                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2813                                     }
2814                                 }
2815                             }
2816                         }
2817                     }
2818                 }
2819                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2820                     int mx= s->b_direct_mv_table[xy][0];
2821                     int my= s->b_direct_mv_table[xy][1];
2822
2823                     backup_s.dquant = 0;
2824                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2825                     s->mb_intra= 0;
2826                     ff_mpeg4_set_direct_mv(s, mx, my);
2827                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2828                                  &dmin, &next_block, mx, my);
2829                 }
2830                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2831                     backup_s.dquant = 0;
2832                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2833                     s->mb_intra= 0;
2834                     ff_mpeg4_set_direct_mv(s, 0, 0);
2835                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2836                                  &dmin, &next_block, 0, 0);
2837                 }
2838                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2839                     int coded=0;
2840                     for(i=0; i<6; i++)
2841                         coded |= s->block_last_index[i];
2842                     if(coded){
2843                         int mx,my;
2844                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2845                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2846                             mx=my=0; //FIXME find the one we actually used
2847                             ff_mpeg4_set_direct_mv(s, mx, my);
2848                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2849                             mx= s->mv[1][0][0];
2850                             my= s->mv[1][0][1];
2851                         }else{
2852                             mx= s->mv[0][0][0];
2853                             my= s->mv[0][0][1];
2854                         }
2855
2856                         s->mv_dir= best_s.mv_dir;
2857                         s->mv_type = best_s.mv_type;
2858                         s->mb_intra= 0;
2859 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2860                         s->mv[0][0][1] = best_s.mv[0][0][1];
2861                         s->mv[1][0][0] = best_s.mv[1][0][0];
2862                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2863                         backup_s.dquant= 0;
2864                         s->skipdct=1;
2865                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2866                                         &dmin, &next_block, mx, my);
2867                         s->skipdct=0;
2868                     }
2869                 }
2870
2871                 s->current_picture.qscale_table[xy] = best_s.qscale;
2872
2873                 copy_context_after_encode(s, &best_s, -1);
2874
2875                 pb_bits_count= put_bits_count(&s->pb);
2876                 flush_put_bits(&s->pb);
2877                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2878                 s->pb= backup_s.pb;
2879
2880                 if(s->data_partitioning){
2881                     pb2_bits_count= put_bits_count(&s->pb2);
2882                     flush_put_bits(&s->pb2);
2883                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2884                     s->pb2= backup_s.pb2;
2885
2886                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2887                     flush_put_bits(&s->tex_pb);
2888                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2889                     s->tex_pb= backup_s.tex_pb;
2890                 }
2891                 s->last_bits= put_bits_count(&s->pb);
2892
2893                 if (CONFIG_H263_ENCODER &&
2894                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2895                     ff_h263_update_motion_val(s);
2896
2897                 if(next_block==0){ //FIXME 16 vs linesize16
2898                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2899                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2900                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2901                 }
2902
2903                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2904                     ff_MPV_decode_mb(s, s->block);
2905             } else {
2906                 int motion_x = 0, motion_y = 0;
2907                 s->mv_type=MV_TYPE_16X16;
2908                 // only one MB-Type possible
2909
2910                 switch(mb_type){
2911                 case CANDIDATE_MB_TYPE_INTRA:
2912                     s->mv_dir = 0;
2913                     s->mb_intra= 1;
2914                     motion_x= s->mv[0][0][0] = 0;
2915                     motion_y= s->mv[0][0][1] = 0;
2916                     break;
2917                 case CANDIDATE_MB_TYPE_INTER:
2918                     s->mv_dir = MV_DIR_FORWARD;
2919                     s->mb_intra= 0;
2920                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2921                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2922                     break;
2923                 case CANDIDATE_MB_TYPE_INTER_I:
2924                     s->mv_dir = MV_DIR_FORWARD;
2925                     s->mv_type = MV_TYPE_FIELD;
2926                     s->mb_intra= 0;
2927                     for(i=0; i<2; i++){
2928                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2929                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2930                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2931                     }
2932                     break;
2933                 case CANDIDATE_MB_TYPE_INTER4V:
2934                     s->mv_dir = MV_DIR_FORWARD;
2935                     s->mv_type = MV_TYPE_8X8;
2936                     s->mb_intra= 0;
2937                     for(i=0; i<4; i++){
2938                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2939                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2940                     }
2941                     break;
2942                 case CANDIDATE_MB_TYPE_DIRECT:
2943                     if (CONFIG_MPEG4_ENCODER) {
2944                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2945                         s->mb_intra= 0;
2946                         motion_x=s->b_direct_mv_table[xy][0];
2947                         motion_y=s->b_direct_mv_table[xy][1];
2948                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2949                     }
2950                     break;
2951                 case CANDIDATE_MB_TYPE_DIRECT0:
2952                     if (CONFIG_MPEG4_ENCODER) {
2953                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2954                         s->mb_intra= 0;
2955                         ff_mpeg4_set_direct_mv(s, 0, 0);
2956                     }
2957                     break;
2958                 case CANDIDATE_MB_TYPE_BIDIR:
2959                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2960                     s->mb_intra= 0;
2961                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2962                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2963                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2964                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2965                     break;
2966                 case CANDIDATE_MB_TYPE_BACKWARD:
2967                     s->mv_dir = MV_DIR_BACKWARD;
2968                     s->mb_intra= 0;
2969                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2970                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2971                     break;
2972                 case CANDIDATE_MB_TYPE_FORWARD:
2973                     s->mv_dir = MV_DIR_FORWARD;
2974                     s->mb_intra= 0;
2975                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2976                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2977                     break;
2978                 case CANDIDATE_MB_TYPE_FORWARD_I:
2979                     s->mv_dir = MV_DIR_FORWARD;
2980                     s->mv_type = MV_TYPE_FIELD;
2981                     s->mb_intra= 0;
2982                     for(i=0; i<2; i++){
2983                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2984                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2985                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2986                     }
2987                     break;
2988                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2989                     s->mv_dir = MV_DIR_BACKWARD;
2990                     s->mv_type = MV_TYPE_FIELD;
2991                     s->mb_intra= 0;
2992                     for(i=0; i<2; i++){
2993                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2994                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2995                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2996                     }
2997                     break;
2998                 case CANDIDATE_MB_TYPE_BIDIR_I:
2999                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3000                     s->mv_type = MV_TYPE_FIELD;
3001                     s->mb_intra= 0;
3002                     for(dir=0; dir<2; dir++){
3003                         for(i=0; i<2; i++){
3004                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3005                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3006                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3007                         }
3008                     }
3009                     break;
3010                 default:
3011                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3012                 }
3013
3014                 encode_mb(s, motion_x, motion_y);
3015
3016                 // RAL: Update last macroblock type
3017                 s->last_mv_dir = s->mv_dir;
3018
3019                 if (CONFIG_H263_ENCODER &&
3020                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3021                     ff_h263_update_motion_val(s);
3022
3023                 ff_MPV_decode_mb(s, s->block);
3024             }
3025
3026             /* clean the MV table in IPS frames for direct mode in B frames */
3027             if(s->mb_intra /* && I,P,S_TYPE */){
3028                 s->p_mv_table[xy][0]=0;
3029                 s->p_mv_table[xy][1]=0;
3030             }
3031
3032             if(s->flags&CODEC_FLAG_PSNR){
3033                 int w= 16;
3034                 int h= 16;
3035
3036                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3037                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3038
3039                 s->current_picture.f.error[0] += sse(
3040                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3041                     s->dest[0], w, h, s->linesize);
3042                 s->current_picture.f.error[1] += sse(
3043                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3044                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3045                 s->current_picture.f.error[2] += sse(
3046                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3047                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3048             }
3049             if(s->loop_filter){
3050                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3051                     ff_h263_loop_filter(s);
3052             }
3053             av_dlog(s->avctx, "MB %d %d bits\n",
3054                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3055         }
3056     }
3057
3058     //not beautiful here but we must write it before flushing so it has to be here
3059     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3060         ff_msmpeg4_encode_ext_header(s);
3061
3062     write_slice_end(s);
3063
3064     /* Send the last GOB if RTP */
3065     if (s->avctx->rtp_callback) {
3066         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3067         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3068         /* Call the RTP callback to send the last GOB */
3069         emms_c();
3070         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3071     }
3072
3073     return 0;
3074 }
3075
/*
 * Add src->field into dst->field and then clear src->field.
 * Expects variables named dst and src to be in scope where it is used.
 * The body is wrapped in do { } while (0) so that both assignments act as a
 * single statement, e.g. under an unbraced if or for.
 */
#define MERGE(field) do { dst->field += src->field; src->field = 0; } while (0)
3077 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3078     MERGE(me.scene_change_score);
3079     MERGE(me.mc_mb_var_sum_temp);
3080     MERGE(me.mb_var_sum_temp);
3081 }
3082
3083 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3084     int i;
3085
3086     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3087     MERGE(dct_count[1]);
3088     MERGE(mv_bits);
3089     MERGE(i_tex_bits);
3090     MERGE(p_tex_bits);
3091     MERGE(i_count);
3092     MERGE(f_count);
3093     MERGE(b_count);
3094     MERGE(skip_count);
3095     MERGE(misc_bits);
3096     MERGE(er.error_count);
3097     MERGE(padding_bug_score);
3098     MERGE(current_picture.f.error[0]);
3099     MERGE(current_picture.f.error[1]);
3100     MERGE(current_picture.f.error[2]);
3101
3102     if(dst->avctx->noise_reduction){
3103         for(i=0; i<64; i++){
3104             MERGE(dct_error_sum[0][i]);
3105             MERGE(dct_error_sum[1][i]);
3106         }
3107     }
3108
3109     assert(put_bits_count(&src->pb) % 8 ==0);
3110     assert(put_bits_count(&dst->pb) % 8 ==0);
3111     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3112     flush_put_bits(&dst->pb);
3113 }
3114
3115 static int estimate_qp(MpegEncContext *s, int dry_run){
3116     if (s->next_lambda){
3117         s->current_picture_ptr->f.quality =
3118         s->current_picture.f.quality = s->next_lambda;
3119         if(!dry_run) s->next_lambda= 0;
3120     } else if (!s->fixed_qscale) {
3121         s->current_picture_ptr->f.quality =
3122         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3123         if (s->current_picture.f.quality < 0)
3124             return -1;
3125     }
3126
3127     if(s->adaptive_quant){
3128         switch(s->codec_id){
3129         case AV_CODEC_ID_MPEG4:
3130             if (CONFIG_MPEG4_ENCODER)
3131                 ff_clean_mpeg4_qscales(s);
3132             break;
3133         case AV_CODEC_ID_H263:
3134         case AV_CODEC_ID_H263P:
3135         case AV_CODEC_ID_FLV1:
3136             if (CONFIG_H263_ENCODER)
3137                 ff_clean_h263_qscales(s);
3138             break;
3139         default:
3140             ff_init_qscale_tab(s);
3141         }
3142
3143         s->lambda= s->lambda_table[0];
3144         //FIXME broken
3145     }else
3146         s->lambda = s->current_picture.f.quality;
3147     update_qscale(s);
3148     return 0;
3149 }
3150
3151 /* must be called before writing the header */
3152 static void set_frame_distances(MpegEncContext * s){
3153     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3154     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3155
3156     if(s->pict_type==AV_PICTURE_TYPE_B){
3157         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3158         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3159     }else{
3160         s->pp_time= s->time - s->last_non_b_time;
3161         s->last_non_b_time= s->time;
3162         assert(s->picture_number==0 || s->pp_time > 0);
3163     }
3164 }
3165
/**
 * Encode one picture using the state held in s.
 *
 * In order: set per-picture state, run motion estimation (or the intra
 * variance pass for I pictures) over all slice contexts, choose f_code and
 * b_code, pick the quantiser, write the format-specific picture header and
 * finally run encode_thread over all slice contexts and merge their results.
 *
 * @param s              encoder context; s->thread_context[] entries up to
 *                       s->slice_context_count are passed to the worker jobs
 * @param picture_number stored in s->picture_number and handed to most
 *                       picture header writers
 * @return 0 on success, -1 if estimate_qp() or ff_init_me() fails, or the
 *         negative value returned by ff_update_duplicate_context()
 */
3166 static int encode_picture(MpegEncContext *s, int picture_number)
3167 {
3168     int i, ret;
3169     int bits;
3170     int context_count = s->slice_context_count;
3171
3172     s->picture_number = picture_number;
3173
3174     /* Reset the average MB variance */
3175     s->me.mb_var_sum_temp    =
3176     s->me.mc_mb_var_sum_temp = 0;
3177
3178     /* we need to initialize some time vars before we can encode b-frames */
3179     // RAL: Condition added for MPEG1VIDEO
3180     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3181         set_frame_distances(s);
3182     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3183         ff_set_mpeg4_time(s);
3184
3185     s->me.scene_change_score=0;
3186
3187 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3188
         /* Rounding control: I pictures set no_rounding from the msmpeg4
          * version; other non-B pictures toggle it when flipflop_rounding is
          * set or the codec is H263P or MPEG4. B pictures leave it unchanged. */
3189     if(s->pict_type==AV_PICTURE_TYPE_I){
3190         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3191         else                        s->no_rounding=0;
3192     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3193         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3194             s->no_rounding ^= 1;
3195     }
3196
         /* Lambda for motion estimation: in the second pass of 2-pass
          * encoding a dry-run QP estimate is made; otherwise, unless a fixed
          * qscale is requested via CODEC_FLAG_QSCALE, the last lambda stored
          * for this picture type (B) or for the last non-B type is reused. */
3197     if(s->flags & CODEC_FLAG_PASS2){
3198         if (estimate_qp(s,1) < 0)
3199             return -1;
3200         ff_get_2pass_fcode(s);
3201     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3202         if(s->pict_type==AV_PICTURE_TYPE_B)
3203             s->lambda= s->last_lambda_for[s->pict_type];
3204         else
3205             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3206         update_qscale(s);
3207     }
3208
         /* For every codec except AMV the chroma intra quantisation matrices
          * alias the luma ones; a chroma matrix that is a distinct pointer
          * is freed first. */
3209     if(s->codec_id != AV_CODEC_ID_AMV){
3210         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3211         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3212         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3213         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3214     }
3215
3216     s->mb_intra=0; //for the rate distortion & bit compare functions
         /* Bring the additional slice contexts (index 1 and up) in line with
          * s; any failure aborts the picture with that error code. */
3217     for(i=1; i<context_count; i++){
3218         ret = ff_update_duplicate_context(s->thread_context[i], s);
3219         if (ret < 0)
3220             return ret;
3221     }
3222
3223     if(ff_init_me(s)<0)
3224         return -1;
3225
3226     /* Estimate motion for every MB */
3227     if(s->pict_type != AV_PICTURE_TYPE_I){
             /* scale lambda and lambda2 by me_penalty_compensation/256, rounded */
3228         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3229         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3230         if (s->pict_type != AV_PICTURE_TYPE_B) {
                 /* the pre-estimation pass runs when pre_me is set and the
                  * last non-B picture was an I picture, or always when
                  * pre_me is 2 */
3231             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3232                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3233             }
3234         }
3235
3236         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3237     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3238         /* I-Frame */
3239         for(i=0; i<s->mb_stride*s->mb_height; i++)
3240             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3241
3242         if(!s->fixed_qscale){
3243             /* finding spatial complexity for I-frame rate control */
3244             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3245         }
3246     }
         /* collect the motion-estimation statistics of the other contexts */
3247     for(i=1; i<context_count; i++){
3248         merge_context_after_me(s, s->thread_context[i]);
3249     }
3250     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3251     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3252     emms_c();
3253
         /* Scene change: a P picture whose score exceeds the configured
          * threshold is re-typed as an I picture and every macroblock is
          * marked as an intra candidate. */
3254     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3255         s->pict_type= AV_PICTURE_TYPE_I;
3256         for(i=0; i<s->mb_stride*s->mb_height; i++)
3257             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3258         if(s->msmpeg4_version >= 3)
3259             s->no_rounding=1;
3260         av_dlog(s, "Scene change detected, encoding as I Frame %d %d\n",
3261                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3262     }
3263
         /* Unless umvplus is set, choose f_code (and b_code for B pictures)
          * from the estimated vectors and pass each vector table to
          * ff_fix_long_mvs() with the chosen code. */
3264     if(!s->umvplus){
3265         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3266             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3267
3268             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3269                 int a,b;
3270                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3271                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3272                 s->f_code= FFMAX3(s->f_code, a, b);
3273             }
3274
3275             ff_fix_long_p_mvs(s);
3276             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3277             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3278                 int j;
3279                 for(i=0; i<2; i++){
3280                     for(j=0; j<2; j++)
3281                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3282                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3283                 }
3284             }
3285         }
3286
3287         if(s->pict_type==AV_PICTURE_TYPE_B){
3288             int a, b;
3289
                 /* f_code covers the forward and bidir-forward tables,
                  * b_code the backward and bidir-backward tables */
3290             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3291             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3292             s->f_code = FFMAX(a, b);
3293
3294             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3295             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3296             s->b_code = FFMAX(a, b);
3297
3298             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3299             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3300             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3301             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3302             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3303                 int dir, j;
3304                 for(dir=0; dir<2; dir++){
3305                     for(i=0; i<2; i++){
3306                         for(j=0; j<2; j++){
3307                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3308                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3309                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3310                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3311                         }
3312                     }
3313                 }
3314             }
3315         }
3316     }
3317
         /* final (non dry-run) quantiser decision for this picture */
3318     if (estimate_qp(s, 0) < 0)
3319         return -1;
3320
3321     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3322         s->qscale= 3; //reduce clipping problems
3323
3324     if (s->out_format == FMT_MJPEG) {
3325         /* for mjpeg, we do include qscale in the matrix */
3326         for(i=1;i<64;i++){
3327             int j= s->dsp.idct_permutation[i];
3328
3329             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3330         }
3331         s->y_dc_scale_table=
3332         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3333         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3334         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3335                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
             /* qscale is already folded into the matrix, so it is pinned to 8 */
3336         s->qscale= 8;
3337     }
         /* AMV: luma and chroma intra matrices are taken from rows 5*2+0 and
          * 5*2+1 of sp5x_quant_table, with constant DC scale tables of 13
          * (luma) and 14 (chroma); qscale is pinned to 8. */
3338     if(s->codec_id == AV_CODEC_ID_AMV){
3339         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3340         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3341         for(i=1;i<64;i++){
3342             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3343
3344             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3345             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3346         }
3347         s->y_dc_scale_table= y;
3348         s->c_dc_scale_table= c;
3349         s->intra_matrix[0] = 13;
3350         s->chroma_intra_matrix[0] = 14;
3351         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3352                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3353         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3354                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3355         s->qscale= 8;
3356     }
3357
3358     //FIXME var duplication
3359     s->current_picture_ptr->f.key_frame =
3360     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3361     s->current_picture_ptr->f.pict_type =
3362     s->current_picture.f.pict_type = s->pict_type;
3363
3364     if (s->current_picture.f.key_frame)
3365         s->picture_in_gop_number=0;
3366
         /* Write the picture header for the output format; header_bits is the
          * number of bits the header writer added to s->pb. */
3367     s->mb_x = s->mb_y = 0;
3368     s->last_bits= put_bits_count(&s->pb);
3369     switch(s->out_format) {
3370     case FMT_MJPEG:
3371         if (CONFIG_MJPEG_ENCODER)
3372             ff_mjpeg_encode_picture_header(s);
3373         break;
3374     case FMT_H261:
3375         if (CONFIG_H261_ENCODER)
3376             ff_h261_encode_picture_header(s, picture_number);
3377         break;
3378     case FMT_H263:
3379         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3380             ff_wmv2_encode_picture_header(s, picture_number);
3381         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3382             ff_msmpeg4_encode_picture_header(s, picture_number);
3383         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3384             ff_mpeg4_encode_picture_header(s, picture_number);
3385         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3386             ff_rv10_encode_picture_header(s, picture_number);
3387         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3388             ff_rv20_encode_picture_header(s, picture_number);
3389         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3390             ff_flv_encode_picture_header(s, picture_number);
3391         else if (CONFIG_H263_ENCODER)
3392             ff_h263_encode_picture_header(s, picture_number);
3393         break;
3394     case FMT_MPEG1:
3395         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3396             ff_mpeg1_encode_picture_header(s, picture_number);
3397         break;
3398     default:
3399         av_assert0(0);
3400     }
3401     bits= put_bits_count(&s->pb);
3402     s->header_bits= bits - s->last_bits;
3403
         /* Update the other slice contexts from s, run encode_thread over all
          * contexts, then merge the results of contexts 1 and up back into s. */
3404     for(i=1; i<context_count; i++){
3405         update_duplicate_context_after_me(s->thread_context[i], s);
3406     }
3407     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3408     for(i=1; i<context_count; i++){
3409         merge_context_after_encode(s, s->thread_context[i]);
3410     }
3411     emms_c();
3412     return 0;
3413 }
3414
3415 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3416     const int intra= s->mb_intra;
3417     int i;
3418
3419     s->dct_count[intra]++;
3420
3421     for(i=0; i<64; i++){
3422         int level= block[i];
3423
3424         if(level){
3425             if(level>0){
3426                 s->dct_error_sum[intra][i] += level;
3427                 level -= s->dct_offset[intra][i];
3428                 if(level<0) level=0;
3429             }else{
3430                 s->dct_error_sum[intra][i] -= level;
3431                 level += s->dct_offset[intra][i];
3432                 if(level>0) level=0;
3433             }
3434             block[i]= level;
3435         }
3436     }
3437 }
3438
/**
 * Transform a block with s->dsp.fdct and quantise it, choosing the level of
 * each coefficient by minimising distortion plus lambda times the VLC length.
 *
 * For every scan position up to two candidate levels are considered, and a
 * dynamic-programming pass over (run, level) pairs picks the cheapest
 * sequence. The chosen levels are written back into block at the positions
 * given by s->intra_scantable.permutated; all other AC positions are zeroed.
 * s->coded_score[n] receives the score of the chosen coding.
 *
 * @param s        encoder context
 * @param block    64 values, transformed and overwritten in place
 * @param n        block index; n < 4 selects s->y_dc_scale and
 *                 s->q_intra_matrix, otherwise s->c_dc_scale and
 *                 s->q_chroma_intra_matrix (intra blocks only)
 * @param qscale   quantiser scale, used to index the quantisation matrices
 * @param overflow set to non-zero if a candidate magnitude may exceed
 *                 s->max_qcoeff
 * @return scan index of the last non-zero coefficient; a value below the
 *         first coded index (1 for intra, 0 for inter) means no coefficient
 *         from that index on is coded
 */
3439 static int dct_quantize_trellis_c(MpegEncContext *s,
3440                                   int16_t *block, int n,
3441                                   int qscale, int *overflow){
3442     const int *qmat;
3443     const uint8_t *scantable= s->intra_scantable.scantable;
3444     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3445     int max=0;
3446     unsigned int threshold1, threshold2;
3447     int bias=0;
3448     int run_tab[65];
3449     int level_tab[65];
3450     int score_tab[65];
3451     int survivor[65];
3452     int survivor_count;
3453     int last_run=0;
3454     int last_level=0;
3455     int last_score= 0;
3456     int last_i;
3457     int coeff[2][64];
3458     int coeff_count[64];
3459     int qmul, qadd, start_i, last_non_zero, i, dc;
3460     const int esc_length= s->ac_esc_length;
3461     uint8_t * length;
3462     uint8_t * last_length;
3463     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3464
3465     s->dsp.fdct (block);
3466
         /* the denoiser is applied only when error sums are being collected */
3467     if(s->dct_error_sum)
3468         s->denoise_dct(s, block);
         /* constants for the FMT_H263 reconstruction used in the distortion
          * estimate below: |level|*qmul + qadd */
3469     qmul= qscale*16;
3470     qadd= ((qscale-1)|1)*8;
3471
3472     if (s->mb_intra) {
3473         int q;
3474         if (!s->h263_aic) {
3475             if (n < 4)
3476                 q = s->y_dc_scale;
3477             else
3478                 q = s->c_dc_scale;
3479             q = q << 3;
3480         } else{
3481             /* For AIC we skip quant/dequant of INTRADC */
3482             q = 1 << 3;
3483             qadd=0;
3484         }
3485
3486         /* note: block[0] is assumed to be positive */
3487         block[0] = (block[0] + (q >> 1)) / q;
             /* DC is handled above, so the search starts at scan index 1 */
3488         start_i = 1;
3489         last_non_zero = 0;
3490         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3491         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3492             bias= 1<<(QMAT_SHIFT-1);
3493         length     = s->intra_ac_vlc_length;
3494         last_length= s->intra_ac_vlc_last_length;
3495     } else {
3496         start_i = 0;
3497         last_non_zero = -1;
3498         qmat = s->q_inter_matrix[qscale];
3499         length     = s->inter_ac_vlc_length;
3500         last_length= s->inter_ac_vlc_last_length;
3501     }
3502     last_i= start_i;
3503
         /* (unsigned)(level + threshold1) > threshold2 is a single-compare
          * test for level lying outside [-threshold1, threshold1] */
3504     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3505     threshold2= (threshold1<<1);
3506
         /* scan backwards for the last coefficient that passes the threshold */
3507     for(i=63; i>=start_i; i--) {
3508         const int j = scantable[i];
3509         int level = block[j] * qmat[j];
3510
3511         if(((unsigned)(level+threshold1))>threshold2){
3512             last_non_zero = i;
3513             break;
3514         }
3515     }
3516
         /* Build the candidate levels for every position up to last_non_zero:
          * coefficients above the threshold get the quantised level and that
          * level reduced by one in magnitude; the others get a single
          * candidate of +1 or -1. */
3517     for(i=start_i; i<=last_non_zero; i++) {
3518         const int j = scantable[i];
3519         int level = block[j] * qmat[j];
3520
3521 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3522 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3523         if(((unsigned)(level+threshold1))>threshold2){
3524             if(level>0){
3525                 level= (bias + level)>>QMAT_SHIFT;
3526                 coeff[0][i]= level;
3527                 coeff[1][i]= level-1;
3528 //                coeff[2][k]= level-2;
3529             }else{
3530                 level= (bias - level)>>QMAT_SHIFT;
3531                 coeff[0][i]= -level;
3532                 coeff[1][i]= -level+1;
3533 //                coeff[2][k]= -level+2;
3534             }
                 /* a magnitude of 1 has only one candidate: the second one
                  * would be 0 */
3535             coeff_count[i]= FFMIN(level, 2);
3536             av_assert2(coeff_count[i]);
3537             max |=level;
3538         }else{
                 /* -1 for negative level, +1 otherwise (assuming a 32-bit int
                  * with arithmetic right shift) */
3539             coeff[0][i]= (level>>31)|1;
3540             coeff_count[i]= 1;
3541         }
3542     }
3543
3544     *overflow= s->max_qcoeff < max; //overflow might have happened
3545
         /* nothing passed the threshold: clear the block from start_i on */
3546     if(last_non_zero < start_i){
3547         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3548         return last_non_zero;
3549     }
3550
         /* score_tab[i] holds the best score found for the coefficients
          * before scan index i; survivor[] lists the indices still considered
          * as the position following the previous coded coefficient. */
3551     score_tab[start_i]= 0;
3552     survivor[0]= start_i;
3553     survivor_count= 1;
3554
3555     for(i=start_i; i<=last_non_zero; i++){
3556         int level_index, j, zero_distortion;
3557         int dct_coeff= FFABS(block[ scantable[i] ]);
3558         int best_score=256*256*256*120;
3559
3560         if (s->dsp.fdct == ff_fdct_ifast)
3561             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3562         zero_distortion= dct_coeff*dct_coeff;
3563
3564         for(level_index=0; level_index < coeff_count[i]; level_index++){
3565             int distortion;
3566             int level= coeff[level_index][i];
3567             const int alevel= FFABS(level);
3568             int unquant_coeff;
3569
3570             av_assert2(level);
3571
                 /* reconstruct the magnitude this candidate would decode to */
3572             if(s->out_format == FMT_H263){
3573                 unquant_coeff= alevel*qmul + qadd;
3574             }else{ //MPEG1
3575                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3576                 if(s->mb_intra){
3577                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3578                         unquant_coeff =   (unquant_coeff - 1) | 1;
3579                 }else{
3580                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3581                         unquant_coeff =   (unquant_coeff - 1) | 1;
3582                 }
3583                 unquant_coeff<<= 3;
3584             }
3585
                 /* squared error of this candidate relative to coding a zero */
3586             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
                 /* after the +64 bias, (level & ~127) == 0 holds for original
                  * levels in [-64, 63]; those are costed from the VLC length
                  * tables, all others with esc_length */
3587             level+=64;
3588             if((level&(~127)) == 0){
3589                 for(j=survivor_count-1; j>=0; j--){
3590                     int run= i - survivor[j];
3591                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3592                     score += score_tab[i-run];
3593
3594                     if(score < best_score){
3595                         best_score= score;
3596                         run_tab[i+1]= run;
3597                         level_tab[i+1]= level-64;
3598                     }
3599                 }
3600
                     /* for FMT_H263 the final coefficient is costed from
                      * last_length, so the best ending is tracked here */
3601                 if(s->out_format == FMT_H263){
3602                     for(j=survivor_count-1; j>=0; j--){
3603                         int run= i - survivor[j];
3604                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3605                         score += score_tab[i-run];
3606                         if(score < last_score){
3607                             last_score= score;
3608                             last_run= run;
3609                             last_level= level-64;
3610                             last_i= i+1;
3611                         }
3612                     }
3613                 }
3614             }else{
                     /* escape-coded level: same cost for every run */
3615                 distortion += esc_length*lambda;
3616                 for(j=survivor_count-1; j>=0; j--){
3617                     int run= i - survivor[j];
3618                     int score= distortion + score_tab[i-run];
3619
3620                     if(score < best_score){
3621                         best_score= score;
3622                         run_tab[i+1]= run;
3623                         level_tab[i+1]= level-64;
3624                     }
3625                 }
3626
3627                 if(s->out_format == FMT_H263){
3628                   for(j=survivor_count-1; j>=0; j--){
3629                         int run= i - survivor[j];
3630                         int score= distortion + score_tab[i-run];
3631                         if(score < last_score){
3632                             last_score= score;
3633                             last_run= run;
3634                             last_level= level-64;
3635                             last_i= i+1;
3636                         }
3637                     }
3638                 }
3639             }
3640         }
3641
3642         score_tab[i+1]= best_score;
3643
             /* Drop survivors from the end of the list while their score
              * exceeds best_score (plus lambda when last_non_zero > 27), then
              * append i+1 as a new survivor. */
3644         //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3645         if(last_non_zero <= 27){
3646             for(; survivor_count; survivor_count--){
3647                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3648                     break;
3649             }
3650         }else{
3651             for(; survivor_count; survivor_count--){
3652                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3653                     break;
3654             }
3655         }
3656
3657         survivor[ survivor_count++ ]= i+1;
3658     }
3659
         /* For formats other than FMT_H263 the end position is chosen after
          * the search: the lowest score_tab entry, with 2*lambda added for
          * every index other than 0. */
3660     if(s->out_format != FMT_H263){
3661         last_score= 256*256*256*120;
3662         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3663             int score= score_tab[i];
3664             if(i) score += lambda*2; //FIXME exacter?
3665
3666             if(score < last_score){
3667                 last_score= score;
3668                 last_i= i;
3669                 last_level= level_tab[i];
3670                 last_run= run_tab[i];
3671             }
3672         }
3673     }
3674
3675     s->coded_score[n] = last_score;
3676
         /* remember |block[0]| before the coefficients are cleared */
3677     dc= FFABS(block[0]);
3678     last_non_zero= last_i - 1;
3679     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3680
3681     if(last_non_zero < start_i)
3682         return last_non_zero;
3683
         /* Only scan index 0 remains in a block that starts at index 0:
          * re-evaluate its candidates against coding nothing (score dc*dc)
          * and return -1 if zero wins. */
3684     if(last_non_zero == 0 && start_i == 0){
3685         int best_level= 0;
3686         int best_score= dc * dc;
3687
3688         for(i=0; i<coeff_count[0]; i++){
3689             int level= coeff[i][0];
3690             int alevel= FFABS(level);
3691             int unquant_coeff, score, distortion;
3692
3693             if(s->out_format == FMT_H263){
3694                     unquant_coeff= (alevel*qmul + qadd)>>3;
3695             }else{ //MPEG1
3696                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3697                     unquant_coeff =   (unquant_coeff - 1) | 1;
3698             }
3699             unquant_coeff = (unquant_coeff + 4) >> 3;
3700             unquant_coeff<<= 3 + 3;
3701
3702             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3703             level+=64;
3704             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3705             else                    score= distortion + esc_length*lambda;
3706
3707             if(score < best_score){
3708                 best_score= score;
3709                 best_level= level - 64;
3710             }
3711         }
3712         block[0]= best_level;
3713         s->coded_score[n] = best_score - dc*dc;
3714         if(best_level == 0) return -1;
3715         else                return last_non_zero;
3716     }
3717
         /* Backtrack: store the last level, then walk backwards through
          * run_tab, writing each chosen level at its permuted scan position. */
3718     i= last_i;
3719     av_assert2(last_level);
3720
3721     block[ perm_scantable[last_non_zero] ]= last_level;
3722     i -= last_run + 1;
3723
3724     for(; i>start_i; i -= run_tab[i] + 1){
3725         block[ perm_scantable[i-1] ]= level_tab[i];
3726     }
3727
3728     return last_non_zero;
3729 }
3730
3731 //#define REFINE_STATS 1
3732 static int16_t basis[64][64];
3733
3734 static void build_basis(uint8_t *perm){
3735     int i, j, x, y;
3736     emms_c();
3737     for(i=0; i<8; i++){
3738         for(j=0; j<8; j++){
3739             for(y=0; y<8; y++){
3740                 for(x=0; x<8; x++){
3741                     double s= 0.25*(1<<BASIS_SHIFT);
3742                     int index= 8*i + j;
3743                     int perm_index= perm[index];
3744                     if(i==0) s*= sqrt(0.5);
3745                     if(j==0) s*= sqrt(0.5);
3746                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3747                 }
3748             }
3749         }
3750     }
3751 }
3752
/**
 * Greedily refine the already quantized coefficients of one 8x8 block.
 *
 * Every pass of the main loop tries changing a single coefficient by +1 or
 * -1, scores each candidate as s->dsp.try_8x8basis() on the running
 * remainder rem[] plus lambda times the change in VLC length taken from the
 * length / last_length tables, applies the best candidate, and repeats until
 * no candidate scores below the "change nothing" score.
 *
 * @param s      encoder context; reads mb_intra, h263_aic, y_dc_scale /
 *               c_dc_scale, the intra/inter AC VLC length tables, lambda2,
 *               quantizer_noise_shaping and block_last_index[n]
 * @param block  quantized levels, addressed through the permuted intra
 *               scantable; modified in place
 * @param weight 64 weights; overwritten in place by values derived from
 *               FFABS(weight[i])
 * @param orig   64 values that are scaled by 1<<RECON_SHIFT and subtracted
 *               when rem[] is seeded (presumably the original samples --
 *               TODO confirm against the caller)
 * @param n      block index; n < 4 selects y_dc_scale, otherwise c_dc_scale
 * @param qscale quantizer scale; AC levels are dequantized as
 *               2*qscale*level +/- ((qscale-1)|1)
 * @return scan-order index of the last non-zero coefficient after refinement
 */
3753 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3754                         int16_t *block, int16_t *weight, int16_t *orig,
3755                         int n, int qscale){
3756     int16_t rem[64];
3757     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3758     const uint8_t *scantable= s->intra_scantable.scantable;
3759     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3760 //    unsigned int threshold1, threshold2;
3761 //    int bias=0;
3762     int run_tab[65];
3763     int prev_run=0;
3764     int prev_level=0;
3765     int qmul, qadd, start_i, last_non_zero, i, dc;
3766     uint8_t * length;
3767     uint8_t * last_length;
3768     int lambda;
3769     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3770 #ifdef REFINE_STATS
3771 static int count=0;
3772 static int after_last=0;
3773 static int to_zero=0;
3774 static int from_zero=0;
3775 static int raise=0;
3776 static int lower=0;
3777 static int messed_sign=0;
3778 #endif
3779
         /* basis[][] is built on first use; a zero first entry is taken to
          * mean that it has not been filled in yet */
3780     if(basis[0][0] == 0)
3781         build_basis(s->dsp.idct_permutation);
3782
         /* AC dequantization used throughout: coeff = qmul*level +/- qadd */
3783     qmul= qscale*2;
3784     qadd= (qscale-1)|1;
3785     if (s->mb_intra) {
3786         if (!s->h263_aic) {
3787             if (n < 4)
3788                 q = s->y_dc_scale;
3789             else
3790                 q = s->c_dc_scale;
3791         } else{
3792             /* For AIC we skip quant/dequant of INTRADC */
3793             q = 1;
3794             qadd=0;
3795         }
3796         q <<= RECON_SHIFT-3;
3797         /* note: block[0] is assumed to be positive */
3798         dc= block[0]*q;
3799 //        block[0] = (block[0] + (q >> 1)) / q;
             /* the DC coefficient is handled separately, the AC scan starts at 1 */
3800         start_i = 1;
3801 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3802 //            bias= 1<<(QMAT_SHIFT-1);
3803         length     = s->intra_ac_vlc_length;
3804         last_length= s->intra_ac_vlc_last_length;
3805     } else {
3806         dc= 0;
3807         start_i = 0;
3808         length     = s->inter_ac_vlc_length;
3809         last_length= s->inter_ac_vlc_last_length;
3810     }
3811     last_non_zero = s->block_last_index[n];
3812
3813 #ifdef REFINE_STATS
3814 {START_TIMER
3815 #endif
         /* seed rem[] with the dc term plus half a unit of rounding, minus
          * orig[] scaled by 1<<RECON_SHIFT.
          * NOTE(review): orig[i]<<RECON_SHIFT left-shifts a signed value; if
          * orig[] can be negative this is undefined in C -- confirm */
3816     dc += (1<<(RECON_SHIFT-1));
3817     for(i=0; i<64; i++){
3818         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME  use orig directly instead of copying to rem[]
3819     }
3820 #ifdef REFINE_STATS
3821 STOP_TIMER("memset rem[]")}
3822 #endif
         /* remap every weight to 15 + round(48*qns*one / (|weight| + qns*one))
          * and accumulate the sum of squares that scales lambda below */
3823     sum=0;
3824     for(i=0; i<64; i++){
3825         int one= 36;
3826         int qns=4;
3827         int w;
3828
3829         w= FFABS(weight[i]) + qns*one;
3830         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3831
3832         weight[i] = w;
3833 //        w=weight[i] = (63*qns + (w/2)) / w;
3834
3835         av_assert2(w>0);
3836         av_assert2(w<(1<<6));
3837         sum += w*w;
3838     }
         /* rate multiplier applied to VLC length differences in the search */
3839     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3840 #ifdef REFINE_STATS
3841 {START_TIMER
3842 #endif
         /* record the zero run preceding each non-zero level in scan order
          * and add the dequantized value of that level to rem[] */
3843     run=0;
3844     rle_index=0;
3845     for(i=start_i; i<=last_non_zero; i++){
3846         int j= perm_scantable[i];
3847         const int level= block[j];
3848         int coeff;
3849
3850         if(level){
3851             if(level<0) coeff= qmul*level - qadd;
3852             else        coeff= qmul*level + qadd;
3853             run_tab[rle_index++]=run;
3854             run=0;
3855
3856             s->dsp.add_8x8basis(rem, basis[j], coeff);
3857         }else{
3858             run++;
3859         }
3860     }
3861 #ifdef REFINE_STATS
3862 if(last_non_zero>0){
3863 STOP_TIMER("init rem[]")
3864 }
3865 }
3866
3867 {START_TIMER
3868 #endif
         /* one iteration per applied change; leaves via the break at the
          * bottom once no candidate improves on best_score */
3869     for(;;){
             /* baseline: score of leaving the block as it is */
3870         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3871         int best_coeff=0;
3872         int best_change=0;
3873         int run2, best_unquant_change=0, analyze_gradient;
3874 #ifdef REFINE_STATS
3875 {START_TIMER
3876 #endif
3877         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3878
3879         if(analyze_gradient){
3880 #ifdef REFINE_STATS
3881 {START_TIMER
3882 #endif
                 /* d1[] = forward DCT of rem[] weighted by w*w; only its
                  * sign is consulted further down */
3883             for(i=0; i<64; i++){
3884                 int w= weight[i];
3885
3886                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3887             }
3888 #ifdef REFINE_STATS
3889 STOP_TIMER("rem*w*w")}
3890 {START_TIMER
3891 #endif
3892             s->dsp.fdct(d1);
3893 #ifdef REFINE_STATS
3894 STOP_TIMER("dct")}
3895 #endif
3896         }
3897
             /* intra blocks: try DC +/- 1, keeping the dequantized DC inside
              * [0, 2048); no rate term is added for the DC candidates */
3898         if(start_i){
3899             const int level= block[0];
3900             int change, old_coeff;
3901
3902             av_assert2(s->mb_intra);
3903
3904             old_coeff= q*level;
3905
3906             for(change=-1; change<=1; change+=2){
3907                 int new_level= level + change;
3908                 int score, new_coeff;
3909
3910                 new_coeff= q*new_level;
3911                 if(new_coeff >= 2048 || new_coeff < 0)
3912                     continue;
3913
3914                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3915                 if(score<best_score){
3916                     best_score= score;
3917                     best_coeff= 0;
3918                     best_change= change;
3919                     best_unquant_change= new_coeff - old_coeff;
3920                 }
3921             }
3922         }
3923
             /* walk the AC positions in scan order; run counts the zeros
              * since the previous non-zero level, run2 the zeros remaining
              * before the next one according to run_tab[] */
3924         run=0;
3925         rle_index=0;
3926         run2= run_tab[rle_index++];
3927         prev_level=0;
3928         prev_run=0;
3929
3930         for(i=start_i; i<64; i++){
3931             int j= perm_scantable[i];
3932             const int level= block[j];
3933             int change, old_coeff;
3934
                 /* with quantizer_noise_shaping < 3 the search stops one
                  * position past the current last non-zero coefficient */
3935             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3936                 break;
3937
3938             if(level){
3939                 if(level<0) old_coeff= qmul*level - qadd;
3940                 else        old_coeff= qmul*level + qadd;
3941                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3942             }else{
3943                 old_coeff=0;
3944                 run2--;
3945                 av_assert2(run2>=0 || i >= last_non_zero );
3946             }
3947
3948             for(change=-1; change<=1; change+=2){
3949                 int new_level= level + change;
3950                 int score, new_coeff, unquant_change;
3951
3952                 score=0;
                     /* with quantizer_noise_shaping < 2 only changes that do
                      * not increase the magnitude are considered */
3953                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3954                    continue;
3955
3956                 if(new_level){
3957                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3958                     else            new_coeff= qmul*new_level + qadd;
3959                     if(new_coeff >= 2048 || new_coeff <= -2048)
3960                         continue;
3961                     //FIXME check for overflow
3962
3963                     if(level){
                             /* non-zero stays non-zero: same run, different
                              * level; no rate term when |level| >= 63 */
3964                         if(level < 63 && level > -63){
3965                             if(i < last_non_zero)
3966                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3967                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3968                             else
3969                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3970                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3971                         }
3972                     }else{
                             /* zero becomes +/-1 */
3973                         av_assert2(FFABS(new_level)==1);
3974
                             /* skip the candidate whose sign equals the
                              * sign of the non-zero d1[] entry here */
3975                         if(analyze_gradient){
3976                             int g= d1[ scantable[i] ];
3977                             if(g && (g^new_level) >= 0)
3978                                 continue;
3979                         }
3980
3981                         if(i < last_non_zero){
                                 /* the new level splits the run in front of
                                  * the next non-zero coefficient in two */
3982                             int next_i= i + run2 + 1;
3983                             int next_level= block[ perm_scantable[next_i] ] + 64;
3984
3985                             if(next_level&(~127))
3986                                 next_level= 0;
3987
3988                             if(next_i < last_non_zero)
3989                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3990                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3991                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3992                             else
3993                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3994                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3995                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3996                         }else{
                                 /* the new level becomes the last one; the
                                  * previous level moves from the "last"
                                  * table to the regular table */
3997                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3998                             if(prev_level){
3999                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4000                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4001                             }
4002                         }
4003                     }
4004                 }else{
                         /* +/-1 becomes zero: mirror image of the case above,
                          * two runs merge or the previous level becomes last */
4005                     new_coeff=0;
4006                     av_assert2(FFABS(level)==1);
4007
4008                     if(i < last_non_zero){
4009                         int next_i= i + run2 + 1;
4010                         int next_level= block[ perm_scantable[next_i] ] + 64;
4011
4012                         if(next_level&(~127))
4013                             next_level= 0;
4014
4015                         if(next_i < last_non_zero)
4016                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4017                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4018                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4019                         else
4020                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4021                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4022                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4023                     }else{
4024                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4025                         if(prev_level){
4026                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4027                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4028                         }
4029                     }
4030                 }
4031
                     /* turn the length difference into a cost, then add the
                      * try_8x8basis() score of the dequantized change */
4032                 score *= lambda;
4033
4034                 unquant_change= new_coeff - old_coeff;
4035                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4036
4037                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4038                 if(score<best_score){
4039                     best_score= score;
4040                     best_coeff= i;
4041                     best_change= change;
4042                     best_unquant_change= unquant_change;
4043                 }
4044             }
                 /* remember the most recent non-zero level (biased by 64,
                  * or 0 when outside 0..127) and its run */
4045             if(level){
4046                 prev_level= level + 64;
4047                 if(prev_level&(~127))
4048                     prev_level= 0;
4049                 prev_run= run;
4050                 run=0;
4051             }else{
4052                 run++;
4053             }
4054         }
4055 #ifdef REFINE_STATS
4056 STOP_TIMER("iterative step")}
4057 #endif
4058
             /* apply the winning change, if any, and bring last_non_zero,
              * run_tab[] and rem[] up to date for the next iteration */
4059         if(best_change){
4060             int j= perm_scantable[ best_coeff ];
4061
4062             block[j] += best_change;
4063
4064             if(best_coeff > last_non_zero){
4065                 last_non_zero= best_coeff;
4066                 av_assert2(block[j]);
4067 #ifdef REFINE_STATS
4068 after_last++;
4069 #endif
4070             }else{
4071 #ifdef REFINE_STATS
4072 if(block[j]){
4073     if(block[j] - best_change){
4074         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4075             raise++;
4076         }else{
4077             lower++;
4078         }
4079     }else{
4080         from_zero++;
4081     }
4082 }else{
4083     to_zero++;
4084 }
4085 #endif
                     /* the change may have zeroed the last coefficient:
                      * move last_non_zero back to the last non-zero level */
4086                 for(; last_non_zero>=start_i; last_non_zero--){
4087                     if(block[perm_scantable[last_non_zero]])
4088                         break;
4089                 }
4090             }
4091 #ifdef REFINE_STATS
4092 count++;
4093 if(256*256*256*64 % count == 0){
4094     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4095 }
4096 #endif
                 /* rebuild run_tab[]; the j declared inside this loop shadows
                  * the outer j only within the loop body */
4097             run=0;
4098             rle_index=0;
4099             for(i=start_i; i<=last_non_zero; i++){
4100                 int j= perm_scantable[i];
4101                 const int level= block[j];
4102
4103                  if(level){
4104                      run_tab[rle_index++]=run;
4105                      run=0;
4106                  }else{
4107                      run++;
4108                  }
4109             }
4110
                 /* here j is again the position of the changed coefficient */
4111             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4112         }else{
4113             break;
4114         }
4115     }
4116 #ifdef REFINE_STATS
4117 if(last_non_zero>0){
4118 STOP_TIMER("iterative search")
4119 }
4120 }
4121 #endif
4122
4123     return last_non_zero;
4124 }
4125
4126 int ff_dct_quantize_c(MpegEncContext *s,
4127                         int16_t *block, int n,
4128                         int qscale, int *overflow)
4129 {
4130     int i, j, level, last_non_zero, q, start_i;
4131     const int *qmat;
4132     const uint8_t *scantable= s->intra_scantable.scantable;
4133     int bias;
4134     int max=0;
4135     unsigned int threshold1, threshold2;
4136
4137     s->dsp.fdct (block);
4138
4139     if(s->dct_error_sum)
4140         s->denoise_dct(s, block);
4141
4142     if (s->mb_intra) {
4143         if (!s->h263_aic) {
4144             if (n < 4)
4145                 q = s->y_dc_scale;
4146             else
4147                 q = s->c_dc_scale;
4148             q = q << 3;
4149         } else
4150             /* For AIC we skip quant/dequant of INTRADC */
4151             q = 1 << 3;
4152
4153         /* note: block[0] is assumed to be positive */
4154         block[0] = (block[0] + (q >> 1)) / q;
4155         start_i = 1;
4156         last_non_zero = 0;
4157         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4158         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4159     } else {
4160         start_i = 0;
4161         last_non_zero = -1;
4162         qmat = s->q_inter_matrix[qscale];
4163         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4164     }
4165     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4166     threshold2= (threshold1<<1);
4167     for(i=63;i>=start_i;i--) {
4168         j = scantable[i];
4169         level = block[j] * qmat[j];
4170
4171         if(((unsigned)(level+threshold1))>threshold2){
4172             last_non_zero = i;
4173             break;
4174         }else{
4175             block[j]=0;
4176         }
4177     }
4178     for(i=start_i; i<=last_non_zero; i++) {
4179         j = scantable[i];
4180         level = block[j] * qmat[j];
4181
4182 //        if(   bias+level >= (1<<QMAT_SHIFT)
4183 //           || bias-level >= (1<<QMAT_SHIFT)){
4184         if(((unsigned)(level+threshold1))>threshold2){
4185             if(level>0){
4186                 level= (bias + level)>>QMAT_SHIFT;
4187                 block[j]= level;
4188             }else{
4189                 level= (bias - level)>>QMAT_SHIFT;
4190                 block[j]= -level;
4191             }
4192             max |=level;
4193         }else{
4194             block[j]=0;
4195         }
4196     }
4197     *overflow= s->max_qcoeff < max; //overflow might have happened
4198
4199     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
4200     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4201         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4202
4203     return last_non_zero;
4204 }
4205
4206 #define OFFSET(x) offsetof(MpegEncContext, x)
4207 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4208 static const AVOption h263_options[] = {
4209     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4210     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4211     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4212     FF_MPV_COMMON_OPTS
4213     { NULL },
4214 };
4215
4216 static const AVClass h263_class = {
4217     .class_name = "H.263 encoder",
4218     .item_name  = av_default_item_name,
4219     .option     = h263_options,
4220     .version    = LIBAVUTIL_VERSION_INT,
4221 };
4222
4223 AVCodec ff_h263_encoder = {
4224     .name           = "h263",
4225     .type           = AVMEDIA_TYPE_VIDEO,
4226     .id             = AV_CODEC_ID_H263,
4227     .priv_data_size = sizeof(MpegEncContext),
4228     .init           = ff_MPV_encode_init,
4229     .encode2        = ff_MPV_encode_picture,
4230     .close          = ff_MPV_encode_end,
4231     .pix_fmts= (const enum AVPixelFormat[]){AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE},
4232     .long_name= NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4233     .priv_class     = &h263_class,
4234 };
4235
4236 static const AVOption h263p_options[] = {
4237     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4238     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4239     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4240     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4241     FF_MPV_COMMON_OPTS
4242     { NULL },
4243 };
4244 static const AVClass h263p_class = {
4245     .class_name = "H.263p encoder",
4246     .item_name  = av_default_item_name,
4247     .option     = h263p_options,
4248     .version    = LIBAVUTIL_VERSION_INT,
4249 };
4250
4251 AVCodec ff_h263p_encoder = {
4252     .name           = "h263p",
4253     .type           = AVMEDIA_TYPE_VIDEO,
4254     .id             = AV_CODEC_ID_H263P,
4255     .priv_data_size = sizeof(MpegEncContext),
4256     .init           = ff_MPV_encode_init,
4257     .encode2        = ff_MPV_encode_picture,
4258     .close          = ff_MPV_encode_end,
4259     .capabilities   = CODEC_CAP_SLICE_THREADS,
4260     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4261     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4262     .priv_class     = &h263p_class,
4263 };
4264
4265 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4266
4267 AVCodec ff_msmpeg4v2_encoder = {
4268     .name           = "msmpeg4v2",
4269     .type           = AVMEDIA_TYPE_VIDEO,
4270     .id             = AV_CODEC_ID_MSMPEG4V2,
4271     .priv_data_size = sizeof(MpegEncContext),
4272     .init           = ff_MPV_encode_init,
4273     .encode2        = ff_MPV_encode_picture,
4274     .close          = ff_MPV_encode_end,
4275     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4276     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4277     .priv_class     = &msmpeg4v2_class,
4278 };
4279
4280 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4281
4282 AVCodec ff_msmpeg4v3_encoder = {
4283     .name           = "msmpeg4",
4284     .type           = AVMEDIA_TYPE_VIDEO,
4285     .id             = AV_CODEC_ID_MSMPEG4V3,
4286     .priv_data_size = sizeof(MpegEncContext),
4287     .init           = ff_MPV_encode_init,
4288     .encode2        = ff_MPV_encode_picture,
4289     .close          = ff_MPV_encode_end,
4290     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4291     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4292     .priv_class     = &msmpeg4v3_class,
4293 };
4294
4295 FF_MPV_GENERIC_CLASS(wmv1)
4296
4297 AVCodec ff_wmv1_encoder = {
4298     .name           = "wmv1",
4299     .type           = AVMEDIA_TYPE_VIDEO,
4300     .id             = AV_CODEC_ID_WMV1,
4301     .priv_data_size = sizeof(MpegEncContext),
4302     .init           = ff_MPV_encode_init,
4303     .encode2        = ff_MPV_encode_picture,
4304     .close          = ff_MPV_encode_end,
4305     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4306     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4307     .priv_class     = &wmv1_class,
4308 };