1 /*
2  * The simplest mpeg encoder (well, it was the simplest!)
3  * Copyright (c) 2000,2001 Fabrice Bellard
4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24
25 /**
26  * @file
27  * The simplest mpeg encoder (well, it was the simplest!).
28  */
29
30 #include "libavutil/internal.h"
31 #include "libavutil/intmath.h"
32 #include "libavutil/mathematics.h"
33 #include "libavutil/pixdesc.h"
34 #include "libavutil/opt.h"
35 #include "avcodec.h"
36 #include "dct.h"
37 #include "dsputil.h"
38 #include "mpeg12.h"
39 #include "mpegvideo.h"
40 #include "h261.h"
41 #include "h263.h"
42 #include "mathops.h"
43 #include "mjpegenc.h"
44 #include "msmpeg4.h"
45 #include "faandct.h"
46 #include "thread.h"
47 #include "aandcttab.h"
48 #include "flv.h"
49 #include "mpeg4video.h"
50 #include "internal.h"
51 #include "bytestream.h"
52 #include <limits.h>
53 #include "sp5x.h"
54
55 static int encode_picture(MpegEncContext *s, int picture_number);
56 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
57 static int sse_mb(MpegEncContext *s);
58 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
59 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
60
61 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_MV * 2 + 1];
62 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
63
64 const AVOption ff_mpv_generic_options[] = {
65     FF_MPV_COMMON_OPTS
66     { NULL },
67 };
68
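/*
 * Precompute, for every qscale, the quantization multiplier tables derived
 * from a quant matrix: qmat holds the 32-bit reciprocals used by the C and
 * trellis quantizers, qmat16 the 16-bit reciprocal/bias pairs used by the
 * x86 quantizer. The scaling differs depending on whether the selected
 * forward DCT is unscaled (islow, faan) or AAN-scaled (ifast).
 */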
69 void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64],
70                        uint16_t (*qmat16)[2][64],
71                        const uint16_t *quant_matrix,
72                        int bias, int qmin, int qmax, int intra)
73 {
74     int qscale;
75     int shift = 0;
76
77     for (qscale = qmin; qscale <= qmax; qscale++) {
78         int i;
79         if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
80             dsp->fdct == ff_jpeg_fdct_islow_10 ||
81             dsp->fdct == ff_faandct) {
82             for (i = 0; i < 64; i++) {
83                 const int j = dsp->idct_permutation[i];
84                 /* 16 <= qscale * quant_matrix[i] <= 7905
85                  * Assume x = qscale * quant_matrix[i]
86                  * So             16 <=              x  <= 7905
87                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
88                  * so          32768 >= (1 << 19) / (x) >= 67 */
89
90                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
91                                         (qscale * quant_matrix[j]));
92             }
93         } else if (dsp->fdct == ff_fdct_ifast) {
94             for (i = 0; i < 64; i++) {
95                 const int j = dsp->idct_permutation[i];
96                 /* 16 <= qscale * quant_matrix[i] <= 7905
97                  * Assume x = ff_aanscales[i] * qscale * quant_matrix[i]
98                  *             19952 <=              x  <= 249205026
99                  * (1 << 36) / 19952 >= (1 << 36) / (x) >= (1 << 36) / 249205026
100                  *           3444240 >= (1 << 36) / (x) >= 275 */
101
102                 qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
103                                         (ff_aanscales[i] * (int64_t)qscale * quant_matrix[j]));
104             }
105         } else {
106             for (i = 0; i < 64; i++) {
107                 const int j = dsp->idct_permutation[i];
108                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
109                  * Assume x = qscale * quant_matrix[i]
110                  * So             16 <=              x  <= 7905
111                  * so (1 << 19) / 16 >= (1 << 19) / (x) >= (1 << 19) / 7905
112                  * so          32768 >= (1 << 19) / (x) >= 67 */
113                 qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
114                                         (qscale * quant_matrix[j]));
115                 //qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) /
116                 //                    (qscale * quant_matrix[i]);
117                 qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) /
118                                        (qscale * quant_matrix[j]);
119
120                 if (qmat16[qscale][0][i] == 0 ||
121                     qmat16[qscale][0][i] == 128 * 256)
122                     qmat16[qscale][0][i] = 128 * 256 - 1;
123                 qmat16[qscale][1][i] =
124                     ROUNDED_DIV(bias << (16 - QUANT_BIAS_SHIFT),
125                                 qmat16[qscale][0][i]);
126             }
127         }
128
129         for (i = intra; i < 64; i++) {
130             int64_t max = 8191;
131             if (dsp->fdct == ff_fdct_ifast) {
132                 max = (8191LL * ff_aanscales[i]) >> 14;
133             }
134             while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
135                 shift++;
136             }
137         }
138     }
139     if (shift) {
140         av_log(NULL, AV_LOG_INFO,
141                "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
142                QMAT_SHIFT - shift);
143     }
144 }
145
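/* Derive the macroblock quantizer from the current lambda: 139 / 2^14 is
 * roughly 1/118, i.e. the inverse of FF_QP2LAMBDA, and the result is clipped
 * to the configured qmin/qmax range. */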
146 static inline void update_qscale(MpegEncContext *s)
147 {
148     s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
149                 (FF_LAMBDA_SHIFT + 7);
150     s->qscale = av_clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
151
152     s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
153                  FF_LAMBDA_SHIFT;
154 }
155
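/* Write an optional custom quantization matrix: a '1' marker bit followed by
 * the 64 entries in zigzag scan order, or a single '0' bit if the default
 * matrix is kept. */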
156 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
157 {
158     int i;
159
160     if (matrix) {
161         put_bits(pb, 1, 1);
162         for (i = 0; i < 64; i++) {
163             put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
164         }
165     } else
166         put_bits(pb, 1, 0);
167 }
168
169 /**
170  * init s->current_picture.qscale_table from s->lambda_table
171  */
172 void ff_init_qscale_tab(MpegEncContext *s)
173 {
174     int8_t * const qscale_table = s->current_picture.qscale_table;
175     int i;
176
177     for (i = 0; i < s->mb_num; i++) {
178         unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
179         int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
180         qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
181                                                   s->avctx->qmax);
182     }
183 }
184
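/* Copy the per-frame fields decided during picture setup and motion
 * estimation from one context into another; used to keep the slice-thread
 * contexts in sync with the main one. */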
185 static void update_duplicate_context_after_me(MpegEncContext *dst,
186                                               MpegEncContext *src)
187 {
188 #define COPY(a) dst->a= src->a
189     COPY(pict_type);
190     COPY(current_picture);
191     COPY(f_code);
192     COPY(b_code);
193     COPY(qscale);
194     COPY(lambda);
195     COPY(lambda2);
196     COPY(picture_in_gop_number);
197     COPY(gop_picture_number);
198     COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
199     COPY(progressive_frame);    // FIXME don't set in encode_header
200     COPY(partitioned_frame);    // FIXME don't set in encode_header
201 #undef COPY
202 }
203
204 /**
205  * Set the given MpegEncContext to defaults for encoding.
206  * The changed fields will not depend on the prior state of the MpegEncContext.
207  */
208 static void MPV_encode_defaults(MpegEncContext *s)
209 {
210     int i;
211     ff_MPV_common_defaults(s);
212
213     for (i = -16; i < 16; i++) {
214         default_fcode_tab[i + MAX_MV] = 1;
215     }
216     s->me.mv_penalty = default_mv_penalty;
217     s->fcode_tab     = default_fcode_tab;
218 }
219
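/* Pick the quantizer implementations: arch-optimized versions on x86 when
 * available, the plain C versions otherwise; when trellis quantization is
 * requested it becomes the main dct_quantize, while the simple quantizer is
 * kept as fast_dct_quantize. */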
220 av_cold int ff_dct_encode_init(MpegEncContext *s) {
221     if (ARCH_X86)
222         ff_dct_encode_init_x86(s);
223
224     if (!s->dct_quantize)
225         s->dct_quantize = ff_dct_quantize_c;
226     if (!s->denoise_dct)
227         s->denoise_dct  = denoise_dct_c;
228     s->fast_dct_quantize = s->dct_quantize;
229     if (s->avctx->trellis)
230         s->dct_quantize  = dct_quantize_trellis_c;
231
232     return 0;
233 }
234
235 /* init video encoder */
236 av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
237 {
238     MpegEncContext *s = avctx->priv_data;
239     int i;
240     int chroma_h_shift, chroma_v_shift;
241
242     MPV_encode_defaults(s);
243
244     switch (avctx->codec_id) {
245     case AV_CODEC_ID_MPEG2VIDEO:
246         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
247             avctx->pix_fmt != AV_PIX_FMT_YUV422P) {
248             av_log(avctx, AV_LOG_ERROR,
249                    "only YUV420 and YUV422 are supported\n");
250             return -1;
251         }
252         break;
253     case AV_CODEC_ID_LJPEG:
254         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
255             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
256             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
257             avctx->pix_fmt != AV_PIX_FMT_BGR0     &&
258             avctx->pix_fmt != AV_PIX_FMT_BGRA     &&
259             avctx->pix_fmt != AV_PIX_FMT_BGR24    &&
260             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
261               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
262               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
263              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
264             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in LJPEG\n");
265             return -1;
266         }
267         break;
268     case AV_CODEC_ID_MJPEG:
269     case AV_CODEC_ID_AMV:
270         if (avctx->pix_fmt != AV_PIX_FMT_YUVJ420P &&
271             avctx->pix_fmt != AV_PIX_FMT_YUVJ422P &&
272             avctx->pix_fmt != AV_PIX_FMT_YUVJ444P &&
273             ((avctx->pix_fmt != AV_PIX_FMT_YUV420P &&
274               avctx->pix_fmt != AV_PIX_FMT_YUV422P &&
275               avctx->pix_fmt != AV_PIX_FMT_YUV444P) ||
276              avctx->strict_std_compliance > FF_COMPLIANCE_UNOFFICIAL)) {
277             av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
278             return -1;
279         }
280         break;
281     default:
282         if (avctx->pix_fmt != AV_PIX_FMT_YUV420P) {
283             av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
284             return -1;
285         }
286     }
287
288     switch (avctx->pix_fmt) {
289     case AV_PIX_FMT_YUVJ444P:
290     case AV_PIX_FMT_YUV444P:
291         s->chroma_format = CHROMA_444;
292         break;
293     case AV_PIX_FMT_YUVJ422P:
294     case AV_PIX_FMT_YUV422P:
295         s->chroma_format = CHROMA_422;
296         break;
297     case AV_PIX_FMT_YUVJ420P:
298     case AV_PIX_FMT_YUV420P:
299     default:
300         s->chroma_format = CHROMA_420;
301         break;
302     }
303
304     s->bit_rate = avctx->bit_rate;
305     s->width    = avctx->width;
306     s->height   = avctx->height;
307     if (avctx->gop_size > 600 &&
308         avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
309         av_log(avctx, AV_LOG_WARNING,
310                "keyframe interval too large, reducing it from %d to %d\n",
311                avctx->gop_size, 600);
312         avctx->gop_size = 600;
313     }
314     s->gop_size     = avctx->gop_size;
315     s->avctx        = avctx;
316     s->flags        = avctx->flags;
317     s->flags2       = avctx->flags2;
318     s->max_b_frames = avctx->max_b_frames;
319     s->codec_id     = avctx->codec->id;
320     s->strict_std_compliance = avctx->strict_std_compliance;
321     s->quarter_sample     = (avctx->flags & CODEC_FLAG_QPEL) != 0;
322     s->mpeg_quant         = avctx->mpeg_quant;
323     s->rtp_mode           = !!avctx->rtp_payload_size;
324     s->intra_dc_precision = avctx->intra_dc_precision;
325     s->user_specified_pts = AV_NOPTS_VALUE;
326
327     if (s->gop_size <= 1) {
328         s->intra_only = 1;
329         s->gop_size   = 12;
330     } else {
331         s->intra_only = 0;
332     }
333
334     s->me_method = avctx->me_method;
335
336     /* Fixed QSCALE */
337     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
338
339     s->adaptive_quant = (s->avctx->lumi_masking ||
340                          s->avctx->dark_masking ||
341                          s->avctx->temporal_cplx_masking ||
342                          s->avctx->spatial_cplx_masking  ||
343                          s->avctx->p_masking      ||
344                          s->avctx->border_masking ||
345                          (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
346                         !s->fixed_qscale;
347
348     s->loop_filter      = !!(s->flags & CODEC_FLAG_LOOP_FILTER);
349
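    /* If a maximum bitrate is given without an explicit VBV buffer size,
     * derive one heuristically (in units of 16384 bits), roughly following
     * the buffer limits of the respective standards. */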
350     if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
351         switch(avctx->codec_id) {
352         case AV_CODEC_ID_MPEG1VIDEO:
353         case AV_CODEC_ID_MPEG2VIDEO:
354             avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112L / 15000000 * 16384;
355             break;
356         case AV_CODEC_ID_MPEG4:
357         case AV_CODEC_ID_MSMPEG4V1:
358         case AV_CODEC_ID_MSMPEG4V2:
359         case AV_CODEC_ID_MSMPEG4V3:
360             if       (avctx->rc_max_rate >= 15000000) {
361                 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000L) * (760-320) / (38400000 - 15000000);
362             } else if(avctx->rc_max_rate >=  2000000) {
363                 avctx->rc_buffer_size =  80 + (avctx->rc_max_rate -  2000000L) * (320- 80) / (15000000 -  2000000);
364             } else if(avctx->rc_max_rate >=   384000) {
365                 avctx->rc_buffer_size =  40 + (avctx->rc_max_rate -   384000L) * ( 80- 40) / ( 2000000 -   384000);
366             } else
367                 avctx->rc_buffer_size = 40;
368             avctx->rc_buffer_size *= 16384;
369             break;
370         }
371         if (avctx->rc_buffer_size) {
372             av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
373         }
374     }
375
376     if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
377         av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
378         if (avctx->rc_max_rate && !avctx->rc_buffer_size)
379             return -1;
380     }
381
382     if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
383         av_log(avctx, AV_LOG_INFO,
384                "Warning, min_rate > 0 but min_rate != max_rate is not recommended!\n");
385     }
386
387     if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
388         av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
389         return -1;
390     }
391
392     if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
393         av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
394         return -1;
395     }
396
397     if (avctx->rc_max_rate &&
398         avctx->rc_max_rate == avctx->bit_rate &&
399         avctx->rc_max_rate != avctx->rc_min_rate) {
400         av_log(avctx, AV_LOG_INFO,
401                "impossible bitrate constraints, this will fail\n");
402     }
403
404     if (avctx->rc_buffer_size &&
405         avctx->bit_rate * (int64_t)avctx->time_base.num >
406             avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
407         av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
408         return -1;
409     }
410
411     if (!s->fixed_qscale &&
412         avctx->bit_rate * av_q2d(avctx->time_base) >
413             avctx->bit_rate_tolerance) {
414         av_log(avctx, AV_LOG_ERROR,
415                "bitrate tolerance too small for bitrate\n");
416         return -1;
417     }
418
419     if (s->avctx->rc_max_rate &&
420         s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
421         (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
422          s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
423         90000LL * (avctx->rc_buffer_size - 1) >
424             s->avctx->rc_max_rate * 0xFFFFLL) {
425         av_log(avctx, AV_LOG_INFO,
426                "Warning, vbv_delay will be set to 0xFFFF (=VBR) as the "
427                "specified vbv buffer is too large for the given bitrate!\n");
428     }
429
430     if ((s->flags & CODEC_FLAG_4MV)  && s->codec_id != AV_CODEC_ID_MPEG4 &&
431         s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
432         s->codec_id != AV_CODEC_ID_FLV1) {
433         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
434         return -1;
435     }
436
437     if (s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
438         av_log(avctx, AV_LOG_ERROR,
439                "OBMC is only supported with simple mb decision\n");
440         return -1;
441     }
442
443     if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
444         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
445         return -1;
446     }
447
448     if (s->max_b_frames                    &&
449         s->codec_id != AV_CODEC_ID_MPEG4      &&
450         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
451         s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
452         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
453         return -1;
454     }
455
456     if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
457          s->codec_id == AV_CODEC_ID_H263  ||
458          s->codec_id == AV_CODEC_ID_H263P) &&
459         (avctx->sample_aspect_ratio.num > 255 ||
460          avctx->sample_aspect_ratio.den > 255)) {
461         av_log(avctx, AV_LOG_WARNING,
462                "Invalid pixel aspect ratio %i/%i, limit is 255/255, reducing\n",
463                avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
464         av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
465                    avctx->sample_aspect_ratio.num,  avctx->sample_aspect_ratio.den, 255);
466     }
467
468     if ((s->codec_id == AV_CODEC_ID_H263  ||
469          s->codec_id == AV_CODEC_ID_H263P) &&
470         (avctx->width  > 2048 ||
471          avctx->height > 1152 )) {
472         av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
473         return -1;
474     }
475     if ((s->codec_id == AV_CODEC_ID_H263  ||
476          s->codec_id == AV_CODEC_ID_H263P) &&
477         ((avctx->width &3) ||
478          (avctx->height&3) )) {
479         av_log(avctx, AV_LOG_ERROR, "w/h must be a multiple of 4\n");
480         return -1;
481     }
482
483     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO &&
484         (avctx->width  > 4095 ||
485          avctx->height > 4095 )) {
486         av_log(avctx, AV_LOG_ERROR, "MPEG-1 does not support resolutions above 4095x4095\n");
487         return -1;
488     }
489
490     if (s->codec_id == AV_CODEC_ID_MPEG2VIDEO &&
491         (avctx->width  > 16383 ||
492          avctx->height > 16383 )) {
493         av_log(avctx, AV_LOG_ERROR, "MPEG-2 does not support resolutions above 16383x16383\n");
494         return -1;
495     }
496
497     if (s->codec_id == AV_CODEC_ID_RV10 &&
498         (avctx->width &15 ||
499          avctx->height&15 )) {
500         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
501         return AVERROR(EINVAL);
502     }
503
504     if (s->codec_id == AV_CODEC_ID_RV20 &&
505         (avctx->width &3 ||
506          avctx->height&3 )) {
507         av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
508         return AVERROR(EINVAL);
509     }
510
511     if ((s->codec_id == AV_CODEC_ID_WMV1 ||
512          s->codec_id == AV_CODEC_ID_WMV2) &&
513          avctx->width & 1) {
514          av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
515          return -1;
516     }
517
518     if ((s->flags & (CODEC_FLAG_INTERLACED_DCT | CODEC_FLAG_INTERLACED_ME)) &&
519         s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
520         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
521         return -1;
522     }
523
524     // FIXME mpeg2 uses that too
525     if (s->mpeg_quant && (   s->codec_id != AV_CODEC_ID_MPEG4
526                           && s->codec_id != AV_CODEC_ID_MPEG2VIDEO)) {
527         av_log(avctx, AV_LOG_ERROR,
528                "mpeg2 style quantization not supported by codec\n");
529         return -1;
530     }
531
532     if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
533         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
534         return -1;
535     }
536
537     if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
538         s->avctx->mb_decision != FF_MB_DECISION_RD) {
539         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
540         return -1;
541     }
542
543     if (s->avctx->scenechange_threshold < 1000000000 &&
544         (s->flags & CODEC_FLAG_CLOSED_GOP)) {
545         av_log(avctx, AV_LOG_ERROR,
546                "closed GOP with scene change detection is not supported yet, "
547                "set threshold to 1000000000\n");
548         return -1;
549     }
550
551     if (s->flags & CODEC_FLAG_LOW_DELAY) {
552         if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
553             av_log(avctx, AV_LOG_ERROR,
554                   "low delay forcing is only available for mpeg2\n");
555             return -1;
556         }
557         if (s->max_b_frames != 0) {
558             av_log(avctx, AV_LOG_ERROR,
559                    "b frames cannot be used with low delay\n");
560             return -1;
561         }
562     }
563
564     if (s->q_scale_type == 1) {
565         if (avctx->qmax > 12) {
566             av_log(avctx, AV_LOG_ERROR,
567                    "non-linear quant only supports qmax <= 12 currently\n");
568             return -1;
569         }
570     }
571
572     if (s->avctx->thread_count > 1         &&
573         s->codec_id != AV_CODEC_ID_MPEG4      &&
574         s->codec_id != AV_CODEC_ID_MPEG1VIDEO &&
575         s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
576         s->codec_id != AV_CODEC_ID_MJPEG      &&
577         (s->codec_id != AV_CODEC_ID_H263P)) {
578         av_log(avctx, AV_LOG_ERROR,
579                "multi-threaded encoding not supported by codec\n");
580         return -1;
581     }
582
583     if (s->avctx->thread_count < 1) {
584         av_log(avctx, AV_LOG_ERROR,
585                "automatic thread number detection not supported by codec, "
586                "patch welcome\n");
587         return -1;
588     }
589
590     if (s->avctx->slices > 1 || s->avctx->thread_count > 1)
591         s->rtp_mode = 1;
592
593     if (s->avctx->thread_count > 1 && s->codec_id == AV_CODEC_ID_H263P)
594         s->h263_slice_structured = 1;
595
596     if (!avctx->time_base.den || !avctx->time_base.num) {
597         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
598         return -1;
599     }
600
601     i = (INT_MAX / 2 + 128) >> 8;
602     if (avctx->mb_threshold >= i) {
603         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n",
604                i - 1);
605         return -1;
606     }
607
608     if (avctx->b_frame_strategy && (avctx->flags & CODEC_FLAG_PASS2)) {
609         av_log(avctx, AV_LOG_INFO,
610                "notice: b_frame_strategy only affects the first pass\n");
611         avctx->b_frame_strategy = 0;
612     }
613
614     i = av_gcd(avctx->time_base.den, avctx->time_base.num);
615     if (i > 1) {
616         av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
617         avctx->time_base.den /= i;
618         avctx->time_base.num /= i;
619         //return -1;
620     }
621
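    /* Default quantizer rounding: MPEG-1/2, (M)JPEG and MPEG-4-style
     * quantization use an intra bias of +3/8 and no inter bias, while the
     * H.263 family uses no intra bias and an inter bias of -1/4, biasing
     * inter coefficients towards zero. */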
622     if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id==AV_CODEC_ID_AMV) {
623         // (a + x * 3 / 8) / x
624         s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
625         s->inter_quant_bias = 0;
626     } else {
627         s->intra_quant_bias = 0;
628         // (a - x / 4) / x
629         s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
630     }
631
632     if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
633         s->intra_quant_bias = avctx->intra_quant_bias;
634     if (avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
635         s->inter_quant_bias = avctx->inter_quant_bias;
636
637     av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
638
639     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
640
641     if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
642         s->avctx->time_base.den > (1 << 16) - 1) {
643         av_log(avctx, AV_LOG_ERROR,
644                "timebase %d/%d not supported by MPEG 4 standard, "
645                "the maximum admitted value for the timebase denominator "
646                "is %d\n", s->avctx->time_base.num, s->avctx->time_base.den,
647                (1 << 16) - 1);
648         return -1;
649     }
650     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
651
652     switch (avctx->codec->id) {
653     case AV_CODEC_ID_MPEG1VIDEO:
654         s->out_format = FMT_MPEG1;
655         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
656         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
657         break;
658     case AV_CODEC_ID_MPEG2VIDEO:
659         s->out_format = FMT_MPEG1;
660         s->low_delay  = !!(s->flags & CODEC_FLAG_LOW_DELAY);
661         avctx->delay  = s->low_delay ? 0 : (s->max_b_frames + 1);
662         s->rtp_mode   = 1;
663         break;
664     case AV_CODEC_ID_LJPEG:
665     case AV_CODEC_ID_MJPEG:
666     case AV_CODEC_ID_AMV:
667         s->out_format = FMT_MJPEG;
668         s->intra_only = 1; /* force intra only for jpeg */
669         if (avctx->codec->id == AV_CODEC_ID_LJPEG &&
670             (avctx->pix_fmt == AV_PIX_FMT_BGR0
671              || s->avctx->pix_fmt == AV_PIX_FMT_BGRA
672              || s->avctx->pix_fmt == AV_PIX_FMT_BGR24)) {
673             s->mjpeg_vsample[0] = s->mjpeg_hsample[0] =
674             s->mjpeg_vsample[1] = s->mjpeg_hsample[1] =
675             s->mjpeg_vsample[2] = s->mjpeg_hsample[2] = 1;
676         } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P || avctx->pix_fmt == AV_PIX_FMT_YUVJ444P) {
677             s->mjpeg_vsample[0] = s->mjpeg_vsample[1] = s->mjpeg_vsample[2] = 2;
678             s->mjpeg_hsample[0] = s->mjpeg_hsample[1] = s->mjpeg_hsample[2] = 1;
679         } else {
680             s->mjpeg_vsample[0] = 2;
681             s->mjpeg_vsample[1] = 2 >> chroma_v_shift;
682             s->mjpeg_vsample[2] = 2 >> chroma_v_shift;
683             s->mjpeg_hsample[0] = 2;
684             s->mjpeg_hsample[1] = 2 >> chroma_h_shift;
685             s->mjpeg_hsample[2] = 2 >> chroma_h_shift;
686         }
687         if (!(CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) ||
688             ff_mjpeg_encode_init(s) < 0)
689             return -1;
690         avctx->delay = 0;
691         s->low_delay = 1;
692         break;
693     case AV_CODEC_ID_H261:
694         if (!CONFIG_H261_ENCODER)
695             return -1;
696         if (ff_h261_get_picture_format(s->width, s->height) < 0) {
697             av_log(avctx, AV_LOG_ERROR,
698                    "The specified picture size of %dx%d is not valid for the "
699                    "H.261 codec.\nValid sizes are 176x144, 352x288\n",
700                     s->width, s->height);
701             return -1;
702         }
703         s->out_format = FMT_H261;
704         avctx->delay  = 0;
705         s->low_delay  = 1;
706         break;
707     case AV_CODEC_ID_H263:
708         if (!CONFIG_H263_ENCODER)
709             return -1;
710         if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
711                              s->width, s->height) == 8) {
712             av_log(avctx, AV_LOG_ERROR,
713                    "The specified picture size of %dx%d is not valid for "
714                    "the H.263 codec.\nValid sizes are 128x96, 176x144, "
715                    "352x288, 704x576, and 1408x1152. "
716                    "Try H.263+.\n", s->width, s->height);
717             return -1;
718         }
719         s->out_format = FMT_H263;
720         avctx->delay  = 0;
721         s->low_delay  = 1;
722         break;
723     case AV_CODEC_ID_H263P:
724         s->out_format = FMT_H263;
725         s->h263_plus  = 1;
726         /* Fx */
727         s->h263_aic        = (avctx->flags & CODEC_FLAG_AC_PRED) ? 1 : 0;
728         s->modified_quant  = s->h263_aic;
729         s->loop_filter     = (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
730         s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
731
732         /* /Fx */
733         /* These are just to be sure */
734         avctx->delay = 0;
735         s->low_delay = 1;
736         break;
737     case AV_CODEC_ID_FLV1:
738         s->out_format      = FMT_H263;
739         s->h263_flv        = 2; /* format = 1; 11-bit codes */
740         s->unrestricted_mv = 1;
741         s->rtp_mode  = 0; /* don't allow GOB */
742         avctx->delay = 0;
743         s->low_delay = 1;
744         break;
745     case AV_CODEC_ID_RV10:
746         s->out_format = FMT_H263;
747         avctx->delay  = 0;
748         s->low_delay  = 1;
749         break;
750     case AV_CODEC_ID_RV20:
751         s->out_format      = FMT_H263;
752         avctx->delay       = 0;
753         s->low_delay       = 1;
754         s->modified_quant  = 1;
755         s->h263_aic        = 1;
756         s->h263_plus       = 1;
757         s->loop_filter     = 1;
758         s->unrestricted_mv = 0;
759         break;
760     case AV_CODEC_ID_MPEG4:
761         s->out_format      = FMT_H263;
762         s->h263_pred       = 1;
763         s->unrestricted_mv = 1;
764         s->low_delay       = s->max_b_frames ? 0 : 1;
765         avctx->delay       = s->low_delay ? 0 : (s->max_b_frames + 1);
766         break;
767     case AV_CODEC_ID_MSMPEG4V2:
768         s->out_format      = FMT_H263;
769         s->h263_pred       = 1;
770         s->unrestricted_mv = 1;
771         s->msmpeg4_version = 2;
772         avctx->delay       = 0;
773         s->low_delay       = 1;
774         break;
775     case AV_CODEC_ID_MSMPEG4V3:
776         s->out_format        = FMT_H263;
777         s->h263_pred         = 1;
778         s->unrestricted_mv   = 1;
779         s->msmpeg4_version   = 3;
780         s->flipflop_rounding = 1;
781         avctx->delay         = 0;
782         s->low_delay         = 1;
783         break;
784     case AV_CODEC_ID_WMV1:
785         s->out_format        = FMT_H263;
786         s->h263_pred         = 1;
787         s->unrestricted_mv   = 1;
788         s->msmpeg4_version   = 4;
789         s->flipflop_rounding = 1;
790         avctx->delay         = 0;
791         s->low_delay         = 1;
792         break;
793     case AV_CODEC_ID_WMV2:
794         s->out_format        = FMT_H263;
795         s->h263_pred         = 1;
796         s->unrestricted_mv   = 1;
797         s->msmpeg4_version   = 5;
798         s->flipflop_rounding = 1;
799         avctx->delay         = 0;
800         s->low_delay         = 1;
801         break;
802     default:
803         return -1;
804     }
805
806     avctx->has_b_frames = !s->low_delay;
807
808     s->encoding = 1;
809
810     s->progressive_frame    =
811     s->progressive_sequence = !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT |
812                                                 CODEC_FLAG_INTERLACED_ME) ||
813                                 s->alternate_scan);
814
815     /* init */
816     if (ff_MPV_common_init(s) < 0)
817         return -1;
818
819     ff_dct_encode_init(s);
820
821     if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
822         s->chroma_qscale_table = ff_h263_chroma_qscale_table;
823
824     s->quant_precision = 5;
825
826     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
827     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
828
829     if (CONFIG_H261_ENCODER && s->out_format == FMT_H261)
830         ff_h261_encode_init(s);
831     if (CONFIG_H263_ENCODER && s->out_format == FMT_H263)
832         ff_h263_encode_init(s);
833     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
834         ff_msmpeg4_encode_init(s);
835     if ((CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
836         && s->out_format == FMT_MPEG1)
837         ff_mpeg1_encode_init(s);
838
839     /* init q matrix */
840     for (i = 0; i < 64; i++) {
841         int j = s->dsp.idct_permutation[i];
842         if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
843             s->mpeg_quant) {
844             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
845             s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
846         } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
847             s->intra_matrix[j] =
848             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
849         } else {
850             /* mpeg1/2 */
851             s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
852             s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
853         }
854         if (s->avctx->intra_matrix)
855             s->intra_matrix[j] = s->avctx->intra_matrix[i];
856         if (s->avctx->inter_matrix)
857             s->inter_matrix[j] = s->avctx->inter_matrix[i];
858     }
859
860     /* precompute matrix */
861     /* for mjpeg, we do include qscale in the matrix */
862     if (s->out_format != FMT_MJPEG) {
863         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
864                           s->intra_matrix, s->intra_quant_bias, avctx->qmin,
865                           31, 1);
866         ff_convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
867                           s->inter_matrix, s->inter_quant_bias, avctx->qmin,
868                           31, 0);
869     }
870
871     if (ff_rate_control_init(s) < 0)
872         return -1;
873
874     return 0;
875 }
876
877 av_cold int ff_MPV_encode_end(AVCodecContext *avctx)
878 {
879     MpegEncContext *s = avctx->priv_data;
880
881     ff_rate_control_uninit(s);
882
883     ff_MPV_common_end(s);
884     if ((CONFIG_MJPEG_ENCODER || CONFIG_LJPEG_ENCODER) &&
885         s->out_format == FMT_MJPEG)
886         ff_mjpeg_encode_close(s);
887
888     av_freep(&avctx->extradata);
889
890     return 0;
891 }
892
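/* Sum of absolute differences of a 16x16 block against a constant reference
 * value (typically the block mean). */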
893 static int get_sae(uint8_t *src, int ref, int stride)
894 {
895     int x,y;
896     int acc = 0;
897
898     for (y = 0; y < 16; y++) {
899         for (x = 0; x < 16; x++) {
900             acc += FFABS(src[x + y * stride] - ref);
901         }
902     }
903
904     return acc;
905 }
906
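/* Count the 16x16 blocks that look cheaper to code as intra (difference to
 * their own mean) than as a difference to the reference frame; used as the
 * scene-change/B-frame score for b_frame_strategy == 1. */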
907 static int get_intra_count(MpegEncContext *s, uint8_t *src,
908                            uint8_t *ref, int stride)
909 {
910     int x, y, w, h;
911     int acc = 0;
912
913     w = s->width  & ~15;
914     h = s->height & ~15;
915
916     for (y = 0; y < h; y += 16) {
917         for (x = 0; x < w; x += 16) {
918             int offset = x + y * stride;
919             int sad  = s->dsp.sad[0](NULL, src + offset, ref + offset, stride,
920                                      16);
921             int mean = (s->dsp.pix_sum(src + offset, stride) + 128) >> 8;
922             int sae  = get_sae(src + offset, mean, stride);
923
924             acc += sae + 500 < sad;
925         }
926     }
927     return acc;
928 }
929
930
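/* Queue one user-supplied frame for encoding: validate or derive its pts,
 * then either reference the frame directly (when its strides match the
 * encoder's) or copy it into an internal picture, padding the edges when
 * needed, and finally place it in the input_picture FIFO according to the
 * B-frame encoding delay. */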
931 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
932 {
933     Picture *pic = NULL;
934     int64_t pts;
935     int i, display_picture_number = 0, ret;
936     const int encoding_delay = s->max_b_frames ? s->max_b_frames :
937                                                  (s->low_delay ? 0 : 1);
938     int direct = 1;
939
940     if (pic_arg) {
941         pts = pic_arg->pts;
942         display_picture_number = s->input_picture_number++;
943
944         if (pts != AV_NOPTS_VALUE) {
945             if (s->user_specified_pts != AV_NOPTS_VALUE) {
946                 int64_t last = s->user_specified_pts;
947
948                 if (pts <= last) {
949                     av_log(s->avctx, AV_LOG_ERROR,
950                            "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
951                            pts, last);
952                     return AVERROR(EINVAL);
953                 }
954
955                 if (!s->low_delay && display_picture_number == 1)
956                     s->dts_delta = pts - last;
957             }
958             s->user_specified_pts = pts;
959         } else {
960             if (s->user_specified_pts != AV_NOPTS_VALUE) {
961                 s->user_specified_pts =
962                 pts = s->user_specified_pts + 1;
963                 av_log(s->avctx, AV_LOG_INFO,
964                        "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
965                        pts);
966             } else {
967                 pts = display_picture_number;
968             }
969         }
970     }
971
972     if (pic_arg) {
973         if (!pic_arg->buf[0])
974             direct = 0;
975         if (pic_arg->linesize[0] != s->linesize)
976             direct = 0;
977         if (pic_arg->linesize[1] != s->uvlinesize)
978             direct = 0;
979         if (pic_arg->linesize[2] != s->uvlinesize)
980             direct = 0;
981
982         av_dlog(s->avctx, "%d %d %d %d\n", pic_arg->linesize[0],
983                 pic_arg->linesize[1], s->linesize, s->uvlinesize);
984
985         if (direct) {
986             i = ff_find_unused_picture(s, 1);
987             if (i < 0)
988                 return i;
989
990             pic = &s->picture[i];
991             pic->reference = 3;
992
993             if ((ret = av_frame_ref(&pic->f, pic_arg)) < 0)
994                 return ret;
995             if (ff_alloc_picture(s, pic, 1) < 0) {
996                 return -1;
997             }
998         } else {
999             i = ff_find_unused_picture(s, 0);
1000             if (i < 0)
1001                 return i;
1002
1003             pic = &s->picture[i];
1004             pic->reference = 3;
1005
1006             if (ff_alloc_picture(s, pic, 0) < 0) {
1007                 return -1;
1008             }
1009
1010             if (pic->f.data[0] + INPLACE_OFFSET == pic_arg->data[0] &&
1011                 pic->f.data[1] + INPLACE_OFFSET == pic_arg->data[1] &&
1012                 pic->f.data[2] + INPLACE_OFFSET == pic_arg->data[2]) {
1013                 // empty
1014             } else {
1015                 int h_chroma_shift, v_chroma_shift;
1016                 av_pix_fmt_get_chroma_sub_sample(s->avctx->pix_fmt,
1017                                                  &h_chroma_shift,
1018                                                  &v_chroma_shift);
1019
1020                 for (i = 0; i < 3; i++) {
1021                     int src_stride = pic_arg->linesize[i];
1022                     int dst_stride = i ? s->uvlinesize : s->linesize;
1023                     int h_shift = i ? h_chroma_shift : 0;
1024                     int v_shift = i ? v_chroma_shift : 0;
1025                     int w = s->width  >> h_shift;
1026                     int h = s->height >> v_shift;
1027                     uint8_t *src = pic_arg->data[i];
1028                     uint8_t *dst = pic->f.data[i];
1029
1030                     if (s->codec_id == AV_CODEC_ID_AMV && !(s->avctx->flags & CODEC_FLAG_EMU_EDGE)) {
1031                         h = ((s->height + 15)/16*16) >> v_shift;
1032                     }
1033
1034                     if (!s->avctx->rc_buffer_size)
1035                         dst += INPLACE_OFFSET;
1036
1037                     if (src_stride == dst_stride)
1038                         memcpy(dst, src, src_stride * h);
1039                     else {
1040                         int h2 = h;
1041                         uint8_t *dst2 = dst;
1042                         while (h2--) {
1043                             memcpy(dst2, src, w);
1044                             dst2 += dst_stride;
1045                             src += src_stride;
1046                         }
1047                     }
1048                     if ((s->width & 15) || (s->height & 15)) {
1049                         s->dsp.draw_edges(dst, dst_stride,
1050                                           w, h,
1051                                           16>>h_shift,
1052                                           16>>v_shift,
1053                                           EDGE_BOTTOM);
1054                     }
1055                 }
1056             }
1057         }
1058         ret = av_frame_copy_props(&pic->f, pic_arg);
1059         if (ret < 0)
1060             return ret;
1061
1062         pic->f.display_picture_number = display_picture_number;
1063         pic->f.pts = pts; // we set this here to avoid modifying pic_arg
1064     }
1065
1066     /* shift buffer entries */
1067     for (i = 1; i < MAX_PICTURE_COUNT /*s->encoding_delay + 1*/; i++)
1068         s->input_picture[i - 1] = s->input_picture[i];
1069
1070     s->input_picture[encoding_delay] = (Picture*) pic;
1071
1072     return 0;
1073 }
1074
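/* Decide whether the current input frame may be skipped: accumulate a
 * per-8x8-block difference score against the last coded frame (weighted
 * according to frame_skip_exp) and compare it with frame_skip_threshold and
 * a lambda-scaled frame_skip_factor. */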
1075 static int skip_check(MpegEncContext *s, Picture *p, Picture *ref)
1076 {
1077     int x, y, plane;
1078     int score = 0;
1079     int64_t score64 = 0;
1080
1081     for (plane = 0; plane < 3; plane++) {
1082         const int stride = p->f.linesize[plane];
1083         const int bw = plane ? 1 : 2;
1084         for (y = 0; y < s->mb_height * bw; y++) {
1085             for (x = 0; x < s->mb_width * bw; x++) {
1086                 int off = p->shared ? 0 : 16;
1087                 uint8_t *dptr = p->f.data[plane] + 8 * (x + y * stride) + off;
1088                 uint8_t *rptr = ref->f.data[plane] + 8 * (x + y * stride);
1089                 int v   = s->dsp.frame_skip_cmp[1](s, dptr, rptr, stride, 8);
1090
1091                 switch (s->avctx->frame_skip_exp) {
1092                 case 0: score    =  FFMAX(score, v);          break;
1093                 case 1: score   += FFABS(v);                  break;
1094                 case 2: score   += v * v;                     break;
1095                 case 3: score64 += FFABS(v * v * (int64_t)v); break;
1096                 case 4: score64 += v * v * (int64_t)(v * v);  break;
1097                 }
1098             }
1099         }
1100     }
1101
1102     if (score)
1103         score64 = score;
1104
1105     if (score64 < s->avctx->frame_skip_threshold)
1106         return 1;
1107     if (score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda) >> 8))
1108         return 1;
1109     return 0;
1110 }
1111
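/* Encode one frame with a temporary codec context and return the resulting
 * packet size in bytes; helper for estimate_best_b_count(). */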
1112 static int encode_frame(AVCodecContext *c, AVFrame *frame)
1113 {
1114     AVPacket pkt = { 0 };
1115     int ret, got_output;
1116
1117     av_init_packet(&pkt);
1118     ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
1119     if (ret < 0)
1120         return ret;
1121
1122     ret = pkt.size;
1123     av_free_packet(&pkt);
1124     return ret;
1125 }
1126
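/* b_frame_strategy == 2: open a downscaled helper encoder, try every possible
 * number of consecutive B-frames on the queued input pictures and return the
 * count with the lowest estimated rate-distortion cost. */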
1127 static int estimate_best_b_count(MpegEncContext *s)
1128 {
1129     AVCodec *codec    = avcodec_find_encoder(s->avctx->codec_id);
1130     AVCodecContext *c = avcodec_alloc_context3(NULL);
1131     AVFrame input[FF_MAX_B_FRAMES + 2];
1132     const int scale = s->avctx->brd_scale;
1133     int i, j, out_size, p_lambda, b_lambda, lambda2;
1134     int64_t best_rd  = INT64_MAX;
1135     int best_b_count = -1;
1136
1137     av_assert0(scale >= 0 && scale <= 3);
1138
1139     //emms_c();
1140     //s->next_picture_ptr->quality;
1141     p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1142     //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1143     b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1144     if (!b_lambda) // FIXME we should do this somewhere else
1145         b_lambda = p_lambda;
1146     lambda2  = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1147                FF_LAMBDA_SHIFT;
1148
1149     c->width        = s->width  >> scale;
1150     c->height       = s->height >> scale;
1151     c->flags        = CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR |
1152                       CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
1153     c->flags       |= s->avctx->flags & CODEC_FLAG_QPEL;
1154     c->mb_decision  = s->avctx->mb_decision;
1155     c->me_cmp       = s->avctx->me_cmp;
1156     c->mb_cmp       = s->avctx->mb_cmp;
1157     c->me_sub_cmp   = s->avctx->me_sub_cmp;
1158     c->pix_fmt      = AV_PIX_FMT_YUV420P;
1159     c->time_base    = s->avctx->time_base;
1160     c->max_b_frames = s->max_b_frames;
1161
1162     if (avcodec_open2(c, codec, NULL) < 0)
1163         return -1;
1164
1165     for (i = 0; i < s->max_b_frames + 2; i++) {
1166         int ysize = c->width * c->height;
1167         int csize = (c->width / 2) * (c->height / 2);
1168         Picture pre_input, *pre_input_ptr = i ? s->input_picture[i - 1] :
1169                                                 s->next_picture_ptr;
1170
1171         avcodec_get_frame_defaults(&input[i]);
1172         input[i].data[0]     = av_malloc(ysize + 2 * csize);
1173         input[i].data[1]     = input[i].data[0] + ysize;
1174         input[i].data[2]     = input[i].data[1] + csize;
1175         input[i].linesize[0] = c->width;
1176         input[i].linesize[1] =
1177         input[i].linesize[2] = c->width / 2;
1178
1179         if (pre_input_ptr && (!i || s->input_picture[i - 1])) {
1180             pre_input = *pre_input_ptr;
1181
1182             if (!pre_input.shared && i) {
1183                 pre_input.f.data[0] += INPLACE_OFFSET;
1184                 pre_input.f.data[1] += INPLACE_OFFSET;
1185                 pre_input.f.data[2] += INPLACE_OFFSET;
1186             }
1187
1188             s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0],
1189                                  pre_input.f.data[0], pre_input.f.linesize[0],
1190                                  c->width,      c->height);
1191             s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1],
1192                                  pre_input.f.data[1], pre_input.f.linesize[1],
1193                                  c->width >> 1, c->height >> 1);
1194             s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2],
1195                                  pre_input.f.data[2], pre_input.f.linesize[2],
1196                                  c->width >> 1, c->height >> 1);
1197         }
1198     }
1199
1200     for (j = 0; j < s->max_b_frames + 1; j++) {
1201         int64_t rd = 0;
1202
1203         if (!s->input_picture[j])
1204             break;
1205
1206         c->error[0] = c->error[1] = c->error[2] = 0;
1207
1208         input[0].pict_type = AV_PICTURE_TYPE_I;
1209         input[0].quality   = 1 * FF_QP2LAMBDA;
1210
1211         out_size = encode_frame(c, &input[0]);
1212
1213         //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1214
1215         for (i = 0; i < s->max_b_frames + 1; i++) {
1216             int is_p = i % (j + 1) == j || i == s->max_b_frames;
1217
1218             input[i + 1].pict_type = is_p ?
1219                                      AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1220             input[i + 1].quality   = is_p ? p_lambda : b_lambda;
1221
1222             out_size = encode_frame(c, &input[i + 1]);
1223
1224             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1225         }
1226
1227         /* get the delayed frames */
1228         while (out_size) {
1229             out_size = encode_frame(c, NULL);
1230             rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
1231         }
1232
1233         rd += c->error[0] + c->error[1] + c->error[2];
1234
1235         if (rd < best_rd) {
1236             best_rd = rd;
1237             best_b_count = j;
1238         }
1239     }
1240
1241     avcodec_close(c);
1242     av_freep(&c);
1243
1244     for (i = 0; i < s->max_b_frames + 2; i++) {
1245         av_freep(&input[i].data[0]);
1246     }
1247
1248     return best_b_count;
1249 }
1250
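/* Choose the next picture(s) to code: force an I-frame when no reference
 * exists (or in intra-only mode), optionally skip the frame, pick the number
 * of B-frames according to b_frame_strategy, respect GOP boundaries and
 * closed GOPs, and fill reordered_input_picture[] / new_picture in coding
 * order. */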
1251 static int select_input_picture(MpegEncContext *s)
1252 {
1253     int i, ret;
1254
1255     for (i = 1; i < MAX_PICTURE_COUNT; i++)
1256         s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1257     s->reordered_input_picture[MAX_PICTURE_COUNT - 1] = NULL;
1258
1259     /* set next picture type & ordering */
1260     if (s->reordered_input_picture[0] == NULL && s->input_picture[0]) {
1261         if (/*s->picture_in_gop_number >= s->gop_size ||*/
1262             s->next_picture_ptr == NULL || s->intra_only) {
1263             s->reordered_input_picture[0] = s->input_picture[0];
1264             s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_I;
1265             s->reordered_input_picture[0]->f.coded_picture_number =
1266                 s->coded_picture_number++;
1267         } else {
1268             int b_frames;
1269
1270             if (s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor) {
1271                 if (s->picture_in_gop_number < s->gop_size &&
1272                     skip_check(s, s->input_picture[0], s->next_picture_ptr)) {
1273                     // FIXME check that the gop check above is +-1 correct
1274                     av_frame_unref(&s->input_picture[0]->f);
1275
1276                     emms_c();
1277                     ff_vbv_update(s, 0);
1278
1279                     goto no_output_pic;
1280                 }
1281             }
1282
1283             if (s->flags & CODEC_FLAG_PASS2) {
1284                 for (i = 0; i < s->max_b_frames + 1; i++) {
1285                     int pict_num = s->input_picture[0]->f.display_picture_number + i;
1286
1287                     if (pict_num >= s->rc_context.num_entries)
1288                         break;
1289                     if (!s->input_picture[i]) {
1290                         s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1291                         break;
1292                     }
1293
1294                     s->input_picture[i]->f.pict_type =
1295                         s->rc_context.entry[pict_num].new_pict_type;
1296                 }
1297             }
1298
1299             if (s->avctx->b_frame_strategy == 0) {
1300                 b_frames = s->max_b_frames;
1301                 while (b_frames && !s->input_picture[b_frames])
1302                     b_frames--;
1303             } else if (s->avctx->b_frame_strategy == 1) {
1304                 for (i = 1; i < s->max_b_frames + 1; i++) {
1305                     if (s->input_picture[i] &&
1306                         s->input_picture[i]->b_frame_score == 0) {
1307                         s->input_picture[i]->b_frame_score =
1308                             get_intra_count(s,
1309                                             s->input_picture[i    ]->f.data[0],
1310                                             s->input_picture[i - 1]->f.data[0],
1311                                             s->linesize) + 1;
1312                     }
1313                 }
1314                 for (i = 0; i < s->max_b_frames + 1; i++) {
1315                     if (s->input_picture[i] == NULL ||
1316                         s->input_picture[i]->b_frame_score - 1 >
1317                             s->mb_num / s->avctx->b_sensitivity)
1318                         break;
1319                 }
1320
1321                 b_frames = FFMAX(0, i - 1);
1322
1323                 /* reset scores */
1324                 for (i = 0; i < b_frames + 1; i++) {
1325                     s->input_picture[i]->b_frame_score = 0;
1326                 }
1327             } else if (s->avctx->b_frame_strategy == 2) {
1328                 b_frames = estimate_best_b_count(s);
1329             } else {
1330                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
1331                 b_frames = 0;
1332             }
1333
1334             emms_c();
1335
1336             for (i = b_frames - 1; i >= 0; i--) {
1337                 int type = s->input_picture[i]->f.pict_type;
1338                 if (type && type != AV_PICTURE_TYPE_B)
1339                     b_frames = i;
1340             }
1341             if (s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_B &&
1342                 b_frames == s->max_b_frames) {
1343                 av_log(s->avctx, AV_LOG_ERROR,
1344                        "warning, too many b frames in a row\n");
1345             }
1346
1347             if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1348                 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1349                     s->gop_size > s->picture_in_gop_number) {
1350                     b_frames = s->gop_size - s->picture_in_gop_number - 1;
1351                 } else {
1352                     if (s->flags & CODEC_FLAG_CLOSED_GOP)
1353                         b_frames = 0;
1354                     s->input_picture[b_frames]->f.pict_type = AV_PICTURE_TYPE_I;
1355                 }
1356             }
1357
1358             if ((s->flags & CODEC_FLAG_CLOSED_GOP) && b_frames &&
1359                 s->input_picture[b_frames]->f.pict_type == AV_PICTURE_TYPE_I)
1360                 b_frames--;
1361
1362             s->reordered_input_picture[0] = s->input_picture[b_frames];
1363             if (s->reordered_input_picture[0]->f.pict_type != AV_PICTURE_TYPE_I)
1364                 s->reordered_input_picture[0]->f.pict_type = AV_PICTURE_TYPE_P;
1365             s->reordered_input_picture[0]->f.coded_picture_number =
1366                 s->coded_picture_number++;
1367             for (i = 0; i < b_frames; i++) {
1368                 s->reordered_input_picture[i + 1] = s->input_picture[i];
1369                 s->reordered_input_picture[i + 1]->f.pict_type =
1370                     AV_PICTURE_TYPE_B;
1371                 s->reordered_input_picture[i + 1]->f.coded_picture_number =
1372                     s->coded_picture_number++;
1373             }
1374         }
1375     }
1376 no_output_pic:
1377     if (s->reordered_input_picture[0]) {
1378         s->reordered_input_picture[0]->reference =
1379            s->reordered_input_picture[0]->f.pict_type !=
1380                AV_PICTURE_TYPE_B ? 3 : 0;
1381
1382         ff_mpeg_unref_picture(s, &s->new_picture);
1383         if ((ret = ff_mpeg_ref_picture(s, &s->new_picture, s->reordered_input_picture[0])))
1384             return ret;
1385
1386         if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1387             // input is a shared pix, so we can't modify it -> alloc a new
1388             // one & ensure that the shared one is reusable
1389
1390             Picture *pic;
1391             int i = ff_find_unused_picture(s, 0);
1392             if (i < 0)
1393                 return i;
1394             pic = &s->picture[i];
1395
1396             pic->reference = s->reordered_input_picture[0]->reference;
1397             if (ff_alloc_picture(s, pic, 0) < 0) {
1398                 return -1;
1399             }
1400
1401             ret = av_frame_copy_props(&pic->f, &s->reordered_input_picture[0]->f);
1402             if (ret < 0)
1403                 return ret;
1404
1405             /* mark us unused / free shared pic */
1406             av_frame_unref(&s->reordered_input_picture[0]->f);
1407             s->reordered_input_picture[0]->shared = 0;
1408
1409             s->current_picture_ptr = pic;
1410         } else {
1411             // input is not a shared pix -> reuse buffer for current_pix
1412             s->current_picture_ptr = s->reordered_input_picture[0];
1413             for (i = 0; i < 4; i++) {
1414                 s->new_picture.f.data[i] += INPLACE_OFFSET;
1415             }
1416         }
1417         ff_mpeg_unref_picture(s, &s->current_picture);
1418         if ((ret = ff_mpeg_ref_picture(s, &s->current_picture,
1419                                        s->current_picture_ptr)) < 0)
1420             return ret;
1421
1422         s->picture_number = s->new_picture.f.display_picture_number;
1423     } else {
1424         ff_mpeg_unref_picture(s, &s->new_picture);
1425     }
1426     return 0;
1427 }
1428
1429 int ff_MPV_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1430                           AVFrame *pic_arg, int *got_packet)
1431 {
1432     MpegEncContext *s = avctx->priv_data;
1433     int i, stuffing_count, ret;
1434     int context_count = s->slice_context_count;
1435
1436     s->picture_in_gop_number++;
1437
1438     if (load_input_picture(s, pic_arg) < 0)
1439         return -1;
1440
1441     if (select_input_picture(s) < 0) {
1442         return -1;
1443     }
1444
1445     /* output? */
1446     if (s->new_picture.f.data[0]) {
1447         if ((ret = ff_alloc_packet2(avctx, pkt, s->mb_width*s->mb_height*(MAX_MB_BYTES+100)+10000)) < 0)
1448             return ret;
1449         if (s->mb_info) {
1450             s->mb_info_ptr = av_packet_new_side_data(pkt,
1451                                  AV_PKT_DATA_H263_MB_INFO,
1452                                  s->mb_width*s->mb_height*12);
1453             s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1454         }
1455
1456         for (i = 0; i < context_count; i++) {
1457             int start_y = s->thread_context[i]->start_mb_y;
1458             int   end_y = s->thread_context[i]->  end_mb_y;
1459             int h       = s->mb_height;
1460             uint8_t *start = pkt->data + (size_t)(((int64_t) pkt->size) * start_y / h);
1461             uint8_t *end   = pkt->data + (size_t)(((int64_t) pkt->size) *   end_y / h);
1462
1463             init_put_bits(&s->thread_context[i]->pb, start, end - start);
1464         }
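        /* The loop above splits the output packet between the slice threads in
         * proportion to the MB rows each one covers, so every thread writes
         * into its own region of pkt->data; e.g. with two threads sharing
         * mb_height evenly, each gets roughly half of the buffer. */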
1465
1466         s->pict_type = s->new_picture.f.pict_type;
1467         //emms_c();
1468         if (ff_MPV_frame_start(s, avctx) < 0)
1469             return -1;
1470 vbv_retry:
1471         if (encode_picture(s, s->picture_number) < 0)
1472             return -1;
1473
1474         avctx->header_bits = s->header_bits;
1475         avctx->mv_bits     = s->mv_bits;
1476         avctx->misc_bits   = s->misc_bits;
1477         avctx->i_tex_bits  = s->i_tex_bits;
1478         avctx->p_tex_bits  = s->p_tex_bits;
1479         avctx->i_count     = s->i_count;
1480         // FIXME f/b_count in avctx
1481         avctx->p_count     = s->mb_num - s->i_count - s->skip_count;
1482         avctx->skip_count  = s->skip_count;
1483
1484         ff_MPV_frame_end(s);
1485
1486         if (CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG)
1487             ff_mjpeg_encode_picture_trailer(s);
1488
1489         if (avctx->rc_buffer_size) {
1490             RateControlContext *rcc = &s->rc_context;
1491             int max_size = rcc->buffer_index * avctx->rc_max_available_vbv_use;
1492
1493             if (put_bits_count(&s->pb) > max_size &&
1494                 s->lambda < s->avctx->lmax) {
1495                 s->next_lambda = FFMAX(s->lambda + 1, s->lambda *
1496                                        (s->qscale + 1) / s->qscale);
1497                 if (s->adaptive_quant) {
1498                     int i;
1499                     for (i = 0; i < s->mb_height * s->mb_stride; i++)
1500                         s->lambda_table[i] =
1501                             FFMAX(s->lambda_table[i] + 1,
1502                                   s->lambda_table[i] * (s->qscale + 1) /
1503                                   s->qscale);
1504                 }
1505                 s->mb_skipped = 0;        // done in MPV_frame_start()
1506                 // undo the per-frame state changes made in encode_picture() before retrying
1507                 if (s->pict_type == AV_PICTURE_TYPE_P) {
1508                     if (s->flipflop_rounding          ||
1509                         s->codec_id == AV_CODEC_ID_H263P ||
1510                         s->codec_id == AV_CODEC_ID_MPEG4)
1511                         s->no_rounding ^= 1;
1512                 }
1513                 if (s->pict_type != AV_PICTURE_TYPE_B) {
1514                     s->time_base       = s->last_time_base;
1515                     s->last_non_b_time = s->time - s->pp_time;
1516                 }
1517                 for (i = 0; i < context_count; i++) {
1518                     PutBitContext *pb = &s->thread_context[i]->pb;
1519                     init_put_bits(pb, pb->buf, pb->buf_end - pb->buf);
1520                 }
1521                 goto vbv_retry;
1522             }
1523
1524             assert(s->avctx->rc_max_rate);
1525         }
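        /* Rate-control recap: if the frame just encoded overflows the usable
         * part of the VBV buffer (buffer_index * rc_max_available_vbv_use) and
         * lambda can still grow, lambda (and the per-MB lambda_table under
         * adaptive quantization) is scaled by roughly (qscale + 1) / qscale --
         * e.g. 5/4 at qscale 4 -- the per-frame state changed by
         * encode_picture() is undone, the per-thread bit writers are rewound,
         * and the frame is re-encoded via the vbv_retry label. */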
1526
1527         if (s->flags & CODEC_FLAG_PASS1)
1528             ff_write_pass1_stats(s);
1529
1530         for (i = 0; i < 4; i++) {
1531             s->current_picture_ptr->f.error[i] = s->current_picture.f.error[i];
1532             avctx->error[i] += s->current_picture_ptr->f.error[i];
1533         }
1534
1535         if (s->flags & CODEC_FLAG_PASS1)
1536             assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits +
1537                    avctx->i_tex_bits + avctx->p_tex_bits ==
1538                        put_bits_count(&s->pb));
1539         flush_put_bits(&s->pb);
1540         s->frame_bits  = put_bits_count(&s->pb);
1541
1542         stuffing_count = ff_vbv_update(s, s->frame_bits);
1543         s->stuffing_bits = 8*stuffing_count;
1544         if (stuffing_count) {
1545             if (s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb) >> 3) <
1546                     stuffing_count + 50) {
1547                 av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
1548                 return -1;
1549             }
1550
1551             switch (s->codec_id) {
1552             case AV_CODEC_ID_MPEG1VIDEO:
1553             case AV_CODEC_ID_MPEG2VIDEO:
1554                 while (stuffing_count--) {
1555                     put_bits(&s->pb, 8, 0);
1556                 }
1557             break;
1558             case AV_CODEC_ID_MPEG4:
1559                 put_bits(&s->pb, 16, 0);
1560                 put_bits(&s->pb, 16, 0x1C3);
1561                 stuffing_count -= 4;
1562                 while (stuffing_count--) {
1563                     put_bits(&s->pb, 8, 0xFF);
1564                 }
1565             break;
1566             default:
1567                 av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1568             }
1569             flush_put_bits(&s->pb);
1570             s->frame_bits  = put_bits_count(&s->pb);
1571         }
1572
1573         /* update mpeg1/2 vbv_delay for CBR */
1574         if (s->avctx->rc_max_rate                          &&
1575             s->avctx->rc_min_rate == s->avctx->rc_max_rate &&
1576             s->out_format == FMT_MPEG1                     &&
1577             90000LL * (avctx->rc_buffer_size - 1) <=
1578                 s->avctx->rc_max_rate * 0xFFFFLL) {
1579             int vbv_delay, min_delay;
1580             double inbits  = s->avctx->rc_max_rate *
1581                              av_q2d(s->avctx->time_base);
1582             int    minbits = s->frame_bits - 8 *
1583                              (s->vbv_delay_ptr - s->pb.buf - 1);
1584             double bits    = s->rc_context.buffer_index + minbits - inbits;
1585
1586             if (bits < 0)
1587                 av_log(s->avctx, AV_LOG_ERROR,
1588                        "Internal error, negative bits\n");
1589
1590             assert(s->repeat_first_field == 0);
1591
1592             vbv_delay = bits * 90000 / s->avctx->rc_max_rate;
1593             min_delay = (minbits * 90000LL + s->avctx->rc_max_rate - 1) /
1594                         s->avctx->rc_max_rate;
1595
1596             vbv_delay = FFMAX(vbv_delay, min_delay);
1597
1598             av_assert0(vbv_delay < 0xFFFF);
1599
1600             s->vbv_delay_ptr[0] &= 0xF8;
1601             s->vbv_delay_ptr[0] |= vbv_delay >> 13;
1602             s->vbv_delay_ptr[1]  = vbv_delay >> 5;
1603             s->vbv_delay_ptr[2] &= 0x07;
1604             s->vbv_delay_ptr[2] |= vbv_delay << 3;
1605             avctx->vbv_delay     = vbv_delay * 300;
1606         }
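        /* The 16-bit vbv_delay is expressed in 90 kHz ticks and derived from
         * the bits still queued in the rate-control buffer.  It straddles three
         * partially used bytes (presumably the picture header's vbv_delay
         * field), so it is patched in place: the top 3 bits go into the low
         * bits of vbv_delay_ptr[0], the middle 8 bits fill vbv_delay_ptr[1],
         * and the low 5 bits land in the high bits of vbv_delay_ptr[2].
         * Multiplying by 300 converts to the 27 MHz units of avctx->vbv_delay. */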
1607         s->total_bits     += s->frame_bits;
1608         avctx->frame_bits  = s->frame_bits;
1609
1610         pkt->pts = s->current_picture.f.pts;
1611         if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
1612             if (!s->current_picture.f.coded_picture_number)
1613                 pkt->dts = pkt->pts - s->dts_delta;
1614             else
1615                 pkt->dts = s->reordered_pts;
1616             s->reordered_pts = pkt->pts;
1617         } else
1618             pkt->dts = pkt->pts;
1619         if (s->current_picture.f.key_frame)
1620             pkt->flags |= AV_PKT_FLAG_KEY;
1621         if (s->mb_info)
1622             av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
1623     } else {
1624         s->frame_bits = 0;
1625     }
1626
1627     /* release non-reference frames */
1628     for (i = 0; i < MAX_PICTURE_COUNT; i++) {
1629         if (!s->picture[i].reference)
1630             ff_mpeg_unref_picture(s, &s->picture[i]);
1631     }
1632
1633     assert((s->frame_bits & 7) == 0);
1634
1635     pkt->size = s->frame_bits / 8;
1636     *got_packet = !!pkt->size;
1637     return 0;
1638 }
1639
1640 static inline void dct_single_coeff_elimination(MpegEncContext *s,
1641                                                 int n, int threshold)
1642 {
1643     static const char tab[64] = {
1644         3, 2, 2, 1, 1, 1, 1, 1,
1645         1, 1, 1, 1, 1, 1, 1, 1,
1646         1, 1, 1, 1, 1, 1, 1, 1,
1647         0, 0, 0, 0, 0, 0, 0, 0,
1648         0, 0, 0, 0, 0, 0, 0, 0,
1649         0, 0, 0, 0, 0, 0, 0, 0,
1650         0, 0, 0, 0, 0, 0, 0, 0,
1651         0, 0, 0, 0, 0, 0, 0, 0
1652     };
1653     int score = 0;
1654     int run = 0;
1655     int i;
1656     int16_t *block = s->block[n];
1657     const int last_index = s->block_last_index[n];
1658     int skip_dc;
1659
1660     if (threshold < 0) {
1661         skip_dc = 0;
1662         threshold = -threshold;
1663     } else
1664         skip_dc = 1;
1665
1666     /* Are all the coefficients we could set to zero already zero? */
1667     if (last_index <= skip_dc - 1)
1668         return;
1669
1670     for (i = 0; i <= last_index; i++) {
1671         const int j = s->intra_scantable.permutated[i];
1672         const int level = FFABS(block[j]);
1673         if (level == 1) {
1674             if (skip_dc && i == 0)
1675                 continue;
1676             score += tab[run];
1677             run = 0;
1678         } else if (level > 1) {
1679             return;
1680         } else {
1681             run++;
1682         }
1683     }
1684     if (score >= threshold)
1685         return;
1686     for (i = skip_dc; i <= last_index; i++) {
1687         const int j = s->intra_scantable.permutated[i];
1688         block[j] = 0;
1689     }
1690     if (block[0])
1691         s->block_last_index[n] = 0;
1692     else
1693         s->block_last_index[n] = -1;
1694 }
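/* In short: the block above is zeroed only when every nonzero coefficient has
 * magnitude 1 and the sum of tab[run] over those coefficients (run being the
 * number of zeros since the previous nonzero one, so clustered +-1s cost more)
 * stays below the threshold; any |level| > 1 keeps the block.  With a
 * non-negative threshold the DC coefficient is neither scored nor cleared,
 * while a negative threshold (negated before use) allows clearing DC too. */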
1695
1696 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
1697                                int last_index)
1698 {
1699     int i;
1700     const int maxlevel = s->max_qcoeff;
1701     const int minlevel = s->min_qcoeff;
1702     int overflow = 0;
1703
1704     if (s->mb_intra) {
1705         i = 1; // skip clipping of intra dc
1706     } else
1707         i = 0;
1708
1709     for (; i <= last_index; i++) {
1710         const int j = s->intra_scantable.permutated[i];
1711         int level = block[j];
1712
1713         if (level > maxlevel) {
1714             level = maxlevel;
1715             overflow++;
1716         } else if (level < minlevel) {
1717             level = minlevel;
1718             overflow++;
1719         }
1720
1721         block[j] = level;
1722     }
1723
1724     if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
1725         av_log(s->avctx, AV_LOG_INFO,
1726                "warning, clipping %d dct coefficients to %d..%d\n",
1727                overflow, minlevel, maxlevel);
1728 }
1729
1730 static void get_visual_weight(int16_t *weight, uint8_t *ptr, int stride)
1731 {
1732     int x, y;
1733     // FIXME optimize
1734     for (y = 0; y < 8; y++) {
1735         for (x = 0; x < 8; x++) {
1736             int x2, y2;
1737             int sum = 0;
1738             int sqr = 0;
1739             int count = 0;
1740
1741             for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
1742                 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
1743                     int v = ptr[x2 + y2 * stride];
1744                     sum += v;
1745                     sqr += v * v;
1746                     count++;
1747                 }
1748             }
1749             weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
1750         }
1751     }
1752 }
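/* The weight computed above equals 36 * sqrt(count*sqr - sum*sum) / count,
 * i.e. 36 times the standard deviation of each pixel's 3x3 neighbourhood
 * (clipped at the block border).  dct_quantize_refine() presumably uses it to
 * push quantization noise toward busier areas where it is less visible. */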
1753
1754 static av_always_inline void encode_mb_internal(MpegEncContext *s,
1755                                                 int motion_x, int motion_y,
1756                                                 int mb_block_height,
1757                                                 int mb_block_width,
1758                                                 int mb_block_count)
1759 {
1760     int16_t weight[12][64];
1761     int16_t orig[12][64];
1762     const int mb_x = s->mb_x;
1763     const int mb_y = s->mb_y;
1764     int i;
1765     int skip_dct[12];
1766     int dct_offset = s->linesize * 8; // default for progressive frames
1767     int uv_dct_offset = s->uvlinesize * 8;
1768     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
1769     int wrap_y, wrap_c;
1770
1771     for (i = 0; i < mb_block_count; i++)
1772         skip_dct[i] = s->skipdct;
1773
1774     if (s->adaptive_quant) {
1775         const int last_qp = s->qscale;
1776         const int mb_xy = mb_x + mb_y * s->mb_stride;
1777
1778         s->lambda = s->lambda_table[mb_xy];
1779         update_qscale(s);
1780
1781         if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
1782             s->qscale = s->current_picture_ptr->qscale_table[mb_xy];
1783             s->dquant = s->qscale - last_qp;
1784
1785             if (s->out_format == FMT_H263) {
1786                 s->dquant = av_clip(s->dquant, -2, 2);
1787
1788                 if (s->codec_id == AV_CODEC_ID_MPEG4) {
1789                     if (!s->mb_intra) {
1790                         if (s->pict_type == AV_PICTURE_TYPE_B) {
1791                             if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
1792                                 s->dquant = 0;
1793                         }
1794                         if (s->mv_type == MV_TYPE_8X8)
1795                             s->dquant = 0;
1796                     }
1797                 }
1798             }
1799         }
1800         ff_set_qscale(s, last_qp + s->dquant);
1801     } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
1802         ff_set_qscale(s, s->qscale + s->dquant);
1803
1804     wrap_y = s->linesize;
1805     wrap_c = s->uvlinesize;
1806     ptr_y  = s->new_picture.f.data[0] +
1807              (mb_y * 16 * wrap_y)              + mb_x * 16;
1808     ptr_cb = s->new_picture.f.data[1] +
1809              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1810     ptr_cr = s->new_picture.f.data[2] +
1811              (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
1812
1813     if((mb_x*16+16 > s->width || mb_y*16+16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
1814         uint8_t *ebuf = s->edge_emu_buffer + 32;
1815         int cw = (s->width  + s->chroma_x_shift) >> s->chroma_x_shift;
1816         int ch = (s->height + s->chroma_y_shift) >> s->chroma_y_shift;
1817         s->vdsp.emulated_edge_mc(ebuf, ptr_y, wrap_y, 16, 16, mb_x * 16,
1818                                  mb_y * 16, s->width, s->height);
1819         ptr_y = ebuf;
1820         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y, ptr_cb, wrap_c, mb_block_width,
1821                                  mb_block_height, mb_x * mb_block_width, mb_y * mb_block_height,
1822                                  cw, ch);
1823         ptr_cb = ebuf + 18 * wrap_y;
1824         s->vdsp.emulated_edge_mc(ebuf + 18 * wrap_y + 16, ptr_cr, wrap_c, mb_block_width,
1825                                  mb_block_height, mb_x * mb_block_width, mb_y * mb_block_height,
1826                                  cw, ch);
1827         ptr_cr = ebuf + 18 * wrap_y + 16;
1828     }
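    /* For macroblocks that extend past the coded frame size, the source
     * pointers have been redirected above into edge_emu_buffer, where
     * emulated_edge_mc() replicated the border pixels: luma at the start of
     * ebuf, Cb at ebuf + 18 * wrap_y and Cr 16 bytes after Cb. */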
1829
1830     if (s->mb_intra) {
1831         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1832             int progressive_score, interlaced_score;
1833
1834             s->interlaced_dct = 0;
1835             progressive_score = s->dsp.ildct_cmp[4](s, ptr_y,
1836                                                     NULL, wrap_y, 8) +
1837                                 s->dsp.ildct_cmp[4](s, ptr_y + wrap_y * 8,
1838                                                     NULL, wrap_y, 8) - 400;
1839
1840             if (progressive_score > 0) {
1841                 interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y,
1842                                                        NULL, wrap_y * 2, 8) +
1843                                    s->dsp.ildct_cmp[4](s, ptr_y + wrap_y,
1844                                                        NULL, wrap_y * 2, 8);
1845                 if (progressive_score > interlaced_score) {
1846                     s->interlaced_dct = 1;
1847
1848                     dct_offset = wrap_y;
1849                     uv_dct_offset = wrap_c;
1850                     wrap_y <<= 1;
1851                     if (s->chroma_format == CHROMA_422 ||
1852                         s->chroma_format == CHROMA_444)
1853                         wrap_c <<= 1;
1854                 }
1855             }
1856         }
1857
1858         s->dsp.get_pixels(s->block[0], ptr_y                  , wrap_y);
1859         s->dsp.get_pixels(s->block[1], ptr_y              + 8 , wrap_y);
1860         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset     , wrap_y);
1861         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y);
1862
1863         if (s->flags & CODEC_FLAG_GRAY) {
1864             skip_dct[4] = 1;
1865             skip_dct[5] = 1;
1866         } else {
1867             s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
1868             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
1869             if (!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */
1870                 s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
1871                 s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
1872             } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */
1873                 s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c);
1874                 s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c);
1875                 s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c);
1876                 s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c);
1877                 s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
1878                 s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
1879             }
1880         }
1881     } else {
1882         op_pixels_func (*op_pix)[4];
1883         qpel_mc_func (*op_qpix)[16];
1884         uint8_t *dest_y, *dest_cb, *dest_cr;
1885
1886         dest_y  = s->dest[0];
1887         dest_cb = s->dest[1];
1888         dest_cr = s->dest[2];
1889
1890         if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
1891             op_pix  = s->hdsp.put_pixels_tab;
1892             op_qpix = s->dsp.put_qpel_pixels_tab;
1893         } else {
1894             op_pix  = s->hdsp.put_no_rnd_pixels_tab;
1895             op_qpix = s->dsp.put_no_rnd_qpel_pixels_tab;
1896         }
1897
1898         if (s->mv_dir & MV_DIR_FORWARD) {
1899             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 0,
1900                           s->last_picture.f.data,
1901                           op_pix, op_qpix);
1902             op_pix  = s->hdsp.avg_pixels_tab;
1903             op_qpix = s->dsp.avg_qpel_pixels_tab;
1904         }
1905         if (s->mv_dir & MV_DIR_BACKWARD) {
1906             ff_MPV_motion(s, dest_y, dest_cb, dest_cr, 1,
1907                           s->next_picture.f.data,
1908                           op_pix, op_qpix);
1909         }
1910
1911         if (s->flags & CODEC_FLAG_INTERLACED_DCT) {
1912             int progressive_score, interlaced_score;
1913
1914             s->interlaced_dct = 0;
1915             progressive_score = s->dsp.ildct_cmp[0](s, dest_y,
1916                                                     ptr_y,              wrap_y,
1917                                                     8) +
1918                                 s->dsp.ildct_cmp[0](s, dest_y + wrap_y * 8,
1919                                                     ptr_y + wrap_y * 8, wrap_y,
1920                                                     8) - 400;
1921
1922             if (s->avctx->ildct_cmp == FF_CMP_VSSE)
1923                 progressive_score -= 400;
1924
1925             if (progressive_score > 0) {
1926                 interlaced_score = s->dsp.ildct_cmp[0](s, dest_y,
1927                                                        ptr_y,
1928                                                        wrap_y * 2, 8) +
1929                                    s->dsp.ildct_cmp[0](s, dest_y + wrap_y,
1930                                                        ptr_y + wrap_y,
1931                                                        wrap_y * 2, 8);
1932
1933                 if (progressive_score > interlaced_score) {
1934                     s->interlaced_dct = 1;
1935
1936                     dct_offset = wrap_y;
1937                     uv_dct_offset = wrap_c;
1938                     wrap_y <<= 1;
1939                     if (s->chroma_format == CHROMA_422)
1940                         wrap_c <<= 1;
1941                 }
1942             }
1943         }
1944
1945         s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
1946         s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
1947         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset,
1948                            dest_y + dct_offset, wrap_y);
1949         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
1950                            dest_y + dct_offset + 8, wrap_y);
1951
1952         if (s->flags & CODEC_FLAG_GRAY) {
1953             skip_dct[4] = 1;
1954             skip_dct[5] = 1;
1955         } else {
1956             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
1957             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
1958             if (!s->chroma_y_shift) { /* 422 */
1959                 s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
1960                                    dest_cb + uv_dct_offset, wrap_c);
1961                 s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
1962                                    dest_cr + uv_dct_offset, wrap_c);
1963             }
1964         }
1965         /* pre quantization */
1966         if (s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] <
1967                 2 * s->qscale * s->qscale) {
1968             // FIXME optimize
1969             if (s->dsp.sad[1](NULL, ptr_y , dest_y,
1970                               wrap_y, 8) < 20 * s->qscale)
1971                 skip_dct[0] = 1;
1972             if (s->dsp.sad[1](NULL, ptr_y + 8,
1973                               dest_y + 8, wrap_y, 8) < 20 * s->qscale)
1974                 skip_dct[1] = 1;
1975             if (s->dsp.sad[1](NULL, ptr_y + dct_offset,
1976                               dest_y + dct_offset, wrap_y, 8) < 20 * s->qscale)
1977                 skip_dct[2] = 1;
1978             if (s->dsp.sad[1](NULL, ptr_y + dct_offset + 8,
1979                               dest_y + dct_offset + 8,
1980                               wrap_y, 8) < 20 * s->qscale)
1981                 skip_dct[3] = 1;
1982             if (s->dsp.sad[1](NULL, ptr_cb, dest_cb,
1983                               wrap_c, 8) < 20 * s->qscale)
1984                 skip_dct[4] = 1;
1985             if (s->dsp.sad[1](NULL, ptr_cr, dest_cr,
1986                               wrap_c, 8) < 20 * s->qscale)
1987                 skip_dct[5] = 1;
1988             if (!s->chroma_y_shift) { /* 422 */
1989                 if (s->dsp.sad[1](NULL, ptr_cb + uv_dct_offset,
1990                                   dest_cb + uv_dct_offset,
1991                                   wrap_c, 8) < 20 * s->qscale)
1992                     skip_dct[6] = 1;
1993                 if (s->dsp.sad[1](NULL, ptr_cr + uv_dct_offset,
1994                                   dest_cr + uv_dct_offset,
1995                                   wrap_c, 8) < 20 * s->qscale)
1996                     skip_dct[7] = 1;
1997             }
1998         }
1999     }
2000
2001     if (s->quantizer_noise_shaping) {
2002         if (!skip_dct[0])
2003             get_visual_weight(weight[0], ptr_y                 , wrap_y);
2004         if (!skip_dct[1])
2005             get_visual_weight(weight[1], ptr_y              + 8, wrap_y);
2006         if (!skip_dct[2])
2007             get_visual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
2008         if (!skip_dct[3])
2009             get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2010         if (!skip_dct[4])
2011             get_visual_weight(weight[4], ptr_cb                , wrap_c);
2012         if (!skip_dct[5])
2013             get_visual_weight(weight[5], ptr_cr                , wrap_c);
2014         if (!s->chroma_y_shift) { /* 422 */
2015             if (!skip_dct[6])
2016                 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2017                                   wrap_c);
2018             if (!skip_dct[7])
2019                 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2020                                   wrap_c);
2021         }
2022         memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2023     }
2024
2025     /* DCT & quantize */
2026     av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2027     {
2028         for (i = 0; i < mb_block_count; i++) {
2029             if (!skip_dct[i]) {
2030                 int overflow;
2031                 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2032                 // FIXME we could decide to change the quantizer instead of
2033                 // clipping
2034                 // JS: I don't think that would be a good idea, it could lower
2035                 //     quality instead of improving it. Only INTRADC clipping
2036                 //     warrants changing the quantizer
2037                 if (overflow)
2038                     clip_coeffs(s, s->block[i], s->block_last_index[i]);
2039             } else
2040                 s->block_last_index[i] = -1;
2041         }
2042         if (s->quantizer_noise_shaping) {
2043             for (i = 0; i < mb_block_count; i++) {
2044                 if (!skip_dct[i]) {
2045                     s->block_last_index[i] =
2046                         dct_quantize_refine(s, s->block[i], weight[i],
2047                                             orig[i], i, s->qscale);
2048                 }
2049             }
2050         }
2051
2052         if (s->luma_elim_threshold && !s->mb_intra)
2053             for (i = 0; i < 4; i++)
2054                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2055         if (s->chroma_elim_threshold && !s->mb_intra)
2056             for (i = 4; i < mb_block_count; i++)
2057                 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2058
2059         if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2060             for (i = 0; i < mb_block_count; i++) {
2061                 if (s->block_last_index[i] == -1)
2062                     s->coded_score[i] = INT_MAX / 256;
2063             }
2064         }
2065     }
2066
2067     if ((s->flags & CODEC_FLAG_GRAY) && s->mb_intra) {
2068         s->block_last_index[4] =
2069         s->block_last_index[5] = 0;
2070         s->block[4][0] =
2071         s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2072         if (!s->chroma_y_shift) { /* 422 / 444 */
2073             for (i=6; i<12; i++) {
2074                 s->block_last_index[i] = 0;
2075                 s->block[i][0] = s->block[4][0];
2076             }
2077         }
2078     }
2079
2080     // FIXME: the non-C quantize code returns an incorrect block_last_index
2081     if (s->alternate_scan && s->dct_quantize != ff_dct_quantize_c) {
2082         for (i = 0; i < mb_block_count; i++) {
2083             int j;
2084             if (s->block_last_index[i] > 0) {
2085                 for (j = 63; j > 0; j--) {
2086                     if (s->block[i][s->intra_scantable.permutated[j]])
2087                         break;
2088                 }
2089                 s->block_last_index[i] = j;
2090             }
2091         }
2092     }
2093
2094     /* huffman encode */
2095     switch(s->codec_id){ //FIXME funct ptr could be slightly faster
2096     case AV_CODEC_ID_MPEG1VIDEO:
2097     case AV_CODEC_ID_MPEG2VIDEO:
2098         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2099             ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2100         break;
2101     case AV_CODEC_ID_MPEG4:
2102         if (CONFIG_MPEG4_ENCODER)
2103             ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2104         break;
2105     case AV_CODEC_ID_MSMPEG4V2:
2106     case AV_CODEC_ID_MSMPEG4V3:
2107     case AV_CODEC_ID_WMV1:
2108         if (CONFIG_MSMPEG4_ENCODER)
2109             ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2110         break;
2111     case AV_CODEC_ID_WMV2:
2112         if (CONFIG_WMV2_ENCODER)
2113             ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2114         break;
2115     case AV_CODEC_ID_H261:
2116         if (CONFIG_H261_ENCODER)
2117             ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2118         break;
2119     case AV_CODEC_ID_H263:
2120     case AV_CODEC_ID_H263P:
2121     case AV_CODEC_ID_FLV1:
2122     case AV_CODEC_ID_RV10:
2123     case AV_CODEC_ID_RV20:
2124         if (CONFIG_H263_ENCODER)
2125             ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2126         break;
2127     case AV_CODEC_ID_MJPEG:
2128     case AV_CODEC_ID_AMV:
2129         if (CONFIG_MJPEG_ENCODER)
2130             ff_mjpeg_encode_mb(s, s->block);
2131         break;
2132     default:
2133         av_assert1(0);
2134     }
2135 }
2136
2137 static av_always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2138 {
2139     if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y,  8, 8, 6);
2140     else if (s->chroma_format == CHROMA_422) encode_mb_internal(s, motion_x, motion_y, 16, 8, 8);
2141     else encode_mb_internal(s, motion_x, motion_y, 16, 16, 12);
2142 }
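/* The block counts follow the chroma format: 4:2:0 uses 6 blocks per MB
 * (4 luma + Cb + Cr), 4:2:2 uses 8 (two 8x8 blocks per 8x16 chroma plane) and
 * 4:4:4 uses 12 (four blocks per 16x16 chroma plane). */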
2143
2144 static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
2145     int i;
2146
2147     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2148
2149     /* mpeg1 */
2150     d->mb_skip_run= s->mb_skip_run;
2151     for(i=0; i<3; i++)
2152         d->last_dc[i] = s->last_dc[i];
2153
2154     /* statistics */
2155     d->mv_bits= s->mv_bits;
2156     d->i_tex_bits= s->i_tex_bits;
2157     d->p_tex_bits= s->p_tex_bits;
2158     d->i_count= s->i_count;
2159     d->f_count= s->f_count;
2160     d->b_count= s->b_count;
2161     d->skip_count= s->skip_count;
2162     d->misc_bits= s->misc_bits;
2163     d->last_bits= 0;
2164
2165     d->mb_skipped= 0;
2166     d->qscale= s->qscale;
2167     d->dquant= s->dquant;
2168
2169     d->esc3_level_length= s->esc3_level_length;
2170 }
2171
2172 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
2173     int i;
2174
2175     memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2176     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2177
2178     /* mpeg1 */
2179     d->mb_skip_run= s->mb_skip_run;
2180     for(i=0; i<3; i++)
2181         d->last_dc[i] = s->last_dc[i];
2182
2183     /* statistics */
2184     d->mv_bits= s->mv_bits;
2185     d->i_tex_bits= s->i_tex_bits;
2186     d->p_tex_bits= s->p_tex_bits;
2187     d->i_count= s->i_count;
2188     d->f_count= s->f_count;
2189     d->b_count= s->b_count;
2190     d->skip_count= s->skip_count;
2191     d->misc_bits= s->misc_bits;
2192
2193     d->mb_intra= s->mb_intra;
2194     d->mb_skipped= s->mb_skipped;
2195     d->mv_type= s->mv_type;
2196     d->mv_dir= s->mv_dir;
2197     d->pb= s->pb;
2198     if(s->data_partitioning){
2199         d->pb2= s->pb2;
2200         d->tex_pb= s->tex_pb;
2201     }
2202     d->block= s->block;
2203     for(i=0; i<8; i++)
2204         d->block_last_index[i]= s->block_last_index[i];
2205     d->interlaced_dct= s->interlaced_dct;
2206     d->qscale= s->qscale;
2207
2208     d->esc3_level_length= s->esc3_level_length;
2209 }
2210
2211 static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
2212                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2213                            int *dmin, int *next_block, int motion_x, int motion_y)
2214 {
2215     int score;
2216     uint8_t *dest_backup[3];
2217
2218     copy_context_before_encode(s, backup, type);
2219
2220     s->block= s->blocks[*next_block];
2221     s->pb= pb[*next_block];
2222     if(s->data_partitioning){
2223         s->pb2   = pb2   [*next_block];
2224         s->tex_pb= tex_pb[*next_block];
2225     }
2226
2227     if(*next_block){
2228         memcpy(dest_backup, s->dest, sizeof(s->dest));
2229         s->dest[0] = s->rd_scratchpad;
2230         s->dest[1] = s->rd_scratchpad + 16*s->linesize;
2231         s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
2232         assert(s->linesize >= 32); //FIXME
2233     }
2234
2235     encode_mb(s, motion_x, motion_y);
2236
2237     score= put_bits_count(&s->pb);
2238     if(s->data_partitioning){
2239         score+= put_bits_count(&s->pb2);
2240         score+= put_bits_count(&s->tex_pb);
2241     }
2242
2243     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2244         ff_MPV_decode_mb(s, s->block);
2245
2246         score *= s->lambda2;
2247         score += sse_mb(s) << FF_LAMBDA_SHIFT;
2248     }
2249
2250     if(*next_block){
2251         memcpy(s->dest, dest_backup, sizeof(s->dest));
2252     }
2253
2254     if(score<*dmin){
2255         *dmin= score;
2256         *next_block^=1;
2257
2258         copy_context_after_encode(best, s, type);
2259     }
2260 }
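/* encode_mb_hq() trial-encodes one candidate MB type into one of two scratch
 * block/bitstream sets selected by *next_block.  The score is the number of
 * bits produced; with FF_MB_DECISION_RD the MB is additionally decoded and the
 * score becomes bits * lambda2 + (SSE << FF_LAMBDA_SHIFT).  When a candidate
 * beats *dmin its context is copied into *best and *next_block flips, so later
 * candidates cannot overwrite the winning scratch set. */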
2261
2262 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
2263     uint32_t *sq = ff_squareTbl + 256;
2264     int acc=0;
2265     int x,y;
2266
2267     if(w==16 && h==16)
2268         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
2269     else if(w==8 && h==8)
2270         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
2271
2272     for(y=0; y<h; y++){
2273         for(x=0; x<w; x++){
2274             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2275         }
2276     }
2277
2278     av_assert2(acc>=0);
2279
2280     return acc;
2281 }
2282
2283 static int sse_mb(MpegEncContext *s){
2284     int w= 16;
2285     int h= 16;
2286
2287     if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2288     if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2289
2290     if(w==16 && h==16)
2291       if(s->avctx->mb_cmp == FF_CMP_NSSE){
2292         return  s->dsp.nsse[0](s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2293                +s->dsp.nsse[1](s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2294                +s->dsp.nsse[1](s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2295       }else{
2296         return  s->dsp.sse[0](NULL, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
2297                +s->dsp.sse[1](NULL, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
2298                +s->dsp.sse[1](NULL, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
2299       }
2300     else
2301         return  sse(s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize)
2302                +sse(s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize)
2303                +sse(s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
2304 }
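/* sse_mb() scores a full 16x16 macroblock with the optimized dsp.sse (or
 * dsp.nsse when mb_cmp == FF_CMP_NSSE) functions and falls back to the scalar
 * sse() above, with halved chroma dimensions, for macroblocks clipped by the
 * right or bottom frame edge. */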
2305
2306 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2307     MpegEncContext *s= *(void**)arg;
2308
2309
2310     s->me.pre_pass=1;
2311     s->me.dia_size= s->avctx->pre_dia_size;
2312     s->first_slice_line=1;
2313     for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2314         for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2315             ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2316         }
2317         s->first_slice_line=0;
2318     }
2319
2320     s->me.pre_pass=0;
2321
2322     return 0;
2323 }
2324
2325 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2326     MpegEncContext *s= *(void**)arg;
2327
2328     ff_check_alignment();
2329
2330     s->me.dia_size= s->avctx->dia_size;
2331     s->first_slice_line=1;
2332     for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2333         s->mb_x=0; //for block init below
2334         ff_init_block_index(s);
2335         for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2336             s->block_index[0]+=2;
2337             s->block_index[1]+=2;
2338             s->block_index[2]+=2;
2339             s->block_index[3]+=2;
2340
2341             /* compute motion vector & mb_type and store in context */
2342             if(s->pict_type==AV_PICTURE_TYPE_B)
2343                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2344             else
2345                 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2346         }
2347         s->first_slice_line=0;
2348     }
2349     return 0;
2350 }
2351
2352 static int mb_var_thread(AVCodecContext *c, void *arg){
2353     MpegEncContext *s= *(void**)arg;
2354     int mb_x, mb_y;
2355
2356     ff_check_alignment();
2357
2358     for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2359         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2360             int xx = mb_x * 16;
2361             int yy = mb_y * 16;
2362             uint8_t *pix = s->new_picture.f.data[0] + (yy * s->linesize) + xx;
2363             int varc;
2364             int sum = s->dsp.pix_sum(pix, s->linesize);
2365
2366             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)sum*sum)>>8) + 500 + 128)>>8;
2367
2368             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
2369             s->current_picture.mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2370             s->me.mb_var_sum_temp    += varc;
2371         }
2372     }
2373     return 0;
2374 }
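/* mb_var_thread() records, per macroblock, the rounded mean of the 256 luma
 * samples (mb_mean) and roughly their variance (sum of squares minus
 * sum^2/256, scaled by 1/256 with small rounding constants), and accumulates
 * the variances into me.mb_var_sum_temp for later frame-level decisions. */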
2375
2376 static void write_slice_end(MpegEncContext *s){
2377     if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2378         if(s->partitioned_frame){
2379             ff_mpeg4_merge_partitions(s);
2380         }
2381
2382         ff_mpeg4_stuffing(&s->pb);
2383     }else if(CONFIG_MJPEG_ENCODER && s->out_format == FMT_MJPEG){
2384         ff_mjpeg_encode_stuffing(s);
2385     }
2386
2387     avpriv_align_put_bits(&s->pb);
2388     flush_put_bits(&s->pb);
2389
2390     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
2391         s->misc_bits+= get_bits_diff(s);
2392 }
2393
2394 static void write_mb_info(MpegEncContext *s)
2395 {
2396     uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2397     int offset = put_bits_count(&s->pb);
2398     int mba  = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2399     int gobn = s->mb_y / s->gob_index;
2400     int pred_x, pred_y;
2401     if (CONFIG_H263_ENCODER)
2402         ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2403     bytestream_put_le32(&ptr, offset);
2404     bytestream_put_byte(&ptr, s->qscale);
2405     bytestream_put_byte(&ptr, gobn);
2406     bytestream_put_le16(&ptr, mba);
2407     bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2408     bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2409     /* 4MV not implemented */
2410     bytestream_put_byte(&ptr, 0); /* hmv2 */
2411     bytestream_put_byte(&ptr, 0); /* vmv2 */
2412 }
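/* Each AV_PKT_DATA_H263_MB_INFO record written above is 12 bytes: a 32-bit bit
 * offset, qscale, GOB number, a 16-bit macroblock address, the predicted MV
 * pair (hmv1/vmv1) and a zeroed second pair (4MV not implemented).  This
 * matches the mb_width * mb_height * 12 side-data allocation made in
 * ff_MPV_encode_picture(). */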
2413
2414 static void update_mb_info(MpegEncContext *s, int startcode)
2415 {
2416     if (!s->mb_info)
2417         return;
2418     if (put_bits_count(&s->pb) - s->prev_mb_info*8 >= s->mb_info*8) {
2419         s->mb_info_size += 12;
2420         s->prev_mb_info = s->last_mb_info;
2421     }
2422     if (startcode) {
2423         s->prev_mb_info = put_bits_count(&s->pb)/8;
2424         /* This might have incremented mb_info_size above, and we return without
2425          * actually writing any info into that slot yet. But in that case,
2426          * this will be called again right after the start code has been
2427          * written, and the mb info will actually be written then. */
2428         return;
2429     }
2430
2431     s->last_mb_info = put_bits_count(&s->pb)/8;
2432     if (!s->mb_info_size)
2433         s->mb_info_size += 12;
2434     write_mb_info(s);
2435 }
2436
2437 static int encode_thread(AVCodecContext *c, void *arg){
2438     MpegEncContext *s= *(void**)arg;
2439     int mb_x, mb_y, pdif = 0;
2440     int chr_h= 16>>s->chroma_y_shift;
2441     int i, j;
2442     MpegEncContext best_s, backup_s;
2443     uint8_t bit_buf[2][MAX_MB_BYTES];
2444     uint8_t bit_buf2[2][MAX_MB_BYTES];
2445     uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2446     PutBitContext pb[2], pb2[2], tex_pb[2];
2447
2448     ff_check_alignment();
2449
2450     for(i=0; i<2; i++){
2451         init_put_bits(&pb    [i], bit_buf    [i], MAX_MB_BYTES);
2452         init_put_bits(&pb2   [i], bit_buf2   [i], MAX_MB_BYTES);
2453         init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2454     }
2455
2456     s->last_bits= put_bits_count(&s->pb);
2457     s->mv_bits=0;
2458     s->misc_bits=0;
2459     s->i_tex_bits=0;
2460     s->p_tex_bits=0;
2461     s->i_count=0;
2462     s->f_count=0;
2463     s->b_count=0;
2464     s->skip_count=0;
2465
2466     for(i=0; i<3; i++){
2467         /* init last dc values */
2468         /* note: quant matrix value (8) is implied here */
2469         s->last_dc[i] = 128 << s->intra_dc_precision;
2470
2471         s->current_picture.f.error[i] = 0;
2472     }
2473     if(s->codec_id==AV_CODEC_ID_AMV){
2474         s->last_dc[0] = 128*8/13;
2475         s->last_dc[1] = 128*8/14;
2476         s->last_dc[2] = 128*8/14;
2477     }
2478     s->mb_skip_run = 0;
2479     memset(s->last_mv, 0, sizeof(s->last_mv));
2480
2481     s->last_mv_dir = 0;
2482
2483     switch(s->codec_id){
2484     case AV_CODEC_ID_H263:
2485     case AV_CODEC_ID_H263P:
2486     case AV_CODEC_ID_FLV1:
2487         if (CONFIG_H263_ENCODER)
2488             s->gob_index = ff_h263_get_gob_height(s);
2489         break;
2490     case AV_CODEC_ID_MPEG4:
2491         if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2492             ff_mpeg4_init_partitions(s);
2493         break;
2494     }
2495
2496     s->resync_mb_x=0;
2497     s->resync_mb_y=0;
2498     s->first_slice_line = 1;
2499     s->ptr_lastgob = s->pb.buf;
2500     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2501         s->mb_x=0;
2502         s->mb_y= mb_y;
2503
2504         ff_set_qscale(s, s->qscale);
2505         ff_init_block_index(s);
2506
2507         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2508             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2509             int mb_type= s->mb_type[xy];
2510 //            int d;
2511             int dmin= INT_MAX;
2512             int dir;
2513
2514             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){
2515                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2516                 return -1;
2517             }
2518             if(s->data_partitioning){
2519                 if(   s->pb2   .buf_end - s->pb2   .buf - (put_bits_count(&s->    pb2)>>3) < MAX_MB_BYTES
2520                    || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){
2521                     av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2522                     return -1;
2523                 }
2524             }
2525
2526             s->mb_x = mb_x;
2527             s->mb_y = mb_y;  // moved into loop, can get changed by H.261
2528             ff_update_block_index(s);
2529
2530             if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2531                 ff_h261_reorder_mb_index(s);
2532                 xy= s->mb_y*s->mb_stride + s->mb_x;
2533                 mb_type= s->mb_type[xy];
2534             }
2535
2536             /* write gob / video packet header  */
2537             if(s->rtp_mode){
2538                 int current_packet_size, is_gob_start;
2539
2540                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
2541
2542                 is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
2543
2544                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2545
2546                 switch(s->codec_id){
2547                 case AV_CODEC_ID_H263:
2548                 case AV_CODEC_ID_H263P:
2549                     if(!s->h263_slice_structured)
2550                         if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
2551                     break;
2552                 case AV_CODEC_ID_MPEG2VIDEO:
2553                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1; /* fall through */
2554                 case AV_CODEC_ID_MPEG1VIDEO:
2555                     if(s->mb_skip_run) is_gob_start=0;
2556                     break;
2557                 case AV_CODEC_ID_MJPEG:
2558                     if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
2559                     break;
2560                 }
2561
2562                 if(is_gob_start){
2563                     if(s->start_mb_y != mb_y || mb_x!=0){
2564                         write_slice_end(s);
2565                         if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
2566                             ff_mpeg4_init_partitions(s);
2567                         }
2568                     }
2569
2570                     av_assert2((put_bits_count(&s->pb)&7) == 0);
2571                     current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
2572
2573                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
2574                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
2575                         int d= 100 / s->avctx->error_rate;
2576                         if(r % d == 0){
2577                             current_packet_size=0;
2578                             s->pb.buf_ptr= s->ptr_lastgob;
2579                             assert(put_bits_ptr(&s->pb) == s->ptr_lastgob);
2580                         }
2581                     }
2582
2583                     if (s->avctx->rtp_callback){
2584                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
2585                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
2586                     }
2587                     update_mb_info(s, 1);
2588
2589                     switch(s->codec_id){
2590                     case AV_CODEC_ID_MPEG4:
2591                         if (CONFIG_MPEG4_ENCODER) {
2592                             ff_mpeg4_encode_video_packet_header(s);
2593                             ff_mpeg4_clean_buffers(s);
2594                         }
2595                     break;
2596                     case AV_CODEC_ID_MPEG1VIDEO:
2597                     case AV_CODEC_ID_MPEG2VIDEO:
2598                         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
2599                             ff_mpeg1_encode_slice_header(s);
2600                             ff_mpeg1_clean_buffers(s);
2601                         }
2602                     break;
2603                     case AV_CODEC_ID_H263:
2604                     case AV_CODEC_ID_H263P:
2605                         if (CONFIG_H263_ENCODER)
2606                             ff_h263_encode_gob_header(s, mb_y);
2607                     break;
2608                     }
2609
2610                     if(s->flags&CODEC_FLAG_PASS1){
2611                         int bits= put_bits_count(&s->pb);
2612                         s->misc_bits+= bits - s->last_bits;
2613                         s->last_bits= bits;
2614                     }
2615
2616                     s->ptr_lastgob += current_packet_size;
2617                     s->first_slice_line=1;
2618                     s->resync_mb_x=mb_x;
2619                     s->resync_mb_y=mb_y;
2620                 }
2621             }
2622
2623             if(  (s->resync_mb_x   == s->mb_x)
2624                && s->resync_mb_y+1 == s->mb_y){
2625                 s->first_slice_line=0;
2626             }
2627
2628             s->mb_skipped=0;
2629             s->dquant=0; //only for QP_RD
2630
2631             update_mb_info(s, 0);
2632
2633             if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
2634                 int next_block=0;
2635                 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
2636
2637                 copy_context_before_encode(&backup_s, s, -1);
2638                 backup_s.pb= s->pb;
2639                 best_s.data_partitioning= s->data_partitioning;
2640                 best_s.partitioned_frame= s->partitioned_frame;
2641                 if(s->data_partitioning){
2642                     backup_s.pb2= s->pb2;
2643                     backup_s.tex_pb= s->tex_pb;
2644                 }
2645
2646                 if(mb_type&CANDIDATE_MB_TYPE_INTER){
2647                     s->mv_dir = MV_DIR_FORWARD;
2648                     s->mv_type = MV_TYPE_16X16;
2649                     s->mb_intra= 0;
2650                     s->mv[0][0][0] = s->p_mv_table[xy][0];
2651                     s->mv[0][0][1] = s->p_mv_table[xy][1];
2652                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
2653                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2654                 }
2655                 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
2656                     s->mv_dir = MV_DIR_FORWARD;
2657                     s->mv_type = MV_TYPE_FIELD;
2658                     s->mb_intra= 0;
2659                     for(i=0; i<2; i++){
2660                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2661                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2662                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2663                     }
2664                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
2665                                  &dmin, &next_block, 0, 0);
2666                 }
2667                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
2668                     s->mv_dir = MV_DIR_FORWARD;
2669                     s->mv_type = MV_TYPE_16X16;
2670                     s->mb_intra= 0;
2671                     s->mv[0][0][0] = 0;
2672                     s->mv[0][0][1] = 0;
2673                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
2674                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2675                 }
2676                 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
2677                     s->mv_dir = MV_DIR_FORWARD;
2678                     s->mv_type = MV_TYPE_8X8;
2679                     s->mb_intra= 0;
2680                     for(i=0; i<4; i++){
2681                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2682                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2683                     }
2684                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
2685                                  &dmin, &next_block, 0, 0);
2686                 }
2687                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
2688                     s->mv_dir = MV_DIR_FORWARD;
2689                     s->mv_type = MV_TYPE_16X16;
2690                     s->mb_intra= 0;
2691                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2692                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2693                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
2694                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
2695                 }
2696                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
2697                     s->mv_dir = MV_DIR_BACKWARD;
2698                     s->mv_type = MV_TYPE_16X16;
2699                     s->mb_intra= 0;
2700                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2701                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2702                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
2703                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
2704                 }
2705                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
2706                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2707                     s->mv_type = MV_TYPE_16X16;
2708                     s->mb_intra= 0;
2709                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2710                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2711                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2712                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2713                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
2714                                  &dmin, &next_block, 0, 0);
2715                 }
2716                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
2717                     s->mv_dir = MV_DIR_FORWARD;
2718                     s->mv_type = MV_TYPE_FIELD;
2719                     s->mb_intra= 0;
2720                     for(i=0; i<2; i++){
2721                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2722                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2723                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2724                     }
2725                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
2726                                  &dmin, &next_block, 0, 0);
2727                 }
2728                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
2729                     s->mv_dir = MV_DIR_BACKWARD;
2730                     s->mv_type = MV_TYPE_FIELD;
2731                     s->mb_intra= 0;
2732                     for(i=0; i<2; i++){
2733                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2734                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2735                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2736                     }
2737                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
2738                                  &dmin, &next_block, 0, 0);
2739                 }
2740                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
2741                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2742                     s->mv_type = MV_TYPE_FIELD;
2743                     s->mb_intra= 0;
2744                     for(dir=0; dir<2; dir++){
2745                         for(i=0; i<2; i++){
2746                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
2747                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
2748                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
2749                         }
2750                     }
2751                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
2752                                  &dmin, &next_block, 0, 0);
2753                 }
2754                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
2755                     s->mv_dir = 0;
2756                     s->mv_type = MV_TYPE_16X16;
2757                     s->mb_intra= 1;
2758                     s->mv[0][0][0] = 0;
2759                     s->mv[0][0][1] = 0;
2760                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
2761                                  &dmin, &next_block, 0, 0);
2762                     if(s->h263_pred || s->h263_aic){
2763                         if(best_s.mb_intra)
2764                             s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
2765                         else
2766                             ff_clean_intra_table_entries(s); //old mode?
2767                     }
2768                 }
2769
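                     /* QP_RD: retry this macroblock at the neighbouring qscales (dquant -1, +1,
                      * -2, +2) and keep whichever trial has the lowest RD cost; the intra DC/AC
                      * prediction state is saved and restored when a trial is not taken. */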
2770                 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
2771                     if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
2772                         const int last_qp= backup_s.qscale;
2773                         int qpi, qp, dc[6];
2774                         int16_t ac[6][16];
2775                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
2776                         static const int dquant_tab[4]={-1,1,-2,2};
2777                         int storecoefs = s->mb_intra && s->dc_val[0];
2778
2779                         av_assert2(backup_s.dquant == 0);
2780
2781                         //FIXME intra
2782                         s->mv_dir= best_s.mv_dir;
2783                         s->mv_type = MV_TYPE_16X16;
2784                         s->mb_intra= best_s.mb_intra;
2785                         s->mv[0][0][0] = best_s.mv[0][0][0];
2786                         s->mv[0][0][1] = best_s.mv[0][0][1];
2787                         s->mv[1][0][0] = best_s.mv[1][0][0];
2788                         s->mv[1][0][1] = best_s.mv[1][0][1];
2789
2790                         qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
2791                         for(; qpi<4; qpi++){
2792                             int dquant= dquant_tab[qpi];
2793                             qp= last_qp + dquant;
2794                             if(qp < s->avctx->qmin || qp > s->avctx->qmax)
2795                                 continue;
2796                             backup_s.dquant= dquant;
2797                             if(storecoefs){
2798                                 for(i=0; i<6; i++){
2799                                     dc[i]= s->dc_val[0][ s->block_index[i] ];
2800                                     memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
2801                                 }
2802                             }
2803
2804                             encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2805                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
2806                             if(best_s.qscale != qp){
2807                                 if(storecoefs){
2808                                     for(i=0; i<6; i++){
2809                                         s->dc_val[0][ s->block_index[i] ]= dc[i];
2810                                         memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
2811                                     }
2812                                 }
2813                             }
2814                         }
2815                     }
2816                 }
2817                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
2818                     int mx= s->b_direct_mv_table[xy][0];
2819                     int my= s->b_direct_mv_table[xy][1];
2820
2821                     backup_s.dquant = 0;
2822                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2823                     s->mb_intra= 0;
2824                     ff_mpeg4_set_direct_mv(s, mx, my);
2825                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2826                                  &dmin, &next_block, mx, my);
2827                 }
2828                 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
2829                     backup_s.dquant = 0;
2830                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
2831                     s->mb_intra= 0;
2832                     ff_mpeg4_set_direct_mv(s, 0, 0);
2833                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
2834                                  &dmin, &next_block, 0, 0);
2835                 }
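                     /* SKIP_RD: if the best mode so far is not intra and codes a residual, retry
                      * the same motion with skipdct set (no coefficients) and keep it if cheaper. */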
2836                 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
2837                     int coded=0;
2838                     for(i=0; i<6; i++)
2839                         coded |= s->block_last_index[i];
2840                     if(coded){
2841                         int mx,my;
2842                         memcpy(s->mv, best_s.mv, sizeof(s->mv));
2843                         if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
2844                             mx=my=0; //FIXME find the one we actually used
2845                             ff_mpeg4_set_direct_mv(s, mx, my);
2846                         }else if(best_s.mv_dir&MV_DIR_BACKWARD){
2847                             mx= s->mv[1][0][0];
2848                             my= s->mv[1][0][1];
2849                         }else{
2850                             mx= s->mv[0][0][0];
2851                             my= s->mv[0][0][1];
2852                         }
2853
2854                         s->mv_dir= best_s.mv_dir;
2855                         s->mv_type = best_s.mv_type;
2856                         s->mb_intra= 0;
2857 /*                        s->mv[0][0][0] = best_s.mv[0][0][0];
2858                         s->mv[0][0][1] = best_s.mv[0][0][1];
2859                         s->mv[1][0][0] = best_s.mv[1][0][0];
2860                         s->mv[1][0][1] = best_s.mv[1][0][1];*/
2861                         backup_s.dquant= 0;
2862                         s->skipdct=1;
2863                         encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
2864                                         &dmin, &next_block, mx, my);
2865                         s->skipdct=0;
2866                     }
2867                 }
2868
2869                 s->current_picture.qscale_table[xy] = best_s.qscale;
2870
2871                 copy_context_after_encode(s, &best_s, -1);
2872
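                     /* The winning candidate's bits sit in one of the two scratch buffers; flush
                      * them and copy them back into the real bitstream writer(s), including
                      * pb2/tex_pb when data partitioning is enabled. */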
2873                 pb_bits_count= put_bits_count(&s->pb);
2874                 flush_put_bits(&s->pb);
2875                 avpriv_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
2876                 s->pb= backup_s.pb;
2877
2878                 if(s->data_partitioning){
2879                     pb2_bits_count= put_bits_count(&s->pb2);
2880                     flush_put_bits(&s->pb2);
2881                     avpriv_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
2882                     s->pb2= backup_s.pb2;
2883
2884                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
2885                     flush_put_bits(&s->tex_pb);
2886                     avpriv_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
2887                     s->tex_pb= backup_s.tex_pb;
2888                 }
2889                 s->last_bits= put_bits_count(&s->pb);
2890
2891                 if (CONFIG_H263_ENCODER &&
2892                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
2893                     ff_h263_update_motion_val(s);
2894
2895                 if(next_block==0){ //FIXME 16 vs linesize16
2896                     s->hdsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
2897                     s->hdsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
2898                     s->hdsp.put_pixels_tab[1][0](s->dest[2], s->rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
2899                 }
2900
2901                 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
2902                     ff_MPV_decode_mb(s, s->block);
2903             } else {
2904                 int motion_x = 0, motion_y = 0;
2905                 s->mv_type=MV_TYPE_16X16;
2906                 // only one MB-Type possible
2907
2908                 switch(mb_type){
2909                 case CANDIDATE_MB_TYPE_INTRA:
2910                     s->mv_dir = 0;
2911                     s->mb_intra= 1;
2912                     motion_x= s->mv[0][0][0] = 0;
2913                     motion_y= s->mv[0][0][1] = 0;
2914                     break;
2915                 case CANDIDATE_MB_TYPE_INTER:
2916                     s->mv_dir = MV_DIR_FORWARD;
2917                     s->mb_intra= 0;
2918                     motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
2919                     motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
2920                     break;
2921                 case CANDIDATE_MB_TYPE_INTER_I:
2922                     s->mv_dir = MV_DIR_FORWARD;
2923                     s->mv_type = MV_TYPE_FIELD;
2924                     s->mb_intra= 0;
2925                     for(i=0; i<2; i++){
2926                         j= s->field_select[0][i] = s->p_field_select_table[i][xy];
2927                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
2928                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
2929                     }
2930                     break;
2931                 case CANDIDATE_MB_TYPE_INTER4V:
2932                     s->mv_dir = MV_DIR_FORWARD;
2933                     s->mv_type = MV_TYPE_8X8;
2934                     s->mb_intra= 0;
2935                     for(i=0; i<4; i++){
2936                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
2937                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
2938                     }
2939                     break;
2940                 case CANDIDATE_MB_TYPE_DIRECT:
2941                     if (CONFIG_MPEG4_ENCODER) {
2942                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2943                         s->mb_intra= 0;
2944                         motion_x=s->b_direct_mv_table[xy][0];
2945                         motion_y=s->b_direct_mv_table[xy][1];
2946                         ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
2947                     }
2948                     break;
2949                 case CANDIDATE_MB_TYPE_DIRECT0:
2950                     if (CONFIG_MPEG4_ENCODER) {
2951                         s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
2952                         s->mb_intra= 0;
2953                         ff_mpeg4_set_direct_mv(s, 0, 0);
2954                     }
2955                     break;
2956                 case CANDIDATE_MB_TYPE_BIDIR:
2957                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2958                     s->mb_intra= 0;
2959                     s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
2960                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
2961                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
2962                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
2963                     break;
2964                 case CANDIDATE_MB_TYPE_BACKWARD:
2965                     s->mv_dir = MV_DIR_BACKWARD;
2966                     s->mb_intra= 0;
2967                     motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
2968                     motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
2969                     break;
2970                 case CANDIDATE_MB_TYPE_FORWARD:
2971                     s->mv_dir = MV_DIR_FORWARD;
2972                     s->mb_intra= 0;
2973                     motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
2974                     motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
2975                     break;
2976                 case CANDIDATE_MB_TYPE_FORWARD_I:
2977                     s->mv_dir = MV_DIR_FORWARD;
2978                     s->mv_type = MV_TYPE_FIELD;
2979                     s->mb_intra= 0;
2980                     for(i=0; i<2; i++){
2981                         j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
2982                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
2983                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
2984                     }
2985                     break;
2986                 case CANDIDATE_MB_TYPE_BACKWARD_I:
2987                     s->mv_dir = MV_DIR_BACKWARD;
2988                     s->mv_type = MV_TYPE_FIELD;
2989                     s->mb_intra= 0;
2990                     for(i=0; i<2; i++){
2991                         j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
2992                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
2993                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
2994                     }
2995                     break;
2996                 case CANDIDATE_MB_TYPE_BIDIR_I:
2997                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
2998                     s->mv_type = MV_TYPE_FIELD;
2999                     s->mb_intra= 0;
3000                     for(dir=0; dir<2; dir++){
3001                         for(i=0; i<2; i++){
3002                             j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3003                             s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3004                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3005                         }
3006                     }
3007                     break;
3008                 default:
3009                     av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3010                 }
3011
3012                 encode_mb(s, motion_x, motion_y);
3013
3014                 // RAL: Update last macroblock type
3015                 s->last_mv_dir = s->mv_dir;
3016
3017                 if (CONFIG_H263_ENCODER &&
3018                     s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3019                     ff_h263_update_motion_val(s);
3020
3021                 ff_MPV_decode_mb(s, s->block);
3022             }
3023
3024             /* clean the MV table in I/P/S frames for direct mode in B-frames */
3025             if(s->mb_intra /* && I,P,S_TYPE */){
3026                 s->p_mv_table[xy][0]=0;
3027                 s->p_mv_table[xy][1]=0;
3028             }
3029
3030             if(s->flags&CODEC_FLAG_PSNR){
3031                 int w= 16;
3032                 int h= 16;
3033
3034                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3035                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3036
3037                 s->current_picture.f.error[0] += sse(
3038                     s, s->new_picture.f.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3039                     s->dest[0], w, h, s->linesize);
3040                 s->current_picture.f.error[1] += sse(
3041                     s, s->new_picture.f.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3042                     s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3043                 s->current_picture.f.error[2] += sse(
3044                     s, s->new_picture.f.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*chr_h,
3045                     s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3046             }
3047             if(s->loop_filter){
3048                 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3049                     ff_h263_loop_filter(s);
3050             }
3051             av_dlog(s->avctx, "MB %d %d bits\n",
3052                     s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3053         }
3054     }
3055
3056     //not beautiful here, but we must write it before flushing, so it has to be here
3057     if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == AV_PICTURE_TYPE_I)
3058         ff_msmpeg4_encode_ext_header(s);
3059
3060     write_slice_end(s);
3061
3062     /* Send the last GOB if RTP */
3063     if (s->avctx->rtp_callback) {
3064         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
3065         pdif = put_bits_ptr(&s->pb) - s->ptr_lastgob;
3066         /* Call the RTP callback to send the last GOB */
3067         emms_c();
3068         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb);
3069     }
3070
3071     return 0;
3072 }
3073
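     /* MERGE() adds a field from a slice thread context into the main context and
      * clears the source; used below to combine per-thread statistics after motion
      * estimation and after encoding. */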
3074 #define MERGE(field) dst->field += src->field; src->field=0
3075 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3076     MERGE(me.scene_change_score);
3077     MERGE(me.mc_mb_var_sum_temp);
3078     MERGE(me.mb_var_sum_temp);
3079 }
3080
3081 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3082     int i;
3083
3084     MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3085     MERGE(dct_count[1]);
3086     MERGE(mv_bits);
3087     MERGE(i_tex_bits);
3088     MERGE(p_tex_bits);
3089     MERGE(i_count);
3090     MERGE(f_count);
3091     MERGE(b_count);
3092     MERGE(skip_count);
3093     MERGE(misc_bits);
3094     MERGE(er.error_count);
3095     MERGE(padding_bug_score);
3096     MERGE(current_picture.f.error[0]);
3097     MERGE(current_picture.f.error[1]);
3098     MERGE(current_picture.f.error[2]);
3099
3100     if(dst->avctx->noise_reduction){
3101         for(i=0; i<64; i++){
3102             MERGE(dct_error_sum[0][i]);
3103             MERGE(dct_error_sum[1][i]);
3104         }
3105     }
3106
3107     assert(put_bits_count(&src->pb) % 8 ==0);
3108     assert(put_bits_count(&dst->pb) % 8 ==0);
3109     avpriv_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3110     flush_put_bits(&dst->pb);
3111 }
3112
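     /* Pick the frame quality: a pending next_lambda, the rate controller's estimate,
      * or the fixed qscale; with adaptive quantization the per-MB qscale table is
      * cleaned up according to the codec's rules. */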
3113 static int estimate_qp(MpegEncContext *s, int dry_run){
3114     if (s->next_lambda){
3115         s->current_picture_ptr->f.quality =
3116         s->current_picture.f.quality = s->next_lambda;
3117         if(!dry_run) s->next_lambda= 0;
3118     } else if (!s->fixed_qscale) {
3119         s->current_picture_ptr->f.quality =
3120         s->current_picture.f.quality = ff_rate_estimate_qscale(s, dry_run);
3121         if (s->current_picture.f.quality < 0)
3122             return -1;
3123     }
3124
3125     if(s->adaptive_quant){
3126         switch(s->codec_id){
3127         case AV_CODEC_ID_MPEG4:
3128             if (CONFIG_MPEG4_ENCODER)
3129                 ff_clean_mpeg4_qscales(s);
3130             break;
3131         case AV_CODEC_ID_H263:
3132         case AV_CODEC_ID_H263P:
3133         case AV_CODEC_ID_FLV1:
3134             if (CONFIG_H263_ENCODER)
3135                 ff_clean_h263_qscales(s);
3136             break;
3137         default:
3138             ff_init_qscale_tab(s);
3139         }
3140
3141         s->lambda= s->lambda_table[0];
3142         //FIXME broken
3143     }else
3144         s->lambda = s->current_picture.f.quality;
3145     update_qscale(s);
3146     return 0;
3147 }
3148
3149 /* must be called before writing the header */
3150 static void set_frame_distances(MpegEncContext * s){
3151     assert(s->current_picture_ptr->f.pts != AV_NOPTS_VALUE);
3152     s->time = s->current_picture_ptr->f.pts * s->avctx->time_base.num;
3153
3154     if(s->pict_type==AV_PICTURE_TYPE_B){
3155         s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3156         assert(s->pb_time > 0 && s->pb_time < s->pp_time);
3157     }else{
3158         s->pp_time= s->time - s->last_non_b_time;
3159         s->last_non_b_time= s->time;
3160         assert(s->picture_number==0 || s->pp_time > 0);
3161     }
3162 }
3163
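     /* Per-frame driver: set frame distances, run motion estimation, choose
      * f_code/b_code, handle scene-change promotion to I-frames, set up the
      * quantization matrices, write the picture header and then run encode_thread()
      * over all slice contexts. */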
3164 static int encode_picture(MpegEncContext *s, int picture_number)
3165 {
3166     int i, ret;
3167     int bits;
3168     int context_count = s->slice_context_count;
3169
3170     s->picture_number = picture_number;
3171
3172     /* Reset the average MB variance */
3173     s->me.mb_var_sum_temp    =
3174     s->me.mc_mb_var_sum_temp = 0;
3175
3176     /* we need to initialize some time vars before we can encode b-frames */
3177     // RAL: Condition added for MPEG1VIDEO
3178     if (s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->msmpeg4_version))
3179         set_frame_distances(s);
3180     if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3181         ff_set_mpeg4_time(s);
3182
3183     s->me.scene_change_score=0;
3184
3185 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3186
3187     if(s->pict_type==AV_PICTURE_TYPE_I){
3188         if(s->msmpeg4_version >= 3) s->no_rounding=1;
3189         else                        s->no_rounding=0;
3190     }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3191         if(s->flipflop_rounding || s->codec_id == AV_CODEC_ID_H263P || s->codec_id == AV_CODEC_ID_MPEG4)
3192             s->no_rounding ^= 1;
3193     }
3194
3195     if(s->flags & CODEC_FLAG_PASS2){
3196         if (estimate_qp(s,1) < 0)
3197             return -1;
3198         ff_get_2pass_fcode(s);
3199     }else if(!(s->flags & CODEC_FLAG_QSCALE)){
3200         if(s->pict_type==AV_PICTURE_TYPE_B)
3201             s->lambda= s->last_lambda_for[s->pict_type];
3202         else
3203             s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3204         update_qscale(s);
3205     }
3206
3207     if(s->codec_id != AV_CODEC_ID_AMV){
3208         if(s->q_chroma_intra_matrix   != s->q_intra_matrix  ) av_freep(&s->q_chroma_intra_matrix);
3209         if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3210         s->q_chroma_intra_matrix   = s->q_intra_matrix;
3211         s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3212     }
3213
3214     s->mb_intra=0; //for the rate distortion & bit compare functions
3215     for(i=1; i<context_count; i++){
3216         ret = ff_update_duplicate_context(s->thread_context[i], s);
3217         if (ret < 0)
3218             return ret;
3219     }
3220
3221     if(ff_init_me(s)<0)
3222         return -1;
3223
3224     /* Estimate motion for every MB */
3225     if(s->pict_type != AV_PICTURE_TYPE_I){
3226         s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8;
3227         s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8;
3228         if (s->pict_type != AV_PICTURE_TYPE_B) {
3229             if((s->avctx->pre_me && s->last_non_b_pict_type==AV_PICTURE_TYPE_I) || s->avctx->pre_me==2){
3230                 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3231             }
3232         }
3233
3234         s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3235     }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3236         /* I-Frame */
3237         for(i=0; i<s->mb_stride*s->mb_height; i++)
3238             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3239
3240         if(!s->fixed_qscale){
3241             /* finding spatial complexity for I-frame rate control */
3242             s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3243         }
3244     }
3245     for(i=1; i<context_count; i++){
3246         merge_context_after_me(s, s->thread_context[i]);
3247     }
3248     s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp;
3249     s->current_picture.   mb_var_sum= s->current_picture_ptr->   mb_var_sum= s->me.   mb_var_sum_temp;
3250     emms_c();
3251
3252     if(s->me.scene_change_score > s->avctx->scenechange_threshold && s->pict_type == AV_PICTURE_TYPE_P){
3253         s->pict_type= AV_PICTURE_TYPE_I;
3254         for(i=0; i<s->mb_stride*s->mb_height; i++)
3255             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3256         if(s->msmpeg4_version >= 3)
3257             s->no_rounding=1;
3258         av_dlog(s->avctx, "Scene change detected, encoding as I Frame %d %d\n",
3259                 s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3260     }
3261
3262     if(!s->umvplus){
3263         if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3264             s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3265
3266             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3267                 int a,b;
3268                 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3269                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3270                 s->f_code= FFMAX3(s->f_code, a, b);
3271             }
3272
3273             ff_fix_long_p_mvs(s);
3274             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
3275             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3276                 int j;
3277                 for(i=0; i<2; i++){
3278                     for(j=0; j<2; j++)
3279                         ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3280                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
3281                 }
3282             }
3283         }
3284
3285         if(s->pict_type==AV_PICTURE_TYPE_B){
3286             int a, b;
3287
3288             a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3289             b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3290             s->f_code = FFMAX(a, b);
3291
3292             a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3293             b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3294             s->b_code = FFMAX(a, b);
3295
3296             ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3297             ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3298             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3299             ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3300             if(s->flags & CODEC_FLAG_INTERLACED_ME){
3301                 int dir, j;
3302                 for(dir=0; dir<2; dir++){
3303                     for(i=0; i<2; i++){
3304                         for(j=0; j<2; j++){
3305                             int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3306                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3307                             ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3308                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3309                         }
3310                     }
3311                 }
3312             }
3313         }
3314     }
3315
3316     if (estimate_qp(s, 0) < 0)
3317         return -1;
3318
3319     if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==AV_PICTURE_TYPE_I && !(s->flags & CODEC_FLAG_QSCALE))
3320         s->qscale= 3; //reduce clipping problems
3321
3322     if (s->out_format == FMT_MJPEG) {
3323         /* for mjpeg, we do include qscale in the matrix */
3324         for(i=1;i<64;i++){
3325             int j= s->dsp.idct_permutation[i];
3326
3327             s->intra_matrix[j] = av_clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
3328         }
3329         s->y_dc_scale_table=
3330         s->c_dc_scale_table= ff_mpeg2_dc_scale_table[s->intra_dc_precision];
3331         s->intra_matrix[0] = ff_mpeg2_dc_scale_table[s->intra_dc_precision][8];
3332         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3333                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3334         s->qscale= 8;
3335     }
3336     if(s->codec_id == AV_CODEC_ID_AMV){
3337         static const uint8_t y[32]={13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3338         static const uint8_t c[32]={14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3339         for(i=1;i<64;i++){
3340             int j= s->dsp.idct_permutation[ff_zigzag_direct[i]];
3341
3342             s->intra_matrix[j] = sp5x_quant_table[5*2+0][i];
3343             s->chroma_intra_matrix[j] = sp5x_quant_table[5*2+1][i];
3344         }
3345         s->y_dc_scale_table= y;
3346         s->c_dc_scale_table= c;
3347         s->intra_matrix[0] = 13;
3348         s->chroma_intra_matrix[0] = 14;
3349         ff_convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
3350                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3351         ff_convert_matrix(&s->dsp, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3352                        s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3353         s->qscale= 8;
3354     }
3355
3356     //FIXME var duplication
3357     s->current_picture_ptr->f.key_frame =
3358     s->current_picture.f.key_frame = s->pict_type == AV_PICTURE_TYPE_I; //FIXME pic_ptr
3359     s->current_picture_ptr->f.pict_type =
3360     s->current_picture.f.pict_type = s->pict_type;
3361
3362     if (s->current_picture.f.key_frame)
3363         s->picture_in_gop_number=0;
3364
3365     s->mb_x = s->mb_y = 0;
3366     s->last_bits= put_bits_count(&s->pb);
3367     switch(s->out_format) {
3368     case FMT_MJPEG:
3369         if (CONFIG_MJPEG_ENCODER)
3370             ff_mjpeg_encode_picture_header(s);
3371         break;
3372     case FMT_H261:
3373         if (CONFIG_H261_ENCODER)
3374             ff_h261_encode_picture_header(s, picture_number);
3375         break;
3376     case FMT_H263:
3377         if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3378             ff_wmv2_encode_picture_header(s, picture_number);
3379         else if (CONFIG_MSMPEG4_ENCODER && s->msmpeg4_version)
3380             ff_msmpeg4_encode_picture_header(s, picture_number);
3381         else if (CONFIG_MPEG4_ENCODER && s->h263_pred)
3382             ff_mpeg4_encode_picture_header(s, picture_number);
3383         else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10)
3384             ff_rv10_encode_picture_header(s, picture_number);
3385         else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3386             ff_rv20_encode_picture_header(s, picture_number);
3387         else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3388             ff_flv_encode_picture_header(s, picture_number);
3389         else if (CONFIG_H263_ENCODER)
3390             ff_h263_encode_picture_header(s, picture_number);
3391         break;
3392     case FMT_MPEG1:
3393         if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3394             ff_mpeg1_encode_picture_header(s, picture_number);
3395         break;
3396     default:
3397         av_assert0(0);
3398     }
3399     bits= put_bits_count(&s->pb);
3400     s->header_bits= bits - s->last_bits;
3401
3402     for(i=1; i<context_count; i++){
3403         update_duplicate_context_after_me(s->thread_context[i], s);
3404     }
3405     s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3406     for(i=1; i<context_count; i++){
3407         merge_context_after_encode(s, s->thread_context[i]);
3408     }
3409     emms_c();
3410     return 0;
3411 }
3412
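     /* DCT-domain noise reduction: accumulate per-coefficient magnitude statistics in
      * dct_error_sum and shrink each nonzero coefficient towards zero by dct_offset. */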
3413 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3414     const int intra= s->mb_intra;
3415     int i;
3416
3417     s->dct_count[intra]++;
3418
3419     for(i=0; i<64; i++){
3420         int level= block[i];
3421
3422         if(level){
3423             if(level>0){
3424                 s->dct_error_sum[intra][i] += level;
3425                 level -= s->dct_offset[intra][i];
3426                 if(level<0) level=0;
3427             }else{
3428                 s->dct_error_sum[intra][i] -= level;
3429                 level += s->dct_offset[intra][i];
3430                 if(level>0) level=0;
3431             }
3432             block[i]= level;
3433         }
3434     }
3435 }
3436
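     /* Rate-distortion (trellis) quantization: each coefficient gets up to two
      * candidate levels, then a dynamic program over the scan positions (with
      * survivor pruning) picks the run/level sequence minimizing
      * distortion + lambda * bits. */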
3437 static int dct_quantize_trellis_c(MpegEncContext *s,
3438                                   int16_t *block, int n,
3439                                   int qscale, int *overflow){
3440     const int *qmat;
3441     const uint8_t *scantable= s->intra_scantable.scantable;
3442     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3443     int max=0;
3444     unsigned int threshold1, threshold2;
3445     int bias=0;
3446     int run_tab[65];
3447     int level_tab[65];
3448     int score_tab[65];
3449     int survivor[65];
3450     int survivor_count;
3451     int last_run=0;
3452     int last_level=0;
3453     int last_score= 0;
3454     int last_i;
3455     int coeff[2][64];
3456     int coeff_count[64];
3457     int qmul, qadd, start_i, last_non_zero, i, dc;
3458     const int esc_length= s->ac_esc_length;
3459     uint8_t * length;
3460     uint8_t * last_length;
3461     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3462
3463     s->dsp.fdct (block);
3464
3465     if(s->dct_error_sum)
3466         s->denoise_dct(s, block);
3467     qmul= qscale*16;
3468     qadd= ((qscale-1)|1)*8;
3469
3470     if (s->mb_intra) {
3471         int q;
3472         if (!s->h263_aic) {
3473             if (n < 4)
3474                 q = s->y_dc_scale;
3475             else
3476                 q = s->c_dc_scale;
3477             q = q << 3;
3478         } else{
3479             /* For AIC we skip quant/dequant of INTRADC */
3480             q = 1 << 3;
3481             qadd=0;
3482         }
3483
3484         /* note: block[0] is assumed to be positive */
3485         block[0] = (block[0] + (q >> 1)) / q;
3486         start_i = 1;
3487         last_non_zero = 0;
3488         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3489         if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3490             bias= 1<<(QMAT_SHIFT-1);
3491         length     = s->intra_ac_vlc_length;
3492         last_length= s->intra_ac_vlc_last_length;
3493     } else {
3494         start_i = 0;
3495         last_non_zero = -1;
3496         qmat = s->q_inter_matrix[qscale];
3497         length     = s->inter_ac_vlc_length;
3498         last_length= s->inter_ac_vlc_last_length;
3499     }
3500     last_i= start_i;
3501
3502     threshold1= (1<<QMAT_SHIFT) - bias - 1;
3503     threshold2= (threshold1<<1);
3504
3505     for(i=63; i>=start_i; i--) {
3506         const int j = scantable[i];
3507         int level = block[j] * qmat[j];
3508
3509         if(((unsigned)(level+threshold1))>threshold2){
3510             last_non_zero = i;
3511             break;
3512         }
3513     }
3514
3515     for(i=start_i; i<=last_non_zero; i++) {
3516         const int j = scantable[i];
3517         int level = block[j] * qmat[j];
3518
3519 //        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
3520 //           || bias-level >= (1<<(QMAT_SHIFT - 3))){
3521         if(((unsigned)(level+threshold1))>threshold2){
3522             if(level>0){
3523                 level= (bias + level)>>QMAT_SHIFT;
3524                 coeff[0][i]= level;
3525                 coeff[1][i]= level-1;
3526 //                coeff[2][k]= level-2;
3527             }else{
3528                 level= (bias - level)>>QMAT_SHIFT;
3529                 coeff[0][i]= -level;
3530                 coeff[1][i]= -level+1;
3531 //                coeff[2][k]= -level+2;
3532             }
3533             coeff_count[i]= FFMIN(level, 2);
3534             av_assert2(coeff_count[i]);
3535             max |=level;
3536         }else{
3537             coeff[0][i]= (level>>31)|1;
3538             coeff_count[i]= 1;
3539         }
3540     }
3541
3542     *overflow= s->max_qcoeff < max; //overflow might have happened
3543
3544     if(last_non_zero < start_i){
3545         memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3546         return last_non_zero;
3547     }
3548
3549     score_tab[start_i]= 0;
3550     survivor[0]= start_i;
3551     survivor_count= 1;
3552
3553     for(i=start_i; i<=last_non_zero; i++){
3554         int level_index, j, zero_distortion;
3555         int dct_coeff= FFABS(block[ scantable[i] ]);
3556         int best_score=256*256*256*120;
3557
3558         if (s->dsp.fdct == ff_fdct_ifast)
3559             dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
3560         zero_distortion= dct_coeff*dct_coeff;
3561
3562         for(level_index=0; level_index < coeff_count[i]; level_index++){
3563             int distortion;
3564             int level= coeff[level_index][i];
3565             const int alevel= FFABS(level);
3566             int unquant_coeff;
3567
3568             av_assert2(level);
3569
3570             if(s->out_format == FMT_H263){
3571                 unquant_coeff= alevel*qmul + qadd;
3572             }else{ //MPEG1
3573                 j= s->dsp.idct_permutation[ scantable[i] ]; //FIXME optimize
3574                 if(s->mb_intra){
3575                         unquant_coeff = (int)(  alevel  * qscale * s->intra_matrix[j]) >> 3;
3576                         unquant_coeff =   (unquant_coeff - 1) | 1;
3577                 }else{
3578                         unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[j])) >> 4;
3579                         unquant_coeff =   (unquant_coeff - 1) | 1;
3580                 }
3581                 unquant_coeff<<= 3;
3582             }
3583
3584             distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
3585             level+=64;
3586             if((level&(~127)) == 0){
3587                 for(j=survivor_count-1; j>=0; j--){
3588                     int run= i - survivor[j];
3589                     int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3590                     score += score_tab[i-run];
3591
3592                     if(score < best_score){
3593                         best_score= score;
3594                         run_tab[i+1]= run;
3595                         level_tab[i+1]= level-64;
3596                     }
3597                 }
3598
3599                 if(s->out_format == FMT_H263){
3600                     for(j=survivor_count-1; j>=0; j--){
3601                         int run= i - survivor[j];
3602                         int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
3603                         score += score_tab[i-run];
3604                         if(score < last_score){
3605                             last_score= score;
3606                             last_run= run;
3607                             last_level= level-64;
3608                             last_i= i+1;
3609                         }
3610                     }
3611                 }
3612             }else{
3613                 distortion += esc_length*lambda;
3614                 for(j=survivor_count-1; j>=0; j--){
3615                     int run= i - survivor[j];
3616                     int score= distortion + score_tab[i-run];
3617
3618                     if(score < best_score){
3619                         best_score= score;
3620                         run_tab[i+1]= run;
3621                         level_tab[i+1]= level-64;
3622                     }
3623                 }
3624
3625                 if(s->out_format == FMT_H263){
3626                   for(j=survivor_count-1; j>=0; j--){
3627                         int run= i - survivor[j];
3628                         int score= distortion + score_tab[i-run];
3629                         if(score < last_score){
3630                             last_score= score;
3631                             last_run= run;
3632                             last_level= level-64;
3633                             last_i= i+1;
3634                         }
3635                     }
3636                 }
3637             }
3638         }
3639
3640         score_tab[i+1]= best_score;
3641
3642                 //Note: there is a vlc code in mpeg4 which is 1 bit shorter than another one with a shorter run and the same level
3643         if(last_non_zero <= 27){
3644             for(; survivor_count; survivor_count--){
3645                 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
3646                     break;
3647             }
3648         }else{
3649             for(; survivor_count; survivor_count--){
3650                 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
3651                     break;
3652             }
3653         }
3654
3655         survivor[ survivor_count++ ]= i+1;
3656     }
3657
3658     if(s->out_format != FMT_H263){
3659         last_score= 256*256*256*120;
3660         for(i= survivor[0]; i<=last_non_zero + 1; i++){
3661             int score= score_tab[i];
3662             if(i) score += lambda*2; //FIXME more exact?
3663
3664             if(score < last_score){
3665                 last_score= score;
3666                 last_i= i;
3667                 last_level= level_tab[i];
3668                 last_run= run_tab[i];
3669             }
3670         }
3671     }
3672
3673     s->coded_score[n] = last_score;
3674
3675     dc= FFABS(block[0]);
3676     last_non_zero= last_i - 1;
3677     memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
3678
3679     if(last_non_zero < start_i)
3680         return last_non_zero;
3681
3682     if(last_non_zero == 0 && start_i == 0){
3683         int best_level= 0;
3684         int best_score= dc * dc;
3685
3686         for(i=0; i<coeff_count[0]; i++){
3687             int level= coeff[i][0];
3688             int alevel= FFABS(level);
3689             int unquant_coeff, score, distortion;
3690
3691             if(s->out_format == FMT_H263){
3692                     unquant_coeff= (alevel*qmul + qadd)>>3;
3693             }else{ //MPEG1
3694                     unquant_coeff = (((  alevel  << 1) + 1) * qscale * ((int) s->inter_matrix[0])) >> 4;
3695                     unquant_coeff =   (unquant_coeff - 1) | 1;
3696             }
3697             unquant_coeff = (unquant_coeff + 4) >> 3;
3698             unquant_coeff<<= 3 + 3;
3699
3700             distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
3701             level+=64;
3702             if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
3703             else                    score= distortion + esc_length*lambda;
3704
3705             if(score < best_score){
3706                 best_score= score;
3707                 best_level= level - 64;
3708             }
3709         }
3710         block[0]= best_level;
3711         s->coded_score[n] = best_score - dc*dc;
3712         if(best_level == 0) return -1;
3713         else                return last_non_zero;
3714     }
3715
3716     i= last_i;
3717     av_assert2(last_level);
3718
3719     block[ perm_scantable[last_non_zero] ]= last_level;
3720     i -= last_run + 1;
3721
3722     for(; i>start_i; i -= run_tab[i] + 1){
3723         block[ perm_scantable[i-1] ]= level_tab[i];
3724     }
3725
3726     return last_non_zero;
3727 }
3728
3729 //#define REFINE_STATS 1
3730 static int16_t basis[64][64];
3731
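     /* Precompute the 64 8x8 DCT basis patterns (with orthonormal scaling), stored in
      * IDCT-permutation order; dct_quantize_refine() uses them via try_8x8basis() and
      * add_8x8basis(). */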
3732 static void build_basis(uint8_t *perm){
3733     int i, j, x, y;
3734     emms_c();
3735     for(i=0; i<8; i++){
3736         for(j=0; j<8; j++){
3737             for(y=0; y<8; y++){
3738                 for(x=0; x<8; x++){
3739                     double s= 0.25*(1<<BASIS_SHIFT);
3740                     int index= 8*i + j;
3741                     int perm_index= perm[index];
3742                     if(i==0) s*= sqrt(0.5);
3743                     if(j==0) s*= sqrt(0.5);
3744                     basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
3745                 }
3746             }
3747         }
3748     }
3749 }
3750
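     /* Quantizer noise shaping: starting from an already quantized block, repeatedly
      * change single coefficients by +-1 whenever that lowers the weighted
      * reconstruction error plus the rate cost, until no improving change remains. */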
3751 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
3752                         int16_t *block, int16_t *weight, int16_t *orig,
3753                         int n, int qscale){
3754     int16_t rem[64];
3755     LOCAL_ALIGNED_16(int16_t, d1, [64]);
3756     const uint8_t *scantable= s->intra_scantable.scantable;
3757     const uint8_t *perm_scantable= s->intra_scantable.permutated;
3758 //    unsigned int threshold1, threshold2;
3759 //    int bias=0;
3760     int run_tab[65];
3761     int prev_run=0;
3762     int prev_level=0;
3763     int qmul, qadd, start_i, last_non_zero, i, dc;
3764     uint8_t * length;
3765     uint8_t * last_length;
3766     int lambda;
3767     int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
3768 #ifdef REFINE_STATS
3769 static int count=0;
3770 static int after_last=0;
3771 static int to_zero=0;
3772 static int from_zero=0;
3773 static int raise=0;
3774 static int lower=0;
3775 static int messed_sign=0;
3776 #endif
3777
3778     if(basis[0][0] == 0)
3779         build_basis(s->dsp.idct_permutation);
3780
3781     qmul= qscale*2;
3782     qadd= (qscale-1)|1;
3783     if (s->mb_intra) {
3784         if (!s->h263_aic) {
3785             if (n < 4)
3786                 q = s->y_dc_scale;
3787             else
3788                 q = s->c_dc_scale;
3789         } else{
3790             /* For AIC we skip quant/dequant of INTRADC */
3791             q = 1;
3792             qadd=0;
3793         }
3794         q <<= RECON_SHIFT-3;
3795         /* note: block[0] is assumed to be positive */
3796         dc= block[0]*q;
3797 //        block[0] = (block[0] + (q >> 1)) / q;
3798         start_i = 1;
3799 //        if(s->mpeg_quant || s->out_format == FMT_MPEG1)
3800 //            bias= 1<<(QMAT_SHIFT-1);
3801         length     = s->intra_ac_vlc_length;
3802         last_length= s->intra_ac_vlc_last_length;
3803     } else {
3804         dc= 0;
3805         start_i = 0;
3806         length     = s->inter_ac_vlc_length;
3807         last_length= s->inter_ac_vlc_last_length;
3808     }
3809     last_non_zero = s->block_last_index[n];
3810
3811 #ifdef REFINE_STATS
3812 {START_TIMER
3813 #endif
3814     dc += (1<<(RECON_SHIFT-1));
3815     for(i=0; i<64; i++){
3816         rem[i]= dc - (orig[i]<<RECON_SHIFT); //FIXME use orig directly instead of copying to rem[]
3817     }
3818 #ifdef REFINE_STATS
3819 STOP_TIMER("memset rem[]")}
3820 #endif
3821     sum=0;
3822     for(i=0; i<64; i++){
3823         int one= 36;
3824         int qns=4;
3825         int w;
3826
3827         w= FFABS(weight[i]) + qns*one;
3828         w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
3829
3830         weight[i] = w;
3831 //        w=weight[i] = (63*qns + (w/2)) / w;
3832
3833         av_assert2(w>0);
3834         av_assert2(w<(1<<6));
3835         sum += w*w;
3836     }
3837     lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
3838 #ifdef REFINE_STATS
3839 {START_TIMER
3840 #endif
3841     run=0;
3842     rle_index=0;
3843     for(i=start_i; i<=last_non_zero; i++){
3844         int j= perm_scantable[i];
3845         const int level= block[j];
3846         int coeff;
3847
3848         if(level){
3849             if(level<0) coeff= qmul*level - qadd;
3850             else        coeff= qmul*level + qadd;
3851             run_tab[rle_index++]=run;
3852             run=0;
3853
3854             s->dsp.add_8x8basis(rem, basis[j], coeff);
3855         }else{
3856             run++;
3857         }
3858     }
3859 #ifdef REFINE_STATS
3860 if(last_non_zero>0){
3861 STOP_TIMER("init rem[]")
3862 }
3863 }
3864
3865 {START_TIMER
3866 #endif
3867     for(;;){
3868         int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
3869         int best_coeff=0;
3870         int best_change=0;
3871         int run2, best_unquant_change=0, analyze_gradient;
3872 #ifdef REFINE_STATS
3873 {START_TIMER
3874 #endif
3875         analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
3876
3877         if(analyze_gradient){
3878 #ifdef REFINE_STATS
3879 {START_TIMER
3880 #endif
3881             for(i=0; i<64; i++){
3882                 int w= weight[i];
3883
3884                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
3885             }
3886 #ifdef REFINE_STATS
3887 STOP_TIMER("rem*w*w")}
3888 {START_TIMER
3889 #endif
3890             s->dsp.fdct(d1);
3891 #ifdef REFINE_STATS
3892 STOP_TIMER("dct")}
3893 #endif
3894         }
3895
3896         if(start_i){
3897             const int level= block[0];
3898             int change, old_coeff;
3899
3900             av_assert2(s->mb_intra);
3901
3902             old_coeff= q*level;
3903
3904             for(change=-1; change<=1; change+=2){
3905                 int new_level= level + change;
3906                 int score, new_coeff;
3907
3908                 new_coeff= q*new_level;
3909                 if(new_coeff >= 2048 || new_coeff < 0)
3910                     continue;
3911
3912                 score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
3913                 if(score<best_score){
3914                     best_score= score;
3915                     best_coeff= 0;
3916                     best_change= change;
3917                     best_unquant_change= new_coeff - old_coeff;
3918                 }
3919             }
3920         }
3921
3922         run=0;
3923         rle_index=0;
3924         run2= run_tab[rle_index++];
3925         prev_level=0;
3926         prev_run=0;
3927
3928         for(i=start_i; i<64; i++){
3929             int j= perm_scantable[i];
3930             const int level= block[j];
3931             int change, old_coeff;
3932
3933             if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
3934                 break;
3935
3936             if(level){
3937                 if(level<0) old_coeff= qmul*level - qadd;
3938                 else        old_coeff= qmul*level + qadd;
3939                 run2= run_tab[rle_index++]; //FIXME ! maybe after last
3940             }else{
3941                 old_coeff=0;
3942                 run2--;
3943                 av_assert2(run2>=0 || i >= last_non_zero );
3944             }
3945
3946             for(change=-1; change<=1; change+=2){
3947                 int new_level= level + change;
3948                 int score, new_coeff, unquant_change;
3949
3950                 score=0;
3951                 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
3952                    continue;
3953
3954                 if(new_level){
3955                     if(new_level<0) new_coeff= qmul*new_level - qadd;
3956                     else            new_coeff= qmul*new_level + qadd;
3957                     if(new_coeff >= 2048 || new_coeff <= -2048)
3958                         continue;
3959                     //FIXME check for overflow
3960
3961                     if(level){
3962                         if(level < 63 && level > -63){
3963                             if(i < last_non_zero)
3964                                 score +=   length[UNI_AC_ENC_INDEX(run, new_level+64)]
3965                                          - length[UNI_AC_ENC_INDEX(run, level+64)];
3966                             else
3967                                 score +=   last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
3968                                          - last_length[UNI_AC_ENC_INDEX(run, level+64)];
3969                         }
3970                     }else{
3971                         av_assert2(FFABS(new_level)==1);
3972
3973                         if(analyze_gradient){
3974                             int g= d1[ scantable[i] ];
3975                             if(g && (g^new_level) >= 0)
3976                                 continue;
3977                         }
3978
3979                         if(i < last_non_zero){
3980                             int next_i= i + run2 + 1;
3981                             int next_level= block[ perm_scantable[next_i] ] + 64;
3982
3983                             if(next_level&(~127))
3984                                 next_level= 0;
3985
3986                             if(next_i < last_non_zero)
3987                                 score +=   length[UNI_AC_ENC_INDEX(run, 65)]
3988                                          + length[UNI_AC_ENC_INDEX(run2, next_level)]
3989                                          - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3990                             else
3991                                 score +=  length[UNI_AC_ENC_INDEX(run, 65)]
3992                                         + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
3993                                         - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
3994                         }else{
3995                             score += last_length[UNI_AC_ENC_INDEX(run, 65)];
3996                             if(prev_level){
3997                                 score +=  length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
3998                                         - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
3999                             }
4000                         }
4001                     }
4002                 }else{
4003                     new_coeff=0;
4004                     av_assert2(FFABS(level)==1);
4005
4006                     if(i < last_non_zero){
4007                         int next_i= i + run2 + 1;
4008                         int next_level= block[ perm_scantable[next_i] ] + 64;
4009
4010                         if(next_level&(~127))
4011                             next_level= 0;
4012
4013                         if(next_i < last_non_zero)
4014                             score +=   length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4015                                      - length[UNI_AC_ENC_INDEX(run2, next_level)]
4016                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4017                         else
4018                             score +=   last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4019                                      - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4020                                      - length[UNI_AC_ENC_INDEX(run, 65)];
4021                     }else{
4022                         score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4023                         if(prev_level){
4024                             score +=  last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4025                                     - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4026                         }
4027                     }
4028                 }
4029
4030                 score *= lambda;
4031
4032                 unquant_change= new_coeff - old_coeff;
4033                 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4034
4035                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
4036                 if(score<best_score){
4037                     best_score= score;
4038                     best_coeff= i;
4039                     best_change= change;
4040                     best_unquant_change= unquant_change;
4041                 }
4042             }
4043             if(level){
4044                 prev_level= level + 64;
4045                 if(prev_level&(~127))
4046                     prev_level= 0;
4047                 prev_run= run;
4048                 run=0;
4049             }else{
4050                 run++;
4051             }
4052         }
4053 #ifdef REFINE_STATS
4054 STOP_TIMER("iterative step")}
4055 #endif
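        /* Apply the best-scoring +-1 adjustment found in this pass; a zero
         * best_change means no candidate lowered the combined
         * rate/distortion score, so the refinement stops. */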
4056
4057         if(best_change){
4058             int j= perm_scantable[ best_coeff ];
4059
4060             block[j] += best_change;
4061
4062             if(best_coeff > last_non_zero){
4063                 last_non_zero= best_coeff;
4064                 av_assert2(block[j]);
4065 #ifdef REFINE_STATS
4066 after_last++;
4067 #endif
4068             }else{
4069 #ifdef REFINE_STATS
4070 if(block[j]){
4071     if(block[j] - best_change){
4072         if(FFABS(block[j]) > FFABS(block[j] - best_change)){
4073             raise++;
4074         }else{
4075             lower++;
4076         }
4077     }else{
4078         from_zero++;
4079     }
4080 }else{
4081     to_zero++;
4082 }
4083 #endif
4084                 for(; last_non_zero>=start_i; last_non_zero--){
4085                     if(block[perm_scantable[last_non_zero]])
4086                         break;
4087                 }
4088             }
4089 #ifdef REFINE_STATS
4090 count++;
4091 if(256*256*256*64 % count == 0){
4092     av_log(s->avctx, AV_LOG_DEBUG, "after_last:%d to_zero:%d from_zero:%d raise:%d lower:%d sign:%d xyp:%d/%d/%d\n", after_last, to_zero, from_zero, raise, lower, messed_sign, s->mb_x, s->mb_y, s->picture_number);
4093 }
4094 #endif
4095             run=0;
4096             rle_index=0;
4097             for(i=start_i; i<=last_non_zero; i++){
4098                 int j= perm_scantable[i];
4099                 const int level= block[j];
4100
4101                 if(level){
4102                     run_tab[rle_index++]=run;
4103                     run=0;
4104                 }else{
4105                     run++;
4106                 }
4107             }
4108
4109             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
4110         }else{
4111             break;
4112         }
4113     }
4114 #ifdef REFINE_STATS
4115 if(last_non_zero>0){
4116 STOP_TIMER("iterative search")
4117 }
4118 }
4119 #endif
4120
4121     return last_non_zero;
4122 }
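/*
 * Illustrative sketch (hypothetical helper, kept out of the build): the
 * refinement above evaluates +-1 changes to the quantized coefficients and,
 * per pass, applies the one with the lowest combined rate/distortion score.
 * A change is only worthwhile when
 *
 *     lambda * (bits_after - bits_before) + (error_after - error_before) < 0
 *
 * with the error delta computed incrementally by dsp.try_8x8basis() against
 * the remaining reconstruction error "rem".
 */
#if 0
static int refine_change_is_worthwhile(int lambda, int bits_delta, int error_delta)
{
    /* accept a coefficient change only if it lowers the Lagrangian cost */
    return lambda * bits_delta + error_delta < 0;
}
#endif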
4123
4124 int ff_dct_quantize_c(MpegEncContext *s,
4125                         int16_t *block, int n,
4126                         int qscale, int *overflow)
4127 {
4128     int i, j, level, last_non_zero, q, start_i;
4129     const int *qmat;
4130     const uint8_t *scantable= s->intra_scantable.scantable;
4131     int bias;
4132     int max=0;
4133     unsigned int threshold1, threshold2;
4134
4135     s->dsp.fdct (block);
4136
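    /* Optional noise reduction: when enabled, each DCT coefficient is shrunk
     * toward zero by a data-dependent offset before quantization. */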
4137     if(s->dct_error_sum)
4138         s->denoise_dct(s, block);
4139
4140     if (s->mb_intra) {
4141         if (!s->h263_aic) {
4142             if (n < 4)
4143                 q = s->y_dc_scale;
4144             else
4145                 q = s->c_dc_scale;
4146             q = q << 3;
4147         } else
4148             /* For AIC we skip quant/dequant of INTRADC */
4149             q = 1 << 3;
4150
4151         /* note: block[0] is assumed to be positive */
4152         block[0] = (block[0] + (q >> 1)) / q;
4153         start_i = 1;
4154         last_non_zero = 0;
4155         qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4156         bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4157     } else {
4158         start_i = 0;
4159         last_non_zero = -1;
4160         qmat = s->q_inter_matrix[qscale];
4161         bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
4162     }
4163     threshold1= (1<<QMAT_SHIFT) - bias - 1;
4164     threshold2= (threshold1<<1);
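    /* (unsigned)(level + threshold1) > threshold2 is equivalent to
     * level > threshold1 || level < -threshold1, i.e. the coefficient is
     * large enough to survive the dead zone and quantize to a nonzero value.
     * The backward scan below uses this to strip the all-zero tail so the
     * main quantization loop only has to run up to last_non_zero. */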
4165     for(i=63;i>=start_i;i--) {
4166         j = scantable[i];
4167         level = block[j] * qmat[j];
4168
4169         if(((unsigned)(level+threshold1))>threshold2){
4170             last_non_zero = i;
4171             break;
4172         }else{
4173             block[j]=0;
4174         }
4175     }
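    /* Main quantization loop: coefficients inside the dead zone become 0,
     * the rest are rounded using the configured bias. */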
4176     for(i=start_i; i<=last_non_zero; i++) {
4177         j = scantable[i];
4178         level = block[j] * qmat[j];
4179
4180 //        if(   bias+level >= (1<<QMAT_SHIFT)
4181 //           || bias-level >= (1<<QMAT_SHIFT)){
4182         if(((unsigned)(level+threshold1))>threshold2){
4183             if(level>0){
4184                 level= (bias + level)>>QMAT_SHIFT;
4185                 block[j]= level;
4186             }else{
4187                 level= (bias - level)>>QMAT_SHIFT;
4188                 block[j]= -level;
4189             }
4190             max |=level;
4191         }else{
4192             block[j]=0;
4193         }
4194     }
4195     *overflow= s->max_qcoeff < max; //overflow might have happened
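    /* When *overflow is set the quantized levels may exceed what the entropy
     * coder can represent, so the caller is expected to clamp the block. */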
4196
4197     /* We need this permutation so that the IDCT is fed correctly ordered coefficients; only the nonzero elements are permuted. */
4198     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
4199         ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
4200
4201     return last_non_zero;
4202 }
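/*
 * Illustrative sketch (hypothetical helper, kept out of the build): ignoring
 * the threshold fast path, ff_dct_quantize_c() above quantizes each AC
 * coefficient with a biased dead-zone quantizer, roughly:
 */
#if 0
static int quantize_one_coeff(int coeff, int qmat_entry, int bias)
{
    int level = coeff * qmat_entry;               /* scale by the precomputed 1/(qscale*matrix) factor */

    if (level > 0)
        return   (bias + level) >> QMAT_SHIFT;    /* round positive levels with the configured bias */
    else
        return -((bias - level) >> QMAT_SHIFT);   /* mirror the rounding for negative levels */
}
#endif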
4203
4204 #define OFFSET(x) offsetof(MpegEncContext, x)
4205 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
4206 static const AVOption h263_options[] = {
4207     { "obmc",         "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4208     { "structured_slices","Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4209     { "mb_info",      "emit macroblock info for RFC 2190 packetization, the parameter value is the maximum payload size", OFFSET(mb_info), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
4210     FF_MPV_COMMON_OPTS
4211     { NULL },
4212 };
4213
4214 static const AVClass h263_class = {
4215     .class_name = "H.263 encoder",
4216     .item_name  = av_default_item_name,
4217     .option     = h263_options,
4218     .version    = LIBAVUTIL_VERSION_INT,
4219 };
4220
4221 AVCodec ff_h263_encoder = {
4222     .name           = "h263",
4223     .type           = AVMEDIA_TYPE_VIDEO,
4224     .id             = AV_CODEC_ID_H263,
4225     .priv_data_size = sizeof(MpegEncContext),
4226     .init           = ff_MPV_encode_init,
4227     .encode2        = ff_MPV_encode_picture,
4228     .close          = ff_MPV_encode_end,
4229     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4230     .long_name      = NULL_IF_CONFIG_SMALL("H.263 / H.263-1996"),
4231     .priv_class     = &h263_class,
4232 };
4233
4234 static const AVOption h263p_options[] = {
4235     { "umv",        "Use unlimited motion vectors.",    OFFSET(umvplus), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4236     { "aiv",        "Use alternative inter VLC.",       OFFSET(alt_inter_vlc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4237     { "obmc",       "use overlapped block motion compensation.", OFFSET(obmc), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
4238     { "structured_slices", "Write slice start position at every GOB header instead of just GOB number.", OFFSET(h263_slice_structured), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE},
4239     FF_MPV_COMMON_OPTS
4240     { NULL },
4241 };
4242 static const AVClass h263p_class = {
4243     .class_name = "H.263p encoder",
4244     .item_name  = av_default_item_name,
4245     .option     = h263p_options,
4246     .version    = LIBAVUTIL_VERSION_INT,
4247 };
4248
4249 AVCodec ff_h263p_encoder = {
4250     .name           = "h263p",
4251     .type           = AVMEDIA_TYPE_VIDEO,
4252     .id             = AV_CODEC_ID_H263P,
4253     .priv_data_size = sizeof(MpegEncContext),
4254     .init           = ff_MPV_encode_init,
4255     .encode2        = ff_MPV_encode_picture,
4256     .close          = ff_MPV_encode_end,
4257     .capabilities   = CODEC_CAP_SLICE_THREADS,
4258     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4259     .long_name      = NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
4260     .priv_class     = &h263p_class,
4261 };
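/*
 * Usage sketch (hypothetical command line, not part of the original source):
 * the private AVOptions declared above are exposed through the codec's
 * priv_class, so they can be set by name, e.g. on the ffmpeg command line:
 *
 *     ffmpeg -i input.avi -c:v h263p -umv 1 -obmc 1 -structured_slices 1 out.3gp
 *
 * or programmatically with av_opt_set(avctx, "umv", "1", AV_OPT_SEARCH_CHILDREN)
 * before avcodec_open2().
 */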
4262
4263 FF_MPV_GENERIC_CLASS(msmpeg4v2)
4264
4265 AVCodec ff_msmpeg4v2_encoder = {
4266     .name           = "msmpeg4v2",
4267     .type           = AVMEDIA_TYPE_VIDEO,
4268     .id             = AV_CODEC_ID_MSMPEG4V2,
4269     .priv_data_size = sizeof(MpegEncContext),
4270     .init           = ff_MPV_encode_init,
4271     .encode2        = ff_MPV_encode_picture,
4272     .close          = ff_MPV_encode_end,
4273     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4274     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 2"),
4275     .priv_class     = &msmpeg4v2_class,
4276 };
4277
4278 FF_MPV_GENERIC_CLASS(msmpeg4v3)
4279
4280 AVCodec ff_msmpeg4v3_encoder = {
4281     .name           = "msmpeg4",
4282     .type           = AVMEDIA_TYPE_VIDEO,
4283     .id             = AV_CODEC_ID_MSMPEG4V3,
4284     .priv_data_size = sizeof(MpegEncContext),
4285     .init           = ff_MPV_encode_init,
4286     .encode2        = ff_MPV_encode_picture,
4287     .close          = ff_MPV_encode_end,
4288     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4289     .long_name      = NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 3"),
4290     .priv_class     = &msmpeg4v3_class,
4291 };
4292
4293 FF_MPV_GENERIC_CLASS(wmv1)
4294
4295 AVCodec ff_wmv1_encoder = {
4296     .name           = "wmv1",
4297     .type           = AVMEDIA_TYPE_VIDEO,
4298     .id             = AV_CODEC_ID_WMV1,
4299     .priv_data_size = sizeof(MpegEncContext),
4300     .init           = ff_MPV_encode_init,
4301     .encode2        = ff_MPV_encode_picture,
4302     .close          = ff_MPV_encode_end,
4303     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE },
4304     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Video 7"),
4305     .priv_class     = &wmv1_class,
4306 };